├── .git-blame-ignore-revs ├── .github ├── codecov.yml └── workflows │ ├── black.yml │ ├── flake8.yml │ ├── isort.yml │ └── unit_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── VERSION ├── docs ├── Makefile ├── api.rst ├── cli.rst ├── conf.py ├── corpus.rst ├── cuts.rst ├── datasets.rst ├── features.rst ├── getting-started.rst ├── index.rst ├── kaldi.rst ├── lhotse-concept-graph.png ├── lhotse-cut-illustration.png ├── logo.png ├── make.bat ├── parallelism.rst ├── requirements.txt └── vad_sample.png ├── examples ├── 00-basic-workflow.ipynb ├── 01-cut-python-api.ipynb ├── 02-webdataset-integration.ipynb ├── 03-combining-datasets.ipynb ├── 04-lhotse-shar.ipynb └── 05-image-and-video-loading.ipynb ├── lhotse ├── __init__.py ├── array.py ├── audio │ ├── __init__.py │ ├── backend.py │ ├── mixer.py │ ├── recording.py │ ├── recording_set.py │ ├── source.py │ └── utils.py ├── augmentation │ ├── __init__.py │ ├── common.py │ ├── loudness.py │ ├── resample.py │ ├── rir.py │ ├── torchaudio.py │ ├── transform.py │ ├── utils.py │ └── wpe.py ├── bin │ ├── __init__.py │ ├── lhotse.py │ └── modes │ │ ├── __init__.py │ │ ├── cli_base.py │ │ ├── cut.py │ │ ├── features.py │ │ ├── install_tools.py │ │ ├── kaldi.py │ │ ├── manipulation.py │ │ ├── recipes │ │ ├── __init__.py │ │ ├── adept.py │ │ ├── aidatatang_200zh.py │ │ ├── aishell.py │ │ ├── aishell2.py │ │ ├── aishell3.py │ │ ├── aishell4.py │ │ ├── ali_meeting.py │ │ ├── ami.py │ │ ├── aspire.py │ │ ├── atcosim.py │ │ ├── audio_mnist.py │ │ ├── babel.py │ │ ├── baker_zh.py │ │ ├── bengaliai_speech.py │ │ ├── broadcast_news.py │ │ ├── but_reverb_db.py │ │ ├── bvcc.py │ │ ├── callhome_egyptian.py │ │ ├── callhome_english.py │ │ ├── cdsd.py │ │ ├── chime6.py │ │ ├── cmu_arctic.py │ │ ├── cmu_indic.py │ │ ├── cmu_kids.py │ │ ├── commonvoice.py │ │ ├── csj.py │ │ ├── cslu_kids.py │ │ ├── daily_talk.py │ │ ├── dihard3.py │ │ ├── dipco.py 
│ │ ├── earnings21.py │ │ ├── earnings22.py │ │ ├── ears.py │ │ ├── edacc.py │ │ ├── emilia.py │ │ ├── eval2000.py │ │ ├── fisher_english.py │ │ ├── fisher_spanish.py │ │ ├── fleurs.py │ │ ├── gale_arabic.py │ │ ├── gale_mandarin.py │ │ ├── gigaspeech.py │ │ ├── gigast.py │ │ ├── grid.py │ │ ├── heroico.py │ │ ├── hifitts.py │ │ ├── himia.py │ │ ├── icmcasr.py │ │ ├── icsi.py │ │ ├── iwslt22_ta.py │ │ ├── kespeech.py │ │ ├── ksponspeech.py │ │ ├── l2_arctic.py │ │ ├── libricss.py │ │ ├── librilight.py │ │ ├── librimix.py │ │ ├── librispeech.py │ │ ├── libritts.py │ │ ├── ljspeech.py │ │ ├── magicdata.py │ │ ├── mdcc.py │ │ ├── medical.py │ │ ├── mgb2.py │ │ ├── mls.py │ │ ├── mtedx.py │ │ ├── musan.py │ │ ├── must_c.py │ │ ├── nsc.py │ │ ├── peoples_speech.py │ │ ├── primewords.py │ │ ├── radio.py │ │ ├── reazonspeech.py │ │ ├── rir_noise.py │ │ ├── sbcsae.py │ │ ├── slu.py │ │ ├── spatial_librispeech.py │ │ ├── speechcommands.py │ │ ├── speechio.py │ │ ├── spgispeech.py │ │ ├── stcmds.py │ │ ├── switchboard.py │ │ ├── tal_asr.py │ │ ├── tal_csasr.py │ │ ├── tedlium.py │ │ ├── tedlium2.py │ │ ├── thchs_30.py │ │ ├── this_american_life.py │ │ ├── timit.py │ │ ├── uwb_atcc.py │ │ ├── vctk.py │ │ ├── voxceleb.py │ │ ├── voxconverse.py │ │ ├── voxpopuli.py │ │ ├── wenet_speech.py │ │ ├── wenetspeech4tts.py │ │ ├── xbmu_amdo31.py │ │ └── yesno.py │ │ ├── shar.py │ │ ├── supervision.py │ │ ├── utils.py │ │ ├── validate.py │ │ └── workflows.py ├── caching.py ├── custom.py ├── cut │ ├── __init__.py │ ├── base.py │ ├── data.py │ ├── describe.py │ ├── mixed.py │ ├── mono.py │ ├── multi.py │ ├── padding.py │ ├── set.py │ └── text.py ├── dataset │ ├── __init__.py │ ├── audio_tagging.py │ ├── collation.py │ ├── cut_transforms │ │ ├── __init__.py │ │ ├── concatenate.py │ │ ├── extra_padding.py │ │ ├── mix.py │ │ ├── perturb_speed.py │ │ ├── perturb_tempo.py │ │ ├── perturb_volume.py │ │ └── reverberate.py │ ├── dataloading.py │ ├── diarization.py │ ├── input_strategies.py │ ├── 
iterable_dataset.py │ ├── sampling │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bucketing.py │ │ ├── cut_pairs.py │ │ ├── data_source.py │ │ ├── dynamic.py │ │ ├── dynamic_bucketing.py │ │ ├── round_robin.py │ │ ├── simple.py │ │ ├── stateless.py │ │ ├── utils.py │ │ ├── weighted_simple.py │ │ └── zip.py │ ├── signal_transforms.py │ ├── source_separation.py │ ├── speech_recognition.py │ ├── speech_synthesis.py │ ├── speech_translation.py │ ├── surt.py │ ├── unsupervised.py │ ├── vad.py │ ├── video.py │ ├── vis.py │ └── webdataset.py ├── features │ ├── __init__.py │ ├── base.py │ ├── compression.py │ ├── fbank.py │ ├── io.py │ ├── kaldi │ │ ├── __init__.py │ │ ├── extractors.py │ │ └── layers.py │ ├── kaldifeat.py │ ├── librosa_fbank.py │ ├── mfcc.py │ ├── mixer.py │ ├── opensmile.py │ ├── spectrogram.py │ ├── ssl.py │ └── whisper_fbank.py ├── hf.py ├── image │ ├── __init__.py │ ├── image.py │ └── io.py ├── kaldi.py ├── lazy.py ├── manipulation.py ├── parallel.py ├── qa.py ├── recipes │ ├── __init__.py │ ├── adept.py │ ├── aidatatang_200zh.py │ ├── aishell.py │ ├── aishell2.py │ ├── aishell3.py │ ├── aishell4.py │ ├── ali_meeting.py │ ├── ami.py │ ├── aspire.py │ ├── atcosim.py │ ├── audio_mnist.py │ ├── babel.py │ ├── baker_zh.py │ ├── bengaliai_speech.py │ ├── broadcast_news.py │ ├── but_reverb_db.py │ ├── bvcc.py │ ├── callhome_egyptian.py │ ├── callhome_english.py │ ├── cdsd.py │ ├── chime6.py │ ├── cmu_arctic.py │ ├── cmu_indic.py │ ├── cmu_kids.py │ ├── commonvoice.py │ ├── csj.py │ ├── cslu_kids.py │ ├── daily_talk.py │ ├── dihard3.py │ ├── dipco.py │ ├── earnings21.py │ ├── earnings22.py │ ├── ears.py │ ├── edacc.py │ ├── emilia.py │ ├── eval2000.py │ ├── fisher_english.py │ ├── fisher_spanish.py │ ├── fleurs.py │ ├── gale_arabic.py │ ├── gale_mandarin.py │ ├── gigaspeech.py │ ├── gigast.py │ ├── grid.py │ ├── heroico.py │ ├── hifitts.py │ ├── himia.py │ ├── icmcasr.py │ ├── icsi.py │ ├── iwslt22_ta.py │ ├── kespeech.py │ ├── ksponspeech.py │ ├── l2_arctic.py 
│ ├── libricss.py │ ├── librilight.py │ ├── librimix.py │ ├── librispeech.py │ ├── libritts.py │ ├── ljspeech.py │ ├── magicdata.py │ ├── mdcc.py │ ├── medical.py │ ├── mgb2.py │ ├── mls.py │ ├── mobvoihotwords.py │ ├── mtedx.py │ ├── musan.py │ ├── must_c.py │ ├── nsc.py │ ├── peoples_speech.py │ ├── primewords.py │ ├── radio.py │ ├── reazonspeech.py │ ├── rir_noise.py │ ├── sbcsae.py │ ├── slu.py │ ├── spatial_librispeech.py │ ├── speechcommands.py │ ├── speechio.py │ ├── spgispeech.py │ ├── stcmds.py │ ├── switchboard.py │ ├── tal_asr.py │ ├── tal_csasr.py │ ├── tedlium.py │ ├── tedlium2.py │ ├── thchs_30.py │ ├── this_american_life.py │ ├── timit.py │ ├── utils.py │ ├── uwb_atcc.py │ ├── vctk.py │ ├── voxceleb.py │ ├── voxconverse.py │ ├── voxpopuli.py │ ├── wenet_speech.py │ ├── wenetspeech4tts.py │ ├── xbmu_amdo31.py │ └── yesno.py ├── serialization.py ├── shar │ ├── __init__.py │ ├── readers │ │ ├── __init__.py │ │ ├── lazy.py │ │ ├── tar.py │ │ └── utils.py │ ├── utils.py │ └── writers │ │ ├── __init__.py │ │ ├── array.py │ │ ├── audio.py │ │ ├── cut.py │ │ ├── shar.py │ │ └── tar.py ├── supervision.py ├── testing │ ├── __init__.py │ ├── dummies.py │ ├── fixtures.py │ └── random.py ├── tools │ ├── __init__.py │ ├── env.py │ └── sph2pipe.py ├── utils.py ├── workarounds.py └── workflows │ ├── __init__.py │ ├── activity_detection │ ├── README.md │ ├── __init__.py │ ├── base.py │ └── silero_vad.py │ ├── dnsmos.py │ ├── forced_alignment │ ├── __init__.py │ ├── asr_aligner.py │ ├── base.py │ ├── mms_aligner.py │ └── workflow.py │ ├── meeting_simulation │ ├── __init__.py │ ├── base.py │ ├── conversational.py │ └── speaker_independent.py │ └── whisper.py ├── pyproject.toml ├── setup.py ├── test ├── __init__.py ├── audio │ ├── __init__.py │ ├── test_audio_backend.py │ ├── test_audio_reads.py │ ├── test_recording_set.py │ └── test_resample_randomized.py ├── augmentation │ ├── __init__.py │ └── test_torchaudio.py ├── cut │ ├── __init__.py │ ├── conftest.py │ ├── 
test_copy_data.py │ ├── test_custom_attrs.py │ ├── test_custom_attrs_randomized.py │ ├── test_cut.py │ ├── test_cut_augmentation.py │ ├── test_cut_drop_attributes.py │ ├── test_cut_extend_by.py │ ├── test_cut_fill_supervision.py │ ├── test_cut_merge_supervisions.py │ ├── test_cut_mixing.py │ ├── test_cut_ops_preserve_id.py │ ├── test_cut_set.py │ ├── test_cut_set_mix.py │ ├── test_cut_trim_to_supervisions.py │ ├── test_cut_truncate.py │ ├── test_cut_with_in_memory_data.py │ ├── test_feature_extraction.py │ ├── test_invariants_randomized.py │ ├── test_masks.py │ ├── test_multi_cut_augmentation.py │ └── test_padding_cut.py ├── dataset │ ├── __init__.py │ ├── sampling │ │ ├── __init__.py │ │ ├── test_dynamic_bucketing.py │ │ ├── test_sampler_pickling.py │ │ ├── test_sampler_restoring.py │ │ ├── test_sampling.py │ │ ├── test_stateless_sampler.py │ │ └── test_text_sampling.py │ ├── test_audio_chunk_dataset.py │ ├── test_audio_tagging.py │ ├── test_batch_io.py │ ├── test_collation.py │ ├── test_controllable_weights.py │ ├── test_cut_transforms.py │ ├── test_diarization.py │ ├── test_iterable_dataset.py │ ├── test_signal_transforms.py │ ├── test_speech_recognition_dataset.py │ ├── test_speech_recognition_dataset_randomized.py │ ├── test_speech_synthesis_dataset.py │ ├── test_surt_dataset.py │ ├── test_unsupervised_dataset.py │ ├── test_vad_dataset.py │ ├── test_webdataset.py │ └── test_webdataset_ddp.py ├── features │ ├── __init__.py │ ├── test_array.py │ ├── test_chunky_writer.py │ ├── test_copy_feats.py │ ├── test_feature_writer.py │ ├── test_kaldi_features.py │ ├── test_kaldi_layers.py │ ├── test_kaldifeat_features.py │ ├── test_librosa_fbank.py │ ├── test_opensmile.py │ ├── test_s3prl.py │ ├── test_temporal_array.py │ ├── test_torchaudio_features.py │ ├── test_whisper_fbank.py │ └── test_writer_append.py ├── fixtures │ ├── ami │ │ ├── 350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc │ │ ├── ES2011a.Headset-0-40s-46s.wav │ │ ├── ES2011a_sups.jsonl.gz │ │ └── cuts.json │ ├── 
audio.json │ ├── big_buck_bunny_small.mp4 │ ├── common_voice_en_651325.mp3 │ ├── dummy_feats │ │ ├── feature_manifest.json │ │ └── storage │ │ │ ├── 25959652-8816-4810-a88a-0b022d6b9b6d.llc │ │ │ ├── 89739de9-308c-4487-9fa5-1c690d44e718.llc │ │ │ ├── d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc │ │ │ └── dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc │ ├── feature_config.yml │ ├── libri │ │ ├── audio.json │ │ ├── cuts.json │ │ ├── cuts_multi.json │ │ ├── cuts_no_feats.json │ │ ├── cuts_no_recording.json │ │ ├── feature_manifest.json.gz │ │ ├── libri-1088-134315-0000.wav │ │ ├── libri-1088-134315-0000_8ch.wav │ │ ├── libri-1088-134315-0000_rvb.wav │ │ ├── recreate.sh │ │ └── storage │ │ │ └── 30c2440c-93cb-4e83-b382-f2a59b3859b4.llc │ ├── ljspeech │ │ ├── cuts.json │ │ ├── feats │ │ │ ├── 5bb │ │ │ │ └── 5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc │ │ │ └── d39 │ │ │ │ └── d39cf273-a42d-433a-a63c-ba6357f1669e.llc │ │ └── storage │ │ │ ├── LJ002-0020.wav │ │ │ └── LJ002-0035.wav │ ├── lsmix │ │ ├── cuts.000000.jsonl.gz │ │ ├── features.000000.tar │ │ ├── recording.000000.tar │ │ └── source_feats.000000.tar │ ├── mini_librispeech │ │ ├── conf │ │ │ └── mfcc.conf │ │ ├── lhotse-b │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── lhotse │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── reco2dur │ │ ├── segments │ │ ├── spk2gender │ │ ├── spk2utt │ │ ├── text │ │ ├── utt2dur │ │ ├── utt2num_frames │ │ ├── utt2spk │ │ └── wav.scp │ ├── mini_librispeech2 │ │ ├── conf │ │ │ └── mfcc.conf │ │ ├── data │ │ │ ├── raw_mfcc_mini_librispeech2.1.ark │ │ │ └── raw_mfcc_mini_librispeech2.1.scp │ │ ├── feats.scp │ │ ├── frame_shift │ │ ├── lhotse │ │ │ ├── features.jsonl.gz │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── reco2dur │ │ ├── segments │ │ ├── spk2gender │ │ ├── spk2utt │ │ ├── text │ │ ├── utt2dur │ │ ├── utt2num_frames │ │ ├── utt2spk │ │ └── wav.scp │ ├── mix_cut_test │ │ ├── audio │ │ │ └── storage │ │ │ │ ├── 
2412-153948-0000.flac │ │ │ │ └── 2412-153948-0001.flac │ │ ├── feats │ │ │ └── storage │ │ │ │ ├── 5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc │ │ │ │ └── 9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc │ │ ├── offseted_audio_cut_manifest.json │ │ ├── overlayed_audio_cut_manifest.json │ │ └── overlayed_cut_manifest.json │ ├── mono_c0.opus │ ├── mono_c0.wav │ ├── mono_c1.wav │ ├── rir │ │ ├── real_8ch.wav │ │ └── sim_1ch.wav │ ├── stereo.mp3 │ ├── stereo.opus │ ├── stereo.sph │ ├── stereo.wav │ ├── supervision.ctm │ ├── supervision.json │ └── supervision_with_scores.ctm ├── known_issues │ ├── __init__.py │ ├── test_augment_with_executor.py │ ├── test_cut_consistency.py │ ├── test_lazy_cuts_issues.py │ ├── test_mixed_cut_num_frames.py │ └── test_mixing_zero_energy_cuts.py ├── recipes │ ├── __init__.py │ └── test_utils.py ├── shar │ ├── __init__.py │ ├── conftest.py │ ├── test_dataloading.py │ ├── test_missing_values.py │ ├── test_read_lazy.py │ └── test_write.py ├── test_feature_set.py ├── test_image.py ├── test_kaldi_dirs.py ├── test_lazy.py ├── test_manipulation.py ├── test_missing_torchaudio.py ├── test_multiplexing_iterables.py ├── test_parallel.py ├── test_qa.py ├── test_serialization.py ├── test_supervision_set.py ├── test_utils.py ├── test_workflows.py ├── video │ ├── __init__.py │ ├── conftest.py │ ├── test_video_cut.py │ ├── test_video_dataset.py │ └── test_video_recording.py └── workflows │ └── test_activity_detection.py └── tools └── make_release.sh /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Migrate code style to Black 2 | b3c4db1cd7e22ee4dbfd8a5c3bfca6851605c76a 3 | 7b9fe724f570a6df86466b7bc0a19e9caef7b86c 4 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | ignore: 3 | # Recipe code is not subject to testing. 
4 | - "lhotse/recipes/**/*" 5 | - "lhotse/recipes/*" 6 | # Testing utilities shouldn't count. 7 | - "lhotse/testing/**/*" 8 | - "lhotse/testing/*" 9 | # Unit tests code artificially increases the coverage. 10 | - "test/**/*" 11 | - "test/*" 12 | -------------------------------------------------------------------------------- /.github/workflows/black.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: black 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | black: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: psf/black@stable 18 | with: 19 | options: "--check --diff --color" 20 | version: "22.3.0" 21 | -------------------------------------------------------------------------------- /.github/workflows/flake8.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: flake8 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | flake8: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [ 3.12 ] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install flake8 27 | run: | 28 | python -m pip install --upgrade pip flake8==7.1.1 29 | - name: Lint with flake8 30 | run: | 31 | # 
stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | -------------------------------------------------------------------------------- /.github/workflows/isort.yml: -------------------------------------------------------------------------------- 1 | name: isort 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | isort: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [ 3.12 ] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install isort 24 | run: | 25 | python -m pip install --upgrade pip isort==5.10.1 26 | - name: Check that imports are sorted 27 | run: | 28 | isort --check --diff lhotse 29 | isort --check --diff test 30 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: check-executables-have-shebangs 6 | - id: end-of-file-fixer 7 | - id: mixed-line-ending 8 | - id: trailing-whitespace 9 | 10 | - repo: https://github.com/PyCQA/flake8 11 | rev: 7.1.1 12 | hooks: 13 | - id: flake8 14 | args: ['--select=E9,F63,F7,F82'] 15 | 16 | - repo: https://github.com/pycqa/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | args: [--profile=black] 21 | 22 | - repo: https://github.com/psf/black 23 | rev: 22.3.0 24 | hooks: 25 | - id: black 26 | additional_dependencies: ['click==8.0.1'] 27 | 
-------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | build: 8 | os: "ubuntu-22.04" 9 | tools: 10 | python: "3.10" 11 | apt_packages: 12 | - "libsndfile-dev" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: "docs/conf.py" 17 | 18 | # Optionally set the version of Python and requirements required to build your docs 19 | python: 20 | install: 21 | - requirements: "docs/requirements.txt" # install this first to get numpy 22 | - method: "pip" 23 | path: "." 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Żelasko" 5 | given-names: "Piotr" 6 | orcid: "https://orcid.org/0000-0002-8245-0413" 7 | - family-names: "Povey" 8 | given-names: "Daniel" 9 | orcid: "https://orcid.org/0000-0002-0611-3634" 10 | - family-names: "Trmal" 11 | given-names: "Jan" 12 | - family-names: "Khudanpur" 13 | given-names: "Sanjeev" 14 | license: Apache-2.0 License 15 | title: "Lhotse: a speech data representation library for the modern deep learning ecosystem" 16 | date-released: 2020-04-24 17 | url: "https://github.com/lhotse-speech/lhotse" 18 | preferred-citation: 19 | type: proceedings 20 | authors: 21 | - family-names: "Żelasko" 22 | given-names: "Piotr" 23 | orcid: "https://orcid.org/0000-0002-8245-0413" 24 | - family-names: "Povey" 25 | given-names: "Daniel" 26 | orcid: "https://orcid.org/0000-0002-0611-3634" 27 | - family-names: "Trmal" 28 | given-names: "Jan" 29 | - family-names: "Khudanpur" 30 | given-names: "Sanjeev" 31 | conference: 32 | name: "NeurIPS Data-Centric AI Workshop" 33 | title: "Lhotse: a speech data representation library for the modern deep learning ecosystem" 34 | url: "https://arxiv.org/abs/2110.12561" 35 | year: 2021 36 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md VERSION 2 | include docs/requirements.txt 3 | recursive-exclude * __pycache__ 4 | recursive-exclude * *.pyc 5 | recursive-exclude * *.pyo 6 | recursive-exclude * *.orig 7 | prune test* 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Lhotse 2 | Copyright 2020-2024 Piotr Żelasko 3 | Copyright 2020-2024 Johns Hopkins University 4 | Copyright 2020-2024 Xiaomi Corporation 5 | Copyright 2022-2023 Meaning.Team Inc. 
6 | Copyright 2023-2024 NVIDIA Corporation 7 | 8 | This repository includes software developed by: 9 | - Johns Hopkins University 10 | - Xiaomi Corporation 11 | - Meaning.Team Inc. 12 | - NVIDIA Corporation 13 | - other organizations and individuals. 14 | 15 | This project includes contributions from various organizations and individuals. 16 | Only major copyright holders are listed here. 17 | For a complete list of contributors, please refer to the project's version control history. 18 | 19 | Licensed under the Apache License, Version 2.0 (the "License"). 20 | See the LICENSE file for the full contents of the license. 21 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.31.0 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command-line interface 2 | ====================== 3 | 4 | .. 
click:: lhotse.bin:cli 5 | :prog: lhotse 6 | :nested: full 7 | -------------------------------------------------------------------------------- /docs/cuts.rst: -------------------------------------------------------------------------------- 1 | Cuts 2 | ==== 3 | 4 | Overview 5 | ******** 6 | 7 | Audio cuts are one of the main Lhotse features. 8 | Cut is a part of a recording, but it can be longer than a supervision segment, or even span multiple segments. 9 | The regions without a supervision are just audio that we don't assume we know anything about - there may be silence, 10 | noise, non-transcribed speech, etc. 11 | Task-specific datasets can leverage this information to generate masks for such regions. 12 | 13 | .. autoclass:: lhotse.cut.Cut 14 | :no-members: 15 | :no-special-members: 16 | :noindex: 17 | 18 | .. autoclass:: lhotse.cut.CutSet 19 | :no-members: 20 | :no-special-members: 21 | :noindex: 22 | 23 | Types of cuts 24 | ************* 25 | 26 | There are three cut classes: :class:`~lhotse.cut.MonoCut`, :class:`~lhotse.cut.MixedCut`, and :class:`~lhotse.cut.PaddingCut` that are described below in more detail: 27 | 28 | .. autoclass:: lhotse.cut.MonoCut 29 | :no-members: 30 | :no-special-members: 31 | :noindex: 32 | 33 | .. autoclass:: lhotse.cut.MixedCut 34 | :no-members: 35 | :no-special-members: 36 | :noindex: 37 | 38 | 39 | .. autoclass:: lhotse.cut.PaddingCut 40 | :no-members: 41 | :no-special-members: 42 | :noindex: 43 | 44 | CLI 45 | *** 46 | 47 | We provide a limited CLI to manipulate Lhotse manifests. 48 | Some examples of how to perform manipulations in the terminal: 49 | 50 | .. code-block:: bash 51 | 52 | # Reject short segments 53 | lhotse filter 'duration>=3.0' cuts.jsonl cuts-3s.jsonl 54 | # Pad short segments to 5 seconds. 55 | lhotse cut pad --duration 5.0 cuts-3s.jsonl cuts-5s-pad.jsonl 56 | # Truncate longer segments to 5 seconds. 
57 | lhotse cut truncate --max-duration 5.0 --offset-type random cuts-5s-pad.jsonl cuts-5s.jsonl 58 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. lhotse documentation master file, created by 2 | sphinx-quickstart on Thu Jul 2 08:36:51 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to lhotse's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | getting-started.rst 14 | corpus.rst 15 | cuts.rst 16 | features.rst 17 | parallelism.rst 18 | datasets.rst 19 | kaldi.rst 20 | cli.rst 21 | api.rst 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`modindex` 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /docs/lhotse-concept-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/lhotse-concept-graph.png -------------------------------------------------------------------------------- /docs/lhotse-cut-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/lhotse-cut-illustration.png -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/logo.png -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.1 2 | sphinx_rtd_theme==2.0.0 3 | sphinx==7.1.2 4 | sphinx-click==5.1.0 5 | sphinx-autodoc-typehints==2.0.0 6 | -------------------------------------------------------------------------------- /docs/vad_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/vad_sample.png -------------------------------------------------------------------------------- /lhotse/audio/__init__.py: -------------------------------------------------------------------------------- 1 | from .backend import ( 2 | audio_backend, 3 | available_audio_backends, 4 | get_current_audio_backend, 5 | get_default_audio_backend, 6 | get_ffmpeg_torchaudio_info_enabled, 7 | info, 8 | 
read_audio, 9 | save_audio, 10 | set_current_audio_backend, 11 | set_ffmpeg_torchaudio_info_enabled, 12 | ) 13 | from .recording import Recording 14 | from .recording_set import RecordingSet 15 | from .source import AudioSource 16 | from .utils import ( 17 | AudioLoadingError, 18 | DurationMismatchError, 19 | VideoInfo, 20 | get_audio_duration_mismatch_tolerance, 21 | null_result_on_audio_loading_error, 22 | set_audio_duration_mismatch_tolerance, 23 | suppress_audio_loading_errors, 24 | ) 25 | 26 | __all__ = [ 27 | "AudioSource", 28 | "Recording", 29 | "RecordingSet", 30 | "AudioLoadingError", 31 | "DurationMismatchError", 32 | "VideoInfo", 33 | "audio_backend", 34 | "available_audio_backends", 35 | "get_current_audio_backend", 36 | "get_default_audio_backend", 37 | "get_audio_duration_mismatch_tolerance", 38 | "get_ffmpeg_torchaudio_info_enabled", 39 | "info", 40 | "read_audio", 41 | "save_audio", 42 | "set_current_audio_backend", 43 | "set_audio_duration_mismatch_tolerance", 44 | "set_ffmpeg_torchaudio_info_enabled", 45 | "null_result_on_audio_loading_error", 46 | "suppress_audio_loading_errors", 47 | ] 48 | -------------------------------------------------------------------------------- /lhotse/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import AugmentFn 2 | from .loudness import LoudnessNormalization 3 | from .rir import ReverbWithImpulseResponse 4 | from .torchaudio import * 5 | from .transform import AudioTransform 6 | from .utils import FastRandomRIRGenerator, convolve1d 7 | from .wpe import DereverbWPE, dereverb_wpe_numpy, dereverb_wpe_torch 8 | -------------------------------------------------------------------------------- /lhotse/augmentation/common.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import numpy as np 4 | 5 | # def augment_fn(audio: np.ndarray, sampling_rate: int) -> np.ndarray 6 | 
AugmentFn = Callable[[np.ndarray, int], np.ndarray] 7 | -------------------------------------------------------------------------------- /lhotse/bin/__init__.py: -------------------------------------------------------------------------------- 1 | from lhotse.bin.modes import * 2 | -------------------------------------------------------------------------------- /lhotse/bin/lhotse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Use this script like: https://lhotse.readthedocs.io/en/latest/cli.html 4 | """ 5 | 6 | # Note: we import all the CLI modes here so they get auto-registered 7 | # in Lhotse's main CLI entry-point. Then, setuptools is told to 8 | # invoke the "cli()" method from this script. 9 | from lhotse.bin.modes import * 10 | -------------------------------------------------------------------------------- /lhotse/bin/modes/__init__.py: -------------------------------------------------------------------------------- 1 | from .cli_base import * 2 | from .cut import * 3 | from .features import * 4 | from .install_tools import * 5 | from .kaldi import * 6 | from .manipulation import * 7 | from .recipes import * 8 | from .shar import * 9 | from .supervision import * 10 | from .utils import * 11 | from .validate import * 12 | from .workflows import * 13 | -------------------------------------------------------------------------------- /lhotse/bin/modes/cli_base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | 6 | @click.group() 7 | @click.option("-s", "--seed", type=int, help="Random seed.") 8 | def cli(seed): 9 | """ 10 | The shell entry point to Lhotse, a tool and a library for audio data manipulation in high altitudes. 
11 | """ 12 | logging.basicConfig( 13 | format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s", 14 | level=logging.INFO, 15 | ) 16 | if seed is not None: 17 | from lhotse.utils import fix_random_seed 18 | 19 | fix_random_seed(seed) 20 | 21 | 22 | @cli.group() 23 | def prepare(): 24 | """Command group with data preparation recipes.""" 25 | pass 26 | 27 | 28 | @cli.group() 29 | def download(): 30 | """Command group for download and extract data.""" 31 | pass 32 | -------------------------------------------------------------------------------- /lhotse/bin/modes/install_tools.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from ...tools.env import default_tools_cachedir 4 | from ...tools.sph2pipe import SPH2PIPE_URL 5 | from .cli_base import cli 6 | 7 | 8 | @cli.command(context_settings=dict(show_default=True)) 9 | @click.option( 10 | "--install-dir", 11 | type=click.Path(), 12 | default=default_tools_cachedir(), 13 | help="Directory where sph2pipe will be downloaded and installed.", 14 | ) 15 | @click.option( 16 | "--url", default=SPH2PIPE_URL, help="URL from which to download sph2pipe." 17 | ) 18 | def install_sph2pipe(install_dir: str, url: str): 19 | """ 20 | Install the sph2pipe program to handle sphere (.sph) audio files with 21 | "shorten" codec compression (needed for older LDC data). 22 | 23 | It downloads an archive and then decompresses and compiles the contents. 
24 | """ 25 | from lhotse.tools.sph2pipe import install_sph2pipe 26 | 27 | install_sph2pipe(where=install_dir, download_from=url) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/adept.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_adept, prepare_adept 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def adept( 12 | corpus_dir: Pathlike, 13 | output_dir: Pathlike, 14 | ): 15 | """ADEPT prosody transfer evaluation corpus data preparation.""" 16 | prepare_adept(corpus_dir, output_dir=output_dir) 17 | 18 | 19 | @download.command(context_settings=dict(show_default=True)) 20 | @click.argument("target_dir", type=click.Path()) 21 | def adept( 22 | target_dir: Pathlike, 23 | ): 24 | """ADEPT prosody transfer evaluation corpus download.""" 25 | download_adept(target_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aidatatang_200zh.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aidatatang_200zh import ( 5 | download_aidatatang_200zh, 6 | prepare_aidatatang_200zh, 7 | ) 8 | from lhotse.utils import Pathlike 9 | 10 | __all__ = ["aidatatang_200zh"] 11 | 12 | 13 | @prepare.command(context_settings=dict(show_default=True)) 14 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 15 | @click.argument("output_dir", type=click.Path()) 16 | def aidatatang_200zh(corpus_dir: Pathlike, output_dir: Pathlike): 17 | """aidatatang_200zh ASR data preparation. 
18 | Args: 19 | corpus_dir: 20 | It should contain a subdirectory "aidatatang_200zh" 21 | output_dir: 22 | The output directory. 23 | """ 24 | prepare_aidatatang_200zh(corpus_dir, output_dir=output_dir) 25 | 26 | 27 | @download.command(context_settings=dict(show_default=True)) 28 | @click.argument( 29 | "target_dir", 30 | type=click.Path(), 31 | ) 32 | def aidatatang_200zh(target_dir: Pathlike): 33 | """aidatatang_200zh download. 34 | Args: 35 | target_dir: 36 | It will create a dir aidatatang_200zh to contain all 37 | downloaded/extracted files 38 | """ 39 | download_aidatatang_200zh(target_dir) 40 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell import download_aishell, prepare_aishell 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aishell"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def aishell(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Aishell ASR data preparation.""" 15 | prepare_aishell(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def aishell(target_dir: Pathlike): 21 | """Aishell download.""" 22 | download_aishell(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell2.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.aishell2 import prepare_aishell2 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = 
["aishell2"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def aishell2(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 21 | """Aishell2 ASR data preparation.""" 22 | prepare_aishell2(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell3.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell3 import download_aishell3, prepare_aishell3 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aishell3"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def aishell3(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """aishell3 data preparation.""" 15 | prepare_aishell3(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path(), default=".") 20 | def aishell3(target_dir: Pathlike): 21 | """aishell3 download.""" 22 | download_aishell3(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell4.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell4 import download_aishell4, prepare_aishell4 5 | from lhotse.utils 
import Pathlike 6 | 7 | __all__ = ["aishell4"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--normalize-text", 15 | is_flag=True, 16 | default=False, 17 | help="Conduct text normalization (remove punctuation, uppercase, etc.)", 18 | ) 19 | def aishell4(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 20 | """AISHELL-4 data preparation.""" 21 | prepare_aishell4(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 22 | 23 | 24 | @download.command(context_settings=dict(show_default=True)) 25 | @click.argument("target_dir", type=click.Path()) 26 | def aishell4(target_dir: Pathlike): 27 | """AISHELL-4 download.""" 28 | download_aishell4(target_dir) 29 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ali_meeting.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.ali_meeting import download_ali_meeting, prepare_ali_meeting 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["ali_meeting"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--mic", type=click.Choice(["near", "far", "ihm", "sdm", "mdm"]), default="far" 15 | ) 16 | @click.option( 17 | "--normalize-text", 18 | type=click.Choice(["none", "m2met"], case_sensitive=False), 19 | default="none", 20 | help="Type of text normalization to apply (M2MeT style is from the official challenge)", 21 | ) 22 | @click.option( 23 | "--save-mono", 24 | is_flag=True, 25 | default=False, 26 | help="If True and `mic` is sdm, extract first channel and 
save as new recording.", 27 | ) 28 | def ali_meeting( 29 | corpus_dir: Pathlike, 30 | output_dir: Pathlike, 31 | mic: str, 32 | normalize_text: str, 33 | save_mono: bool, 34 | ): 35 | """AliMeeting data preparation.""" 36 | prepare_ali_meeting( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | mic=mic, 40 | normalize_text=normalize_text, 41 | save_mono=save_mono, 42 | ) 43 | 44 | 45 | @download.command(context_settings=dict(show_default=True)) 46 | @click.argument("target_dir", type=click.Path()) 47 | @click.option("--force-download", is_flag=True, default=False) 48 | def ali_meeting(target_dir: Pathlike, force_download: bool): 49 | """AliMeeting download.""" 50 | download_ali_meeting(target_dir, force_download=force_download) 51 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aspire.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.aspire import prepare_aspire 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aspire"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("--mic", type=click.Choice(["single", "multi"]), default="single") 14 | def aspire(corpus_dir: Pathlike, output_dir: Pathlike, mic: str): 15 | """ASpIRE data preparation.""" 16 | prepare_aspire(corpus_dir, output_dir=output_dir, mic=mic) 17 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/atcosim.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.atcosim import download_atcosim, prepare_atcosim 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = 
["atcosim"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | def atcosim(target_dir: Pathlike): 13 | """ATCOSIM download.""" 14 | download_atcosim(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | @click.option("--silence-sym", type=str, default="") 21 | @click.option("--breath-sym", type=str, default="") 22 | @click.option("--foreign-sym", type=str, default="") 23 | @click.option("--partial-sym", type=str, default="") 24 | @click.option("--unknown-sym", type=str, default="") 25 | def atcosim( 26 | corpus_dir: Pathlike, 27 | output_dir: Pathlike, 28 | silence_sym: str, 29 | breath_sym: str, 30 | foreign_sym: str, 31 | partial_sym: str, 32 | unknown_sym: str, 33 | ): 34 | """ATCOSIM data preparation.""" 35 | prepare_atcosim( 36 | corpus_dir, 37 | output_dir=output_dir, 38 | silence_sym=silence_sym, 39 | breath_sym=breath_sym, 40 | foreign_sym=foreign_sym, 41 | partial_sym=partial_sym, 42 | unknown_sym=unknown_sym, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/audio_mnist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.audio_mnist import download_audio_mnist, prepare_audio_mnist 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | @click.option( 13 | "--force-download", 14 | type=bool, 15 | default=False, 16 | help="If True, download even if file is present.", 17 | ) 18 | def audio_mnist(target_dir: Pathlike, force_download: bool): 19 | """AudioMNIST dataset download.""" 20 
| download_audio_mnist(target_dir, force_download) 21 | 22 | 23 | @prepare.command(context_settings=dict(show_default=True)) 24 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 25 | @click.argument("output_dir", type=click.Path()) 26 | def audio_mnist(corpus_dir: Pathlike, output_dir: Pathlike): 27 | """AudioMNIST corpus data preparation.""" 28 | logging.basicConfig(level=logging.INFO) 29 | prepare_audio_mnist( 30 | corpus_dir, 31 | output_dir=output_dir, 32 | ) 33 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/babel.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.babel import prepare_single_babel_language 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def babel( 12 | corpus_dir: Pathlike, 13 | output_dir: Pathlike, 14 | ): 15 | """ 16 | This is a data preparation recipe for the IARPA BABEL corpus 17 | (see: https://www.iarpa.gov/index.php/research-programs/babel). 18 | It should support all of the languages available in BABEL. 19 | It will prepare the data from the "conversational" part of BABEL. 
20 | 21 | This script should be invoked separately for each language you want to prepare, e.g.: 22 | $ lhotse prepare babel /export/corpora5/Babel/IARPA_BABEL_BP_101 data/cantonese 23 | $ lhotse prepare babel /export/corpora5/Babel/BABEL_OP1_103 data/bengali 24 | """ 25 | prepare_single_babel_language(corpus_dir, output_dir=output_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/baker_zh.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.baker_zh import download_baker_zh, prepare_baker_zh 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["baker_zh"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path(), default=".") 12 | def baker_zh(target_dir: Pathlike): 13 | """bazker_zh download.""" 14 | download_baker_zh(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | def baker_zh( 21 | corpus_dir: Pathlike, 22 | output_dir: Pathlike, 23 | ): 24 | """bazker_zh data preparation.""" 25 | prepare_baker_zh(corpus_dir, output_dir=output_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/bengaliai_speech.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.bengaliai_speech import prepare_bengaliai_speech 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | 
@click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def bengaliai_speech( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """Bengali.AI Speech data preparation.""" 26 | prepare_bengaliai_speech( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/broadcast_news.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.broadcast_news import prepare_broadcast_news 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("audio_dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript_dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output_dir", type=click.Path()) 12 | def broadcast_news(audio_dir: Pathlike, transcript_dir: Pathlike, output_dir: Pathlike): 13 | """ 14 | English Broadcast News 1997 data preparation. 15 | It will output three manifests: for recordings, topic sections, and speech segments. 16 | It supports the following LDC distributions: 17 | 18 | \b 19 | * 1997 English Broadcast News Train (HUB4) 20 | Speech LDC98S71 21 | Transcripts LDC98T28 22 | 23 | This data is not available for free - your institution needs to have an LDC subscription. 
24 | """ 25 | prepare_broadcast_news( 26 | audio_dir=audio_dir, transcripts_dir=transcript_dir, output_dir=output_dir 27 | ) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/but_reverb_db.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes import download_but_reverb_db, prepare_but_reverb_db 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["but_reverb_db"] 10 | 11 | 12 | @prepare.command() 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--parts", 17 | "-p", 18 | type=str, 19 | multiple=True, 20 | default=["silence", "rir"], 21 | show_default=True, 22 | help="Parts to prepare.", 23 | ) 24 | def but_reverb_db( 25 | corpus_dir: Pathlike, output_dir: Pathlike, parts: Union[str, Sequence[str]] 26 | ): 27 | """BUT Reverb DB data preparation.""" 28 | prepare_but_reverb_db(corpus_dir, output_dir=output_dir, parts=parts) 29 | 30 | 31 | @download.command() 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option( 34 | "--force-download", 35 | type=bool, 36 | default=False, 37 | help="If True, download even if file is present.", 38 | ) 39 | def but_reverb_db(target_dir: Pathlike, force_download: bool): 40 | """BUT Reverb DB download.""" 41 | download_but_reverb_db(target_dir, force_download=force_download) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/bvcc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_bvcc, prepare_bvcc 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["bvcc"] 8 | 9 | 10 | 
@prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("-nj", "--num_jobs", type=int, default=1) 14 | def bvcc(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs): 15 | """BVCC data preparation. 16 | 17 | CORPUS_DIR should contain the following dir structure 18 | 19 | ./phase1-main/README 20 | ./phase1-main/DATA/sets/* 21 | ./phase1-main/DATA/wav/* 22 | ... 23 | 24 | ./phase1-ood/README 25 | ./phase1-ood/DATA/sets/ 26 | ./phase1-ood/DATA/wav/ 27 | ... 28 | 29 | Check the READMEs for details. 30 | 31 | See 'lhotse download bvcc' for links to instructions how to obtain the corpus. 32 | """ 33 | prepare_bvcc(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 34 | 35 | 36 | @download.command() 37 | def bvcc(): 38 | """BVCC/VoiceMOS challange data cannot be downloaded. 39 | 40 | See info and instructions how to obtain BVCC dataset used for VoiceMOS challange: 41 | - https://arxiv.org/abs/2105.02373 42 | - https://nii-yamagishilab.github.io/ecooper-demo/VoiceMOS2022/index.html 43 | - https://codalab.lisn.upsaclay.fr/competitions/695 44 | """ 45 | download_bvcc( 46 | target_dir="Not needed - just prints the docstring. Hopefully the license will be lifted." 
47 | ) 48 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/callhome_egyptian.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_callhome_egyptian 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("audio-dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript-dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output-dir", type=click.Path()) 12 | @click.option( 13 | "--absolute-paths", 14 | default=False, 15 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 16 | ) 17 | def callhome_egyptian( 18 | audio_dir: Pathlike, 19 | transcript_dir: Pathlike, 20 | output_dir: Pathlike, 21 | absolute_paths: bool, 22 | ): 23 | """ 24 | About the Callhome Egyptian Arabic Corpus 25 | 26 | The CALLHOME Egyptian Arabic corpus of telephone speech consists of 120 unscripted 27 | telephone conversations between native speakers of Egyptian Colloquial Arabic (ECA), 28 | the spoken variety of Arabic found in Egypt. The dialect of ECA that this 29 | dictionary represents is Cairene Arabic. 30 | 31 | This recipe uses the speech and transcripts available through LDC. In addition, 32 | an Egyptian arabic phonetic lexicon (available via LDC) is used to get word to 33 | phoneme mappings for the vocabulary. This datasets are: 34 | 35 | Speech : LDC97S45 36 | Transcripts : LDC97T19 37 | Lexicon : LDC99L22 (unused here) 38 | 39 | To actually read the audio, you will need the SPH2PIPE binary: you can provide its path, 40 | so that we will add it in the manifests (otherwise you might need to modify your PATH 41 | environment variable to find sph2pipe). 
42 | """ 43 | prepare_callhome_egyptian( 44 | audio_dir=audio_dir, 45 | transcript_dir=transcript_dir, 46 | output_dir=output_dir, 47 | absolute_paths=absolute_paths, 48 | ) 49 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cdsd.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.cdsd import prepare_cdsd 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["cdsd"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def cdsd(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """CDSD ASR data preparation.""" 15 | prepare_cdsd(corpus_dir, output_dir=output_dir) 16 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_arctic.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.cmu_arctic import download_cmu_arctic, prepare_cmu_arctic 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["cmu_arctic"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def cmu_arctic(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """CMU Arctic data preparation.""" 15 | prepare_cmu_arctic(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command() 19 | @click.argument("target_dir", type=click.Path()) 20 | def cmu_arctic(target_dir: Pathlike): 21 | """CMU Arctic download.""" 22 | download_cmu_arctic(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_indic.py: 
-------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.cmu_indic import download_cmu_indic, prepare_cmu_indic 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def cmu_indic(corpus_dir: Pathlike, output_dir: Pathlike): 12 | """CMU Indic data preparation.""" 13 | prepare_cmu_indic(corpus_dir, output_dir=output_dir) 14 | 15 | 16 | @download.command() 17 | @click.argument("target_dir", type=click.Path()) 18 | def cmu_indic(target_dir: Pathlike): 19 | """CMU Indic download.""" 20 | download_cmu_indic(target_dir) 21 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_kids.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.cmu_kids import prepare_cmu_kids 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["cmu_kids"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--absolute-paths", 17 | type=bool, 18 | default=True, 19 | help="Use absolute paths for recordings", 20 | ) 21 | def cmu_kids( 22 | corpus_dir: Pathlike, 23 | output_dir: Pathlike, 24 | absolute_paths: Optional[bool] = False, 25 | ): 26 | """CMU Kids corpus data preparation.""" 27 | prepare_cmu_kids( 28 | corpus_dir, 29 | output_dir=output_dir, 30 | absolute_paths=absolute_paths, 31 | ) 32 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/csj.py: 
-------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.csj import prepare_csj 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["csj"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("manifest_dir", type=click.Path()) 15 | @click.option( 16 | "-t", 17 | "--transcript-dir", 18 | type=click.Path(), 19 | default=None, 20 | help=( 21 | "Directory to save parsed transcripts in txt format, with " 22 | "valid and eval sets created from the core and noncore datasets. " 23 | "If not provided, this script will not create valid and eval " 24 | "sets." 25 | ), 26 | ) 27 | @click.option( 28 | "-p", 29 | "--dataset-parts", 30 | type=str, 31 | default=None, 32 | multiple=True, 33 | help=( 34 | "List of dataset parts to prepare. 
" 35 | "To prepare multiple parts, pass each with `-p` " 36 | "Example: `-p eval1 -p eval2`" 37 | ), 38 | ) 39 | @click.option( 40 | "-j", 41 | "--num-jobs", 42 | type=int, 43 | default=1, 44 | help="How many threads to use (can give good speed-ups with slow disks).", 45 | ) 46 | def csj( 47 | corpus_dir: Pathlike, 48 | manifest_dir: Pathlike, 49 | dataset_parts: Union[str, Sequence[str]], 50 | transcript_dir: Pathlike, 51 | num_jobs: int, 52 | ): 53 | "Prepare Corpus of Spontaneous Japanese" 54 | 55 | prepare_csj( 56 | corpus_dir=corpus_dir, 57 | manifest_dir=manifest_dir, 58 | dataset_parts=dataset_parts, 59 | transcript_dir=transcript_dir, 60 | nj=num_jobs, 61 | ) 62 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cslu_kids.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.cslu_kids import prepare_cslu_kids 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["cslu_kids"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--absolute-paths", 17 | type=bool, 18 | default=True, 19 | help="Use absolute paths for recordings", 20 | ) 21 | @click.option( 22 | "--normalize-text", 23 | type=bool, 24 | default=True, 25 | help="Remove noise tags (, ) from spontaneous speech transcripts", 26 | ) 27 | def cslu_kids( 28 | corpus_dir: Pathlike, 29 | output_dir: Pathlike, 30 | absolute_paths: Optional[bool] = False, 31 | normalize_text: Optional[bool] = True, 32 | ): 33 | """CSLU Kids corpus data preparation.""" 34 | prepare_cslu_kids( 35 | corpus_dir, 36 | output_dir=output_dir, 37 | absolute_paths=absolute_paths, 38 | normalize_text=normalize_text, 39 | ) 40 | 
-------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/daily_talk.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.daily_talk import download_daily_talk, prepare_daily_talk 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option("--num-jobs", type=int, default=1, help="Number of parallel workers.") 12 | def daily_talk(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 13 | """ 14 | DailyTalk recording and supervision manifest preparation. 15 | """ 16 | prepare_daily_talk(corpus_dir, output_dir, num_jobs=num_jobs) 17 | 18 | 19 | @download.command() 20 | @click.argument("target_dir", type=click.Path()) 21 | @click.option("--force-download", is_flag=True, help="Force download.") 22 | def daily_talk(target_dir: Pathlike, force_download: bool = False): 23 | """ 24 | Download DailyTalk dataset. 
25 | """ 26 | download_daily_talk(target_dir, force_download) 27 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/dihard3.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.dihard3 import prepare_dihard3 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["dihard3"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option("--dev", type=click.Path(exists=True, dir_okay=True)) 15 | @click.option("--eval", type=click.Path(exists=True, dir_okay=True)) 16 | @click.option( 17 | "--uem/--no-uem", 18 | default=True, 19 | help="Specify whether or not to create UEM supervision", 20 | ) 21 | @click.option( 22 | "-j", 23 | "--num-jobs", 24 | type=int, 25 | default=1, 26 | help="Number of jobs to scan corpus directory for recordings.", 27 | ) 28 | def dihard3( 29 | output_dir: Pathlike, 30 | dev: Optional[Pathlike], 31 | eval: Optional[Pathlike], 32 | uem: Optional[float] = True, 33 | num_jobs: Optional[int] = 1, 34 | ): 35 | """DIHARD3 data preparation.""" 36 | prepare_dihard3( 37 | dev, eval, output_dir=output_dir, uem_manifest=uem, num_jobs=num_jobs 38 | ) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/dipco.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.dipco import download_dipco, prepare_dipco 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["dipco"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | 
@click.option( 14 | "--mic", 15 | type=click.Choice(["ihm", "mdm"], case_sensitive=False), 16 | default="mdm", 17 | help="DiPCo microphone setting.", 18 | ) 19 | @click.option( 20 | "--normalize-text", 21 | type=click.Choice(["none", "upper", "kaldi"], case_sensitive=False), 22 | default="kaldi", 23 | help="Text normalization method.", 24 | show_default=True, 25 | ) 26 | @click.option( 27 | "--use-chime7-offset", 28 | is_flag=True, 29 | default=False, 30 | help="If True, offset session IDs (from CHiME-7 challenge).", 31 | ) 32 | def dipco( 33 | corpus_dir: Pathlike, 34 | output_dir: Pathlike, 35 | mic: str, 36 | normalize_text: str, 37 | use_chime7_offset: bool, 38 | ): 39 | """DiPCo data preparation.""" 40 | prepare_dipco( 41 | corpus_dir, 42 | output_dir=output_dir, 43 | mic=mic, 44 | normalize_text=normalize_text, 45 | use_chime7_offset=use_chime7_offset, 46 | ) 47 | 48 | 49 | @download.command(context_settings=dict(show_default=True)) 50 | @click.argument("target_dir", type=click.Path()) 51 | @click.option( 52 | "--force-download", 53 | type=bool, 54 | default=False, 55 | help="If True, download even if file is present.", 56 | ) 57 | def dipco( 58 | target_dir: Pathlike, 59 | force_download: bool, 60 | ): 61 | """DiPCo download.""" 62 | download_dipco( 63 | target_dir, 64 | force_download=force_download, 65 | ) 66 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/earnings21.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.earnings21 import download_earnings21, prepare_earnings21 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @download.command(context_settings=dict(show_default=True)) 9 | @click.argument("target_dir", type=click.Path()) 10 | def earnings21(target_dir: Pathlike): 11 | """Earnings21 dataset download.""" 12 | download_earnings21(target_dir) 13 | 14 | 15 | 
@prepare.command(context_settings=dict(show_default=True)) 16 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 17 | @click.argument("output_dir", type=click.Path()) 18 | @click.option( 19 | "--normalize-text/--no-normalize-text", default=False, help="Normalize the text." 20 | ) 21 | def earnings21(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 22 | """Earnings21 data preparation.""" 23 | prepare_earnings21(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 24 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/earnings22.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.earnings22 import download_earnings22, prepare_earnings22 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @download.command(context_settings=dict(show_default=True)) 9 | def earnings22(): 10 | """Earnings22 dataset download.""" 11 | download_earnings22(None) 12 | 13 | 14 | @prepare.command(context_settings=dict(show_default=True)) 15 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 16 | @click.argument("output_dir", type=click.Path()) 17 | @click.option( 18 | "--normalize-text/--no-normalize-text", default=False, help="Normalize the text." 
19 | ) 20 | def earnings22(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 21 | """Earnings22 data preparation.""" 22 | prepare_earnings22(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ears.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.ears import download_ears, prepare_ears 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def ears( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """EARS data preparation.""" 26 | prepare_ears( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | 32 | 33 | @download.command(context_settings=dict(show_default=True)) 34 | @click.argument("target_dir", type=click.Path()) 35 | def ears( 36 | target_dir: Pathlike, 37 | ): 38 | """EARS data download.""" 39 | download_ears( 40 | target_dir=target_dir, 41 | ) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/edacc.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.edacc import download_edacc, prepare_edacc 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["edacc"] 10 | 
11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | def edacc(corpus_dir: Pathlike, output_dir: Pathlike): 16 | """The Edinburgh International Accents of English Corpus (EDACC) data preparation.""" 17 | prepare_edacc(corpus_dir, output_dir=output_dir) 18 | 19 | 20 | @download.command(context_settings=dict(show_default=True)) 21 | @click.argument("target_dir", type=click.Path()) 22 | def edacc(target_dir: Pathlike): 23 | """The Edinburgh International Accents of English Corpus (EDACC) download.""" 24 | download_edacc(target_dir) 25 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/emilia.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.emilia import prepare_emilia 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-l", 13 | "--lang", 14 | type=str, 15 | help="The language to process. 
Valid values: zh, en, ja, ko, de, fr", 16 | ) 17 | @click.option( 18 | "-j", 19 | "--num-jobs", 20 | type=int, 21 | default=1, 22 | help="How many threads to use (can give good speed-ups with slow disks).", 23 | ) 24 | def emilia( 25 | corpus_dir: Pathlike, 26 | output_dir: Pathlike, 27 | lang: str, 28 | num_jobs: int = 1, 29 | ): 30 | """Prepare the Emilia corpus manifests.""" 31 | prepare_emilia( 32 | corpus_dir=corpus_dir, 33 | output_dir=output_dir, 34 | lang=lang, 35 | num_jobs=num_jobs, 36 | ) 37 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/eval2000.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes import prepare_eval2000 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus-dir", type=click.Path(exists=True, file_okay=False)) 12 | @click.argument("output-dir", type=click.Path()) 13 | @click.option( 14 | "--transcript-dir", 15 | type=click.Path(exists=True, file_okay=False), 16 | default=None, 17 | required=False, 18 | ) 19 | @click.option( 20 | "--absolute-paths", 21 | default=False, 22 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 23 | ) 24 | def eval2000( 25 | corpus_dir: Pathlike, 26 | output_dir: Pathlike, 27 | absolute_paths: bool, 28 | transcript_dir: Optional[Pathlike] = None, 29 | ): 30 | """ 31 | The Eval2000 corpus preparation. 32 | 33 | \b 34 | This is conversational telephone speech collected as 2-channel, 8kHz-sampled data. 35 | The catalog number LDC2002S09 for audio corpora and LDC2002T43 for transcripts. 36 | 37 | This data is not available for free - your institution needs to have an LDC subscription. 
38 | """ 39 | 40 | prepare_eval2000( 41 | corpus_dir=corpus_dir, 42 | output_dir=output_dir, 43 | absolute_paths=absolute_paths, 44 | transcript_path=transcript_dir, 45 | ) 46 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/fisher_spanish.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_fisher_spanish 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("audio-dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript-dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output-dir", type=click.Path()) 12 | @click.option( 13 | "--absolute-paths", 14 | default=False, 15 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 16 | ) 17 | def fisher_spanish( 18 | audio_dir: Pathlike, 19 | transcript_dir: Pathlike, 20 | output_dir: Pathlike, 21 | absolute_paths: bool, 22 | ): 23 | """ 24 | The Fisher Spanish corpus preparation. 25 | 26 | \b 27 | This is conversational telephone speech collected as 2-channel μ-law, 8kHz-sampled data. 28 | The catalog number LDC2010S01 for audio corpus and LDC2010T04 for transcripts. 29 | 30 | This data is not available for free - your institution needs to have an LDC subscription. 
31 | """ 32 | prepare_fisher_spanish( 33 | audio_dir_path=audio_dir, 34 | transcript_dir_path=transcript_dir, 35 | output_dir=output_dir, 36 | absolute_paths=absolute_paths, 37 | ) 38 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/fleurs.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.fleurs import download_fleurs, prepare_fleurs 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["fleurs"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-j", 17 | "--num-jobs", 18 | type=int, 19 | default=1, 20 | help="How many threads to use (can give good speed-ups with slow disks).", 21 | ) 22 | @click.option( 23 | "-l", 24 | "--lang", 25 | multiple=True, 26 | default=["all"], 27 | help="Specify which languages to prepare, e.g., " 28 | " lhoste prepare librispeech mtedx_corpus data -l de -l fr -l es ", 29 | ) 30 | def fleurs( 31 | corpus_dir: Pathlike, 32 | output_dir: Pathlike, 33 | num_jobs: int, 34 | lang: Optional[Union[str, Sequence[str]]], 35 | ): 36 | """Fleurs ASR data preparation.""" 37 | prepare_fleurs(corpus_dir, output_dir=output_dir, num_jobs=num_jobs, languages=lang) 38 | 39 | 40 | @download.command(context_settings=dict(show_default=True)) 41 | @click.argument("target_dir", type=click.Path()) 42 | @click.option( 43 | "-l", 44 | "--lang", 45 | multiple=True, 46 | default=["all"], 47 | help="Specify which languages to download, e.g., " 48 | " lhotse download fleurs . 
-l hi_in -l en_us " 49 | " lhotse download fleurs", 50 | ) 51 | @click.option( 52 | "--force-download", 53 | type=bool, 54 | is_flag=True, 55 | default=False, 56 | help="Specify whether to overwrite an existing archive", 57 | ) 58 | def fleurs( 59 | target_dir: Pathlike, 60 | lang: Optional[Union[str, Sequence[str]]], 61 | force_download: bool = False, 62 | ): 63 | """FLEURS download.""" 64 | download_fleurs( 65 | target_dir, 66 | languages=lang, 67 | force_download=force_download, 68 | ) 69 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/gale_arabic.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.gale_arabic import prepare_gale_arabic 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["gale_arabic"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-s", 16 | "--audio", 17 | type=click.Path(exists=True, dir_okay=True), 18 | multiple=True, 19 | help="Paths to audio dirs, e.g., LDC2013S02. Multiple corpora can be provided by repeating `-s`.", 20 | ) 21 | @click.option( 22 | "-t", 23 | "--transcript", 24 | type=click.Path(exists=True, dir_okay=True), 25 | multiple=True, 26 | help="Paths to transcript dirs, e.g., LDC2013T17. 
Multiple corpora can be provided by repeating `-t`", 27 | ) 28 | @click.option( 29 | "--absolute-paths", 30 | type=bool, 31 | default=False, 32 | help="Use absolute paths for recordings", 33 | ) 34 | def gale_arabic( 35 | output_dir: Pathlike, 36 | audio: Optional[List[Pathlike]] = None, 37 | transcript: Optional[List[Pathlike]] = None, 38 | absolute_paths: Optional[bool] = False, 39 | ): 40 | """GALE Arabic Phases 1 to 4 Broadcast news and conversation data preparation.""" 41 | prepare_gale_arabic( 42 | audio, 43 | transcript, 44 | output_dir=output_dir, 45 | absolute_paths=absolute_paths, 46 | ) 47 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/gale_mandarin.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.gale_mandarin import prepare_gale_mandarin 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["gale_mandarin"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-s", 16 | "--audio", 17 | type=click.Path(exists=True, dir_okay=True), 18 | multiple=True, 19 | help="Paths to audio dirs, e.g., LDC2013S08. Multiple corpora can be provided by repeating `-s`.", 20 | ) 21 | @click.option( 22 | "-t", 23 | "--transcript", 24 | type=click.Path(exists=True, dir_okay=True), 25 | multiple=True, 26 | help="Paths to transcript dirs, e.g., LDC2013T20. 
Multiple corpora can be provided by repeating `-t`", 27 | ) 28 | @click.option( 29 | "--absolute-paths", 30 | type=bool, 31 | default=False, 32 | help="Use absolute paths for recordings", 33 | ) 34 | @click.option( 35 | "--segment-words", 36 | type=bool, 37 | default=False, 38 | help="Use 'jieba' package to perform word segmentation on the text", 39 | ) 40 | def gale_mandarin( 41 | output_dir: Pathlike, 42 | audio: Optional[List[Pathlike]] = None, 43 | transcript: Optional[List[Pathlike]] = None, 44 | absolute_paths: Optional[bool] = False, 45 | segment_words: Optional[bool] = False, 46 | ): 47 | """GALE Mandarin Broadcast speech data preparation.""" 48 | prepare_gale_mandarin( 49 | audio, 50 | transcript, 51 | output_dir=output_dir, 52 | absolute_paths=absolute_paths, 53 | segment_words=segment_words, 54 | ) 55 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/grid.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_grid, prepare_grid 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "--with-supervisions/--no-supervisions", 13 | default=True, 14 | help="Note: using supervisions might discard some recordings that do not have them.", 15 | ) 16 | @click.option("-j", "--jobs", default=1, type=int, help="The number of parallel jobs.") 17 | def grid( 18 | corpus_dir: Pathlike, 19 | output_dir: Pathlike, 20 | with_supervisions: bool, 21 | jobs: int, 22 | ): 23 | """Grid audio-visual speech corpus preparation.""" 24 | prepare_grid( 25 | corpus_dir, 26 | output_dir=output_dir, 27 | with_supervisions=with_supervisions, 28 | num_jobs=jobs, 29 | ) 30 | 31 | 32 | 
@download.command(context_settings=dict(show_default=True)) 33 | @click.argument("target_dir", type=click.Path()) 34 | def grid( 35 | target_dir: Pathlike, 36 | ): 37 | """Grid audio-visual speech corpus download.""" 38 | download_grid(target_dir) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/heroico.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.heroico import download_heroico, prepare_heroico 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["heroico"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("speech_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("transcript_dir", type=click.Path(exists=True, dir_okay=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | def heroico(speech_dir: Pathlike, transcript_dir: Pathlike, output_dir: Pathlike): 15 | """heroico Answers ASR data preparation.""" 16 | prepare_heroico(speech_dir, transcript_dir, output_dir) 17 | 18 | 19 | @download.command(context_settings=dict(show_default=True)) 20 | @click.argument("target_dir", type=click.Path()) 21 | def heroico(target_dir: Pathlike): 22 | """heroico download.""" 23 | download_heroico(target_dir) 24 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/hifitts.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_hifitts, prepare_hifitts 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | 
"-j", 13 | "--num-jobs", 14 | type=int, 15 | default=1, 16 | help="How many jobs to use (can give good speed-ups with slow disks).", 17 | ) 18 | def hifitts(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 19 | """HiFiTTS data preparation.""" 20 | prepare_hifitts(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 21 | 22 | 23 | @download.command(context_settings=dict(show_default=True)) 24 | @click.argument("target_dir", type=click.Path()) 25 | def hifitts( 26 | target_dir: Pathlike, 27 | ): 28 | """HiFiTTS data download.""" 29 | download_hifitts(target_dir) 30 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/himia.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.himia import download_himia, prepare_himia 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["himia"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-p", 17 | "--dataset-parts", 18 | type=str, 19 | default=["auto"], 20 | multiple=True, 21 | help="List of dataset parts to prepare. 
To prepare multiple parts, pass each with `-p` " 22 | "Example: `-p test -p cw_test` " 23 | "Prepare both HI_MIA and HI_MIA_CW by default " 24 | "All possible data parts are train, dev, test and cw_test", 25 | ) 26 | def himia( 27 | corpus_dir: Pathlike, 28 | output_dir: Pathlike, 29 | dataset_parts: Sequence[str], 30 | ): 31 | """HI_MIA and HI_MIA_CW data preparation.""" 32 | if len(dataset_parts) == 1: 33 | dataset_parts = dataset_parts[0] 34 | prepare_himia( 35 | corpus_dir=corpus_dir, 36 | output_dir=output_dir, 37 | dataset_parts=dataset_parts, 38 | ) 39 | 40 | 41 | @download.command(context_settings=dict(show_default=True)) 42 | @click.argument("target_dir", type=click.Path()) 43 | @click.option( 44 | "-p", 45 | "--dataset-parts", 46 | type=str, 47 | default=["auto"], 48 | multiple=True, 49 | help="List of dataset parts to download. To download multiple parts, pass each with `-p` " 50 | "Example: `-p test -p cw_test` " 51 | "Download both HI_MIA and HI_MIA_CW by default " 52 | "All possible data parts are train, dev, test and cw_test", 53 | ) 54 | def himia( 55 | target_dir: Pathlike, 56 | dataset_parts: Sequence[str], 57 | ): 58 | """HI-MIA and HI_MIA_CW download.""" 59 | if len(dataset_parts) == 1: 60 | dataset_parts = dataset_parts[0] 61 | download_himia(target_dir, dataset_parts=dataset_parts) 62 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/icmcasr.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.icmcasr import prepare_icmcasr 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | 
"--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | @click.option( 21 | "--mic", 22 | type=click.Choice(["ihm", "sdm", "mdm"]), 23 | default="ihm", 24 | help="Microphone type.", 25 | ) 26 | def icmcasr( 27 | corpus_dir: Pathlike, 28 | output_dir: Optional[Pathlike] = None, 29 | mic: str = "ihm", 30 | num_jobs: int = 1, 31 | ): 32 | """ICMC-ASR data preparation.""" 33 | prepare_icmcasr( 34 | corpus_dir=corpus_dir, 35 | output_dir=output_dir, 36 | mic=mic, 37 | num_jobs=num_jobs, 38 | ) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/iwslt22_ta.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.iwslt22_ta import prepare_iwslt22_ta 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("splits", type=click.Path(exists=True, dir_okay=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-j", 16 | "--num-jobs", 17 | type=int, 18 | default=1, 19 | help="How many threads to use (can give good speed-ups with slow disks).", 20 | ) 21 | @click.option( 22 | "--normalize-text", 23 | default=False, 24 | help="Whether to perform additional text cleaning and normalization from https://aclanthology.org/2022.iwslt-1.29.pdf.", 25 | ) 26 | @click.option( 27 | "--langs", 28 | default="", 29 | help="Comma-separated list of language abbreviations for source and target languages", 30 | ) 31 | def iwslt22_ta( 32 | corpus_dir: Pathlike, 33 | splits: Pathlike, 34 | output_dir: Pathlike, 35 | normalize_text: bool, 36 | langs: str, 37 | num_jobs: int, 38 | ): 39 | """ 40 | IWSLT_2022 data 
preparation. 41 | \b 42 | This is conversational telephone speech collected as 8kHz-sampled data. 43 | The catalog number LDC2022E01 corresponds to the train, dev, and test1 44 | splits of the iwslt2022 shared task. 45 | To obtaining this data your institution needs to have an LDC subscription. 46 | You also should download the predined splits with 47 | git clone https://github.com/kevinduh/iwslt22-dialect.git 48 | """ 49 | langs_list = langs.split(",") 50 | prepare_iwslt22_ta( 51 | corpus_dir, 52 | splits, 53 | output_dir=output_dir, 54 | num_jobs=num_jobs, 55 | clean=normalize_text, 56 | langs=langs_list, 57 | ) 58 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/kespeech.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.kespeech import prepare_kespeech 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-p", 15 | "--dataset-parts", 16 | type=str, 17 | default=["all"], 18 | multiple=True, 19 | help="List of dataset parts to prepare. To prepare multiple parts," 20 | "pass each with `-p` Example: `-p dev_phase1 -p dev_phase2`", 21 | ) 22 | @click.option( 23 | "-j", 24 | "--num-jobs", 25 | type=int, 26 | default=1, 27 | help="How many threads to use (can give good speed-ups with slow disks).", 28 | ) 29 | def kespeech( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | dataset_parts: Sequence[str], 33 | num_jobs: int, 34 | ): 35 | """ 36 | The KeSpeech corpus preparation. 
37 | """ 38 | prepare_kespeech( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_jobs=num_jobs, 42 | dataset_parts=dataset_parts, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ksponspeech.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.ksponspeech import prepare_ksponspeech 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["ksponspeech"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-p", 17 | "--dataset-parts", 18 | type=str, 19 | default=["all"], 20 | multiple=True, 21 | help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` " 22 | "Example: `-p train -p test`", 23 | ) 24 | @click.option( 25 | "-j", 26 | "--num-jobs", 27 | type=int, 28 | default=1, 29 | help="How many threads to use (can give good speed-ups with slow disks).", 30 | ) 31 | @click.option( 32 | "--normalize-text", 33 | type=click.Choice(["none", "default"], case_sensitive=False), 34 | default="default", 35 | help="Type of text normalization to apply.", 36 | ) 37 | def ksponspeech( 38 | corpus_dir: Pathlike, 39 | output_dir: Pathlike, 40 | dataset_parts: Sequence[str], 41 | num_jobs: int, 42 | normalize_text: str, 43 | ): 44 | """KsponSpeech ASR data preparation.""" 45 | if len(dataset_parts) == 1: 46 | dataset_parts = dataset_parts[0] 47 | prepare_ksponspeech( 48 | corpus_dir, 49 | output_dir=output_dir, 50 | num_jobs=num_jobs, 51 | dataset_parts=dataset_parts, 52 | normalize_text=normalize_text, 53 | ) 54 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/l2_arctic.py: 
-------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.l2_arctic import prepare_l2_arctic 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["l2_arctic"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def l2_arctic(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """L2 Arctic data preparation.""" 15 | prepare_l2_arctic(corpus_dir, output_dir=output_dir) 16 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/libricss.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.libricss import download_libricss, prepare_libricss 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "--type", 13 | type=click.Choice(["ihm", "ihm-mix", "sdm", "mdm"]), 14 | default="mdm", 15 | help="Type of the corpus to prepare", 16 | show_default=True, 17 | ) 18 | @click.option( 19 | "--segmented/--no-segmented", 20 | default=False, 21 | help="If True, the manifest will contain Cuts corresponding to 1-minute segments.", 22 | show_default=True, 23 | ) 24 | def libricss(corpus_dir: Pathlike, output_dir: Pathlike, type: str, segmented: bool): 25 | """ 26 | LibriCSS recording and supervision manifest preparation. 
27 | """ 28 | prepare_libricss(corpus_dir, output_dir, type=type, segmented_cuts=segmented) 29 | 30 | 31 | @download.command() 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option("--force-download", is_flag=True, help="Force download") 34 | def libricss(target_dir: Pathlike, force_download: bool = False): 35 | """ 36 | Download LibriCSS dataset. 37 | """ 38 | download_libricss(target_dir, force_download) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/librilight.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.librilight import prepare_librilight 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def librilight( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """LibriLight data preparation.""" 26 | prepare_librilight( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/librimix.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.librimix import download_librimix, prepare_librimix 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["librimix"] 8 | 9 | 10 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("librimix-csv", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--sampling-rate",
    type=int,
    default=16000,
    help="Sampling rate to set in the RecordingSet manifest.",
)
@click.option(
    "--min-segment-seconds",
    type=float,
    default=3.0,
    help="Remove segments shorter than MIN_SEGMENT_SECONDS.",
)
@click.option(
    "--with-precomputed-mixtures/--no-precomputed-mixtures",
    type=bool,
    default=False,
    help="Optionally create a RecordingSet manifest including the precomputed LibriMix mixtures.",
)
def librimix(
    librimix_csv: Pathlike,
    output_dir: Pathlike,
    sampling_rate: int,
    min_segment_seconds: float,
    with_precomputed_mixtures: bool,
):
    """
    LibriMix source separation data preparation.

    LIBRIMIX_CSV: path to the LibriMix metadata CSV file (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    # Fixed docstring typo ("LibrMix") and help-text grammar ("an RecordingSet").
    prepare_librimix(
        librimix_csv=librimix_csv,
        output_dir=output_dir,
        sampling_rate=sampling_rate,
        min_segment_seconds=min_segment_seconds,
        with_precomputed_mixtures=with_precomputed_mixtures,
    )


@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
def librimix(target_dir: Pathlike):
    """Mini LibriMix download."""
    # NOTE: same function name as the prepare command above is intentional —
    # click registers each command at decoration time, so both remain reachable.
    download_librimix(target_dir)
dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def ljspeech( 14 | corpus_dir: Pathlike, 15 | output_dir: Pathlike, 16 | ): 17 | """LJSpeech data preparation.""" 18 | prepare_ljspeech(corpus_dir, output_dir=output_dir) 19 | 20 | 21 | @download.command(context_settings=dict(show_default=True)) 22 | @click.argument("target_dir", type=click.Path(), default=".") 23 | def ljspeech(target_dir: Pathlike): 24 | """LJSpeech download.""" 25 | download_ljspeech(target_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/magicdata.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.magicdata import download_magicdata, prepare_magicdata 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["magicdata"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def magicdata(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Magicdata ASR data preparation.""" 15 | prepare_magicdata(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def magicdata(target_dir: Pathlike): 21 | """Magicdata download.""" 22 | download_magicdata(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mdcc.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.mdcc import download_mdcc, prepare_mdcc 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | 
@prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-p", 15 | "--dataset-parts", 16 | type=str, 17 | default=["all"], 18 | multiple=True, 19 | help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` " 20 | "Example: `-p train -p valid`", 21 | ) 22 | def MDCC( 23 | corpus_dir: Pathlike, 24 | dataset_parts: Sequence[str], 25 | output_dir: Optional[Pathlike] = None, 26 | ): 27 | """MDCC data preparation.""" 28 | prepare_mdcc( 29 | corpus_dir=corpus_dir, 30 | dataset_parts=dataset_parts, 31 | output_dir=output_dir, 32 | ) 33 | 34 | 35 | @download.command(context_settings=dict(show_default=True)) 36 | @click.argument("target_dir", type=click.Path()) 37 | @click.option( 38 | "--force-download", 39 | is_flag=True, 40 | default=False, 41 | help="if True, it will download the MDCC data even if it is already present.", 42 | ) 43 | def MDCC( 44 | target_dir: Pathlike, 45 | force_download: Optional[bool] = False, 46 | ): 47 | """MDCC download.""" 48 | download_mdcc( 49 | target_dir=target_dir, 50 | force_download=force_download, 51 | ) 52 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/medical.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.medical import download_medical, prepare_medical 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many 
threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def medical( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """Medical data preparation.""" 26 | prepare_medical( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | 32 | 33 | @download.command(context_settings=dict(show_default=True)) 34 | @click.argument("target_dir", type=click.Path()) 35 | @click.option("--force-download", is_flag=True, default=False, help="Force download") 36 | def medical( 37 | target_dir: Pathlike, 38 | force_download: Optional[bool] = False, 39 | ): 40 | """Medical download.""" 41 | download_medical( 42 | target_dir=target_dir, 43 | force_download=force_download, 44 | ) 45 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mgb2.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.mgb2 import prepare_mgb2 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["mgb2"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--text-cleaning/--no-text-cleaning", default=True, help="Basic text cleaning." 
15 | ) 16 | @click.option( 17 | "--buck-walter/--no-buck-walter", 18 | default=False, 19 | help="Use BuckWalter transliteration.", 20 | ) 21 | @click.option( 22 | "-j", 23 | "--num-jobs", 24 | type=int, 25 | default=1, 26 | help="How many threads to use (can give good speed-ups with slow disks).", 27 | ) 28 | @click.option( 29 | "--mer-thresh", 30 | default=80, 31 | help="filter out segments based on mer (Match Error Rate).", 32 | ) 33 | def mgb2( 34 | corpus_dir: Pathlike, 35 | output_dir: Pathlike, 36 | text_cleaning: bool, 37 | buck_walter: bool, 38 | num_jobs: int, 39 | mer_thresh: int, 40 | ): 41 | """mgb2 ASR data preparation.""" 42 | prepare_mgb2( 43 | corpus_dir, 44 | output_dir, 45 | text_cleaning=text_cleaning, 46 | buck_walter=buck_walter, 47 | num_jobs=num_jobs, 48 | mer_thresh=mer_thresh, 49 | ) 50 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mls.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.mls import prepare_mls 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["mls"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--opus/--flac", 15 | type=bool, 16 | default=True, 17 | help="Which codec should be used (OPUS or FLAC)", 18 | ) 19 | @click.option( 20 | "-j", 21 | "--num-jobs", 22 | type=int, 23 | default=1, 24 | help="How many threads to use (can give good speed-ups with slow disks).", 25 | ) 26 | def mls(corpus_dir: Pathlike, output_dir: Pathlike, opus: bool, num_jobs: int): 27 | """ 28 | Multilingual Librispeech (MLS) data preparation. 29 | 30 | Multilingual LibriSpeech (MLS) dataset is a large multilingual corpus suitable for speech research. 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # Fixed help example: the binary is "lhotse" (was "lhoste") and the
    # subcommand for this recipe is "mtedx" (was "librispeech").
    help="Specify which languages to prepare, e.g., "
    " lhotse prepare mtedx mtedx_corpus data -l de -l fr -l es ",
)
def mtedx(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
    lang: Optional[Union[str, Sequence[str]]],
):
    """
    MTEDx ASR data preparation.

    CORPUS_DIR: root directory of the MTEDx corpus (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    prepare_mtedx(corpus_dir, output_dir=output_dir, num_jobs=num_jobs, languages=lang)
-l de -l fr -l es " 49 | " lhoste download mtedx", 50 | ) 51 | def mtedx( 52 | target_dir: Pathlike, 53 | lang: Optional[Union[str, Sequence[str]]], 54 | ): 55 | """MTEDx download.""" 56 | download_mtedx(target_dir, languages=lang) 57 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/musan.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.musan import download_musan, prepare_musan 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["musan"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--use-vocals/--no-vocals", 15 | default=True, 16 | help='Whether to include vocal music in "music" part.', 17 | ) 18 | def musan(corpus_dir: Pathlike, output_dir: Pathlike, use_vocals: bool): 19 | """MUSAN data preparation.""" 20 | prepare_musan(corpus_dir, output_dir=output_dir, use_vocals=use_vocals) 21 | 22 | 23 | @download.command() 24 | @click.argument("target_dir", type=click.Path()) 25 | def musan(target_dir: Pathlike): 26 | """MUSAN download.""" 27 | download_musan(target_dir) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/must_c.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.must_c import prepare_must_c 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("--tgt-lang", type=str, help="The target language, e.g., zh, de, fr.") 
14 | @click.option( 15 | "-j", 16 | "--num-jobs", 17 | type=int, 18 | default=1, 19 | help="How many threads to use (can give good speed-ups with slow disks).", 20 | ) 21 | def must_c(corpus_dir: Pathlike, output_dir: Pathlike, tgt_lang, num_jobs: int): 22 | """MUST-C speech translation data preparation.""" 23 | logging.basicConfig(level=logging.INFO) 24 | prepare_must_c( 25 | corpus_dir, 26 | output_dir=output_dir, 27 | tgt_lang=tgt_lang, 28 | num_jobs=num_jobs, 29 | ) 30 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/nsc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.nsc import NSC_PARTS, prepare_nsc 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-p", 13 | "--dataset-part", 14 | type=click.Choice(NSC_PARTS), 15 | default="PART3_SameCloseMic", 16 | help="Which part of NSC should be prepared", 17 | ) 18 | @click.option( 19 | "-j", 20 | "--num-jobs", 21 | type=int, 22 | default=1, 23 | help="How many threads to use (can give good speed-ups with slow disks).", 24 | ) 25 | def nsc(corpus_dir: Pathlike, output_dir: Pathlike, dataset_part: str, num_jobs: int): 26 | """ 27 | \b 28 | This is a data preparation recipe for the National Corpus of Speech in Singaporean English. 29 | CORPUS_DIR: root directory that contains all NSC shared folder. Eg. 
30 | ├── IMDA - National Speech Corpus 31 | │ ├── LEXICON 32 | │ ├── PART1 33 | │ ├── PART2 34 | │ └── PART3 35 | ├── IMDA - National Speech Corpus - Additional 36 | │ └── IMDA - National Speech Corpus (Additional) 37 | │ ├── PART4 38 | │ ├── PART5 39 | │ └── PART6 40 | """ 41 | prepare_nsc( 42 | corpus_dir, dataset_part=dataset_part, output_dir=output_dir, num_jobs=num_jobs 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/peoples_speech.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.peoples_speech import prepare_peoples_speech 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-j", 13 | "--num-jobs", 14 | type=int, 15 | default=1, 16 | help="How many threads to use (can give good speed-ups with slow disks).", 17 | ) 18 | def peoples_speech( 19 | corpus_dir: Pathlike, 20 | output_dir: Pathlike, 21 | num_jobs: int = 1, 22 | ): 23 | """Prepare The People's Speech corpus manifests.""" 24 | prepare_peoples_speech( 25 | corpus_dir=corpus_dir, 26 | output_dir=output_dir, 27 | num_jobs=num_jobs, 28 | ) 29 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/primewords.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.primewords import download_primewords, prepare_primewords 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["primewords"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, 
dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def primewords(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Primewords ASR data preparation.""" 15 | prepare_primewords(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def primewords(target_dir: Pathlike): 21 | """Primewords download.""" 22 | download_primewords(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/radio.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.radio import prepare_radio 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["radio"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path(dir_okay=True)) 15 | @click.option( 16 | "-d", 17 | "--min-seg-dur", 18 | type=float, 19 | default=0.5, 20 | help="The minimum segment duration", 21 | ) 22 | @click.option( 23 | "-j", 24 | "--num-jobs", 25 | type=int, 26 | default=4, 27 | help="The number of parallel threads to use for data preparation", 28 | ) 29 | def radio( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | min_seg_dur: float = 0.5, 33 | num_jobs: int = 4, 34 | ): 35 | """Data preparation""" 36 | prepare_radio( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | num_jobs=num_jobs, 40 | min_segment_duration=min_seg_dur, 41 | ) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/reazonspeech.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | 4 | import 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
def reazonspeech(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
):
    """
    ReazonSpeech ASR data preparation.

    CORPUS_DIR: root directory of the downloaded ReazonSpeech data (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    logging.basicConfig(level=logging.INFO)
    prepare_reazonspeech(corpus_dir, output_dir=output_dir, num_jobs=num_jobs)


@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
@click.option(
    "--subset",
    type=click.Choice(("auto",) + REAZONSPEECH),
    multiple=True,
    default=["auto"],
    # Fixed help text: the previous version claimed "(default: small-v1)" while
    # the actual default is "auto", and left an example backtick unterminated.
    help="List of dataset parts to download. To download multiple parts, "
    "pass each with `--subset`. Example: `--subset all`",
)
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
def reazonspeech(target_dir: Pathlike, subset: List[str], num_jobs: int):
    """ReazonSpeech download."""
    logging.basicConfig(level=logging.INFO)
    # "auto" is a sentinel meaning "let the recipe pick": collapse the
    # multi-value tuple to the plain string the recipe expects.
    if "auto" in subset:
        subset = "auto"
    download_reazonspeech(target_dir, dataset_parts=subset, num_jobs=num_jobs)
-------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.sbcsae import download_sbcsae, prepare_sbcsae 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["sbcsae"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--geolocation", 17 | type=bool, 18 | is_flag=True, 19 | default=False, 20 | help="Include geographic coordinates of speakers' hometowns in the manifests.", 21 | ) 22 | @click.option( 23 | "--omit-realignments", 24 | type=bool, 25 | is_flag=True, 26 | default=False, 27 | help="Only output the original corpus segmentation without boundary improvements.", 28 | ) 29 | def sbcsae( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | geolocation: bool, 33 | omit_realignments: bool, 34 | ): 35 | """SBCSAE data preparation.""" 36 | prepare_sbcsae( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | geolocation=geolocation, 40 | omit_realignments=omit_realignments, 41 | ) 42 | 43 | 44 | @download.command(context_settings=dict(show_default=True)) 45 | @click.argument("target_dir", type=click.Path()) 46 | @click.option( 47 | "--force-download", 48 | type=bool, 49 | is_flag=True, 50 | default=False, 51 | help="Force download.", 52 | ) 53 | def sbcsae( 54 | target_dir: Pathlike, 55 | force_download: bool, 56 | ): 57 | """SBCSAE download.""" 58 | download_sbcsae(target_dir, force_download=force_download) 59 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/slu.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes 
import prepare 6 | from lhotse.recipes.slu import prepare_slu 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path()) 12 | @click.argument("output_dir", type=click.Path()) 13 | def slu( 14 | corpus_dir: Pathlike, 15 | output_dir: Pathlike, 16 | ): 17 | prepare_slu(corpus_dir=corpus_dir, output_dir=output_dir) 18 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/speechcommands.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.speechcommands import ( 7 | download_speechcommands, 8 | prepare_speechcommands, 9 | ) 10 | from lhotse.utils import Pathlike 11 | 12 | 13 | @prepare.command(context_settings=dict(show_default=True)) 14 | @click.argument("speechcommands_version", type=str) 15 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 16 | @click.argument("output_dir", type=click.Path()) 17 | def speechcommands( 18 | speechcommands_version: str, 19 | corpus_dir: Pathlike, 20 | output_dir: Optional[Pathlike] = None, 21 | ): 22 | """Speech Commands v0.01 or v0.02 data preparation.""" 23 | prepare_speechcommands( 24 | speechcommands_version=speechcommands_version, 25 | corpus_dir=corpus_dir, 26 | output_dir=output_dir, 27 | ) 28 | 29 | 30 | @download.command(context_settings=dict(show_default=True)) 31 | @click.argument("speechcommands_version", type=str) 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option("--force-download", is_flag=True, default=False, help="Force download") 34 | def speechcommands( 35 | speechcommands_version: str, 36 | target_dir: Pathlike, 37 | force_download: Optional[bool] = False, 38 | ): 39 | """Speech Commands v0.01 or v0.02 download.""" 40 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def speechio(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None,
    # NOTE(review): num_jobs has no matching @click.option, so click can never
    # set it, and it is not forwarded to prepare_speechio below — dead
    # parameter; confirm with recipe author before removing or wiring it up.
    num_jobs: int = 1,
):
    """SpeechIO data preparation. See https://github.com/SpeechColab/Leaderboard"""
    prepare_speechio(
        corpus_dir=corpus_dir,
        output_dir=output_dir,
    )
22 | ) 23 | def spgispeech( 24 | corpus_dir: Pathlike, 25 | output_dir: Pathlike, 26 | num_jobs: int, 27 | normalize_text: bool, 28 | ): 29 | """SPGISpeech ASR data preparation.""" 30 | prepare_spgispeech( 31 | corpus_dir, 32 | output_dir, 33 | num_jobs=num_jobs, 34 | normalize_text=normalize_text, 35 | ) 36 | 37 | 38 | @download.command(context_settings=dict(show_default=True)) 39 | @click.argument("target_dir", type=click.Path()) 40 | def spgispeech(target_dir: Pathlike): 41 | """SPGISpeech download.""" 42 | download_spgispeech(target_dir) 43 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/stcmds.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.stcmds import download_stcmds, prepare_stcmds 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["stcmds"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def stcmds(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Stcmds ASR data preparation.""" 15 | prepare_stcmds(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def stcmds(target_dir: Pathlike): 21 | """Stcmds download.""" 22 | download_stcmds(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/switchboard.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_switchboard 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("audio-dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output-dir", type=click.Path())
@click.option("--transcript-dir", type=click.Path(exists=True, file_okay=False))
@click.option(
    "--sentiment-dir",
    type=click.Path(exists=True, file_okay=False),
    help="Optional path to LDC2020T14 package with sentiment annotations for SWBD.",
)
@click.option(
    "--omit-silence/--retain-silence",
    default=True,
    # Fixed help text: the previous wording ("Should the [silence] segments be
    # kept.") described the opposite of what --omit-silence (the default) does.
    help="Whether to omit (default) or retain the [silence] segments.",
)
@click.option(
    "--absolute-paths",
    # NOTE(review): without is_flag=True, click infers a BOOL *value* option
    # from the default, i.e. usage is `--absolute-paths true`. Left unchanged
    # to avoid breaking existing invocations; confirm whether a flag was meant.
    default=False,
    help="Whether to return absolute or relative (to the corpus dir) paths for recordings.",
)
def switchboard(
    audio_dir: Pathlike,
    output_dir: Pathlike,
    transcript_dir: Pathlike,
    sentiment_dir: Pathlike,
    omit_silence: bool,
    absolute_paths: bool,
):
    """
    The Switchboard corpus preparation.

    \b
    This is conversational telephone speech collected as 2-channel, 8kHz-sampled
    data. We are using just the Switchboard-1 Phase 1 training data.
    The catalog number LDC97S62 (Switchboard-1 Release 2) corresponds, we believe,
    to what we have. We also use the Mississippi State transcriptions, which
    we download separately from
    http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz

    This data is not available for free - your institution needs to have an LDC subscription.
    """
    # Forward the parsed CLI arguments to the recipe implementation.
    prepare_switchboard(
        audio_dir=audio_dir,
        transcripts_dir=transcript_dir,
        sentiment_dir=sentiment_dir,
        output_dir=output_dir,
        omit_silence=omit_silence,
        absolute_paths=absolute_paths,
    )
@prepare.command()
@click.argument(
    "tedlium_dir", type=click.Path(exists=True, dir_okay=True, file_okay=False)
)
@click.argument("output_dir", type=click.Path())
@click.option(
    "--parts",
    "-p",
    type=click.Choice(TEDLIUM_PARTS),
    multiple=True,
    default=list(TEDLIUM_PARTS),
    help="Which parts of TED-LIUM v3 to prepare (by default all).",
)
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "--normalize-text",
    type=click.Choice(["none", "upper", "kaldi"], case_sensitive=False),
    default="none",
    help="Type of text normalization to apply (no normalization, by default). "
    "Selecting `kaldi` will remove tokens and join suffixes.",
)
def tedlium(
    tedlium_dir: Pathlike,
    output_dir: Pathlike,
    parts: List[str],
    num_jobs: int,
    normalize_text: str,
):
    """
    TED-LIUM v3 recording and supervision manifest preparation.
    """
    # Thin CLI wrapper: forward all parsed arguments to the recipe function.
    prepare_tedlium(
        tedlium_root=tedlium_dir,
        dataset_parts=parts,
        output_dir=output_dir,
        normalize_text=normalize_text,
        num_jobs=num_jobs,
    )
@download.command()
@click.argument("target_dir", type=click.Path())
# Fix: this function was named `tedlium`, which registered the download command
# as `tedlium` -- colliding with the TED-LIUM v3 download command defined in
# tedlium.py. Renamed to `tedlium2` to match this module's prepare command.
def tedlium2(target_dir: Pathlike):
    """TED-LIUM v2 download (approx. 35GB)."""
    download_tedlium2(target_dir)
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def this_american_life(corpus_dir: Pathlike, output_dir: Pathlike):
    """This American Life data preparation."""
    # Thin CLI wrapper around the recipe function.
    prepare_this_american_life(corpus_dir, output_dir=output_dir)
37 | """ 38 | prepare_timit( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_phones=num_phones, 42 | num_jobs=num_jobs, 43 | ) 44 | 45 | 46 | @download.command(context_settings=dict(show_default=True)) 47 | @click.argument("target_dir", type=click.Path()) 48 | def timit(target_dir: Pathlike): 49 | """TIMIT download.""" 50 | download_timit(target_dir) 51 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/uwb_atcc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.uwb_atcc import download_uwb_atcc, prepare_uwb_atcc 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["uwb_atcc"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | def uwb_atcc(target_dir: Pathlike): 13 | """UWB-ATCC download.""" 14 | download_uwb_atcc(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | @click.option("--silence-sym", type=str, default="") 21 | @click.option("--breath-sym", type=str, default="") 22 | @click.option("--noise-sym", type=str, default="") 23 | @click.option("--foreign-sym", type=str, default="") 24 | @click.option("--partial-sym", type=str, default="") 25 | @click.option("--unintelligble-sym", type=str, default="") 26 | @click.option("--unknown-sym", type=str, default="") 27 | def uwb_atcc( 28 | corpus_dir: Pathlike, 29 | output_dir: Pathlike, 30 | silence_sym: str, 31 | breath_sym: str, 32 | noise_sym: str, 33 | foreign_sym: str, 34 | partial_sym: str, 35 | unintelligble_sym: str, 36 | unknown_sym: str, 37 | ): 38 | """UWB-ATCC data preparation.""" 39 | prepare_uwb_atcc( 40 | corpus_dir, 41 | output_dir=output_dir, 42 | 
@prepare.command()
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
# NOTE(review): without is_flag=True, click infers a BOOL *value* option from
# the default, i.e. usage is `--use-edinburgh-vctk-url true`. Left unchanged to
# avoid breaking existing invocations; confirm whether a flag was intended.
@click.option("--use-edinburgh-vctk-url", default=False)
def vctk(corpus_dir: Pathlike, output_dir: Pathlike, use_edinburgh_vctk_url: bool):
    """VCTK data preparation."""
    prepare_vctk(
        corpus_dir,
        output_dir=output_dir,
        use_edinburgh_vctk_url=use_edinburgh_vctk_url,
    )


@download.command()
@click.argument("target_dir", type=click.Path())
@click.option("--use-edinburgh-vctk-url", default=False)
def vctk(target_dir: Pathlike, use_edinburgh_vctk_url: bool):
    """VCTK download."""
    download_vctk(target_dir, use_edinburgh_vctk_url=use_edinburgh_vctk_url)
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--split-test",
    is_flag=True,
    default=False,
    help="Split test part into dev and test parts",
)
def voxconverse(corpus_dir: Pathlike, output_dir: Pathlike, split_test: bool = False):
    """VoxConverse data preparation."""
    # Forward the parsed CLI arguments to the recipe implementation.
    prepare_voxconverse(
        corpus_dir,
        output_dir=output_dir,
        split_test=split_test,
    )
37 | """ 38 | prepare_wenet_speech( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_jobs=num_jobs, 42 | dataset_parts=dataset_parts, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/wenetspeech4tts.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes import prepare_wenetspeech4tts 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["wenetspeech4tts"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-j", 17 | "--num-jobs", 18 | type=int, 19 | default=1, 20 | help="How many jobs to use (can give good speed-ups with slow disks).", 21 | ) 22 | @click.option( 23 | "-p", 24 | "--dataset-parts", 25 | type=str, 26 | default=["all"], 27 | multiple=True, 28 | help="List of dataset parts to prepare. 
# XBMU-AMDO31 corpus: download first, then manifest preparation.
@download.command(context_settings={"show_default": True})
@click.argument("target_dir", type=click.Path())
def xbmu_amdo31(target_dir: Pathlike):
    """XBMU-AMDO31 download."""
    download_xbmu_amdo31(target_dir)


@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def xbmu_amdo31(corpus_dir: Pathlike, output_dir: Pathlike):
    """XBMU-AMDO31 ASR data preparation."""
    prepare_xbmu_amdo31(corpus_dir, output_dir=output_dir)
@dataclass
class TextExample(CustomFieldMixin):
    """
    Represents a single text example. Useful e.g. for language modeling.
    """

    # The raw text of the example.
    text: str
    # Optional tokenized form of `text`; None until tokenization is performed.
    tokens: Optional[np.ndarray] = None
    # Free-form extra fields handled by CustomFieldMixin.
    custom: Optional[Dict[str, Any]] = None

    @property
    def num_tokens(self) -> Optional[int]:
        # Undefined (None) until the example has been tokenized.
        return None if self.tokens is None else len(self.tokens)
30 | """ 31 | 32 | source: TextExample 33 | target: TextExample 34 | custom: Optional[Dict[str, Any]] = None 35 | 36 | @property 37 | def num_tokens(self) -> Optional[int]: 38 | return self.source.num_tokens 39 | -------------------------------------------------------------------------------- /lhotse/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cut_transforms, input_strategies, sampling, signal_transforms 2 | from .audio_tagging import AudioTaggingDataset 3 | from .cut_transforms import * 4 | from .dataloading import make_worker_init_fn 5 | from .diarization import DiarizationDataset 6 | from .input_strategies import AudioSamples, OnTheFlyFeatures, PrecomputedFeatures 7 | from .iterable_dataset import IterableDatasetWrapper 8 | from .sampling import * 9 | from .signal_transforms import GlobalMVN, RandomizedSmoothing, SpecAugment 10 | from .source_separation import ( 11 | DynamicallyMixedSourceSeparationDataset, 12 | PreMixedSourceSeparationDataset, 13 | SourceSeparationDataset, 14 | ) 15 | from .speech_recognition import K2SpeechRecognitionDataset 16 | from .speech_synthesis import SpeechSynthesisDataset 17 | from .surt import K2SurtDataset 18 | from .unsupervised import ( 19 | DynamicUnsupervisedDataset, 20 | UnsupervisedDataset, 21 | UnsupervisedWaveformDataset, 22 | ) 23 | from .vad import VadDataset 24 | from .vis import plot_batch 25 | from .webdataset import LazyWebdatasetIterator, WebdatasetWriter, export_to_webdataset 26 | -------------------------------------------------------------------------------- /lhotse/dataset/cut_transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .concatenate import CutConcatenate, concat_cuts 2 | from .extra_padding import ExtraPadding 3 | from .mix import CutMix 4 | from .perturb_speed import PerturbSpeed 5 | from .perturb_tempo import PerturbTempo 6 | from .perturb_volume import PerturbVolume 
class PerturbSpeed:
    """
    A transform on batch of cuts (``CutSet``) that perturbs the speed of the recordings
    with a given probability :attr:`p`.

    If the effect is applied, then one of the perturbation factors from the constructor's
    :attr:`factors` parameter is sampled with uniform probability.
    """

    def __init__(
        self,
        factors: Union[float, Sequence[float]],
        p: float,
        randgen: random.Random = None,
        preserve_id: bool = False,
    ) -> None:
        # A scalar factor is normalized into a single-element list.
        if isinstance(factors, Sequence):
            self.factors = factors
        else:
            self.factors = [factors]
        self.p = p
        self.random = randgen
        self.preserve_id = preserve_id

    def __call__(self, cuts: CutSet) -> CutSet:
        # When no RNG was supplied, create a dedicated one lazily on first use.
        if self.random is None:
            self.random = random.Random()

        def _maybe_perturb(cut):
            # First draw decides whether to apply the effect at all
            # (same RNG call order as drawing inside a conditional expression).
            if self.random.random() > self.p:
                return cut
            chosen = self.random.choice(self.factors)
            return cut.perturb_speed(factor=chosen, affix_id=not self.preserve_id)

        return CutSet.from_cuts(_maybe_perturb(cut) for cut in cuts)
class PerturbVolume:
    """
    A transform on batch of cuts (``CutSet``) that perturbs the volume of the recordings
    with a given probability :attr:`p`.

    If the effect is applied, the volume scaling factor is sampled uniformly from
    the range [:attr:`scale_low`, :attr:`scale_high`].
    """

    def __init__(
        self,
        p: float,
        scale_low: float = 0.125,
        scale_high: float = 2.0,
        randgen: random.Random = None,
        preserve_id: bool = False,
    ) -> None:
        # :param p: per-cut probability of applying the perturbation.
        # :param scale_low/scale_high: bounds of the uniform range the volume
        #     scaling factor is drawn from.
        # :param randgen: optional RNG for reproducibility; created lazily otherwise.
        # :param preserve_id: if True, keep original cut IDs (no affix).
        self.p = p
        self.scale_low = scale_low
        self.scale_high = scale_high
        self.random = randgen
        self.preserve_id = preserve_id

    def __call__(self, cuts: CutSet) -> CutSet:
        if self.random is None:
            # Fix: previously this assigned the `random` *module* rather than an
            # RNG instance, which is inconsistent with PerturbSpeed (which uses
            # a dedicated `random.Random()`) and coupled the transform to the
            # global RNG state. A dedicated instance keeps it self-contained.
            self.random = random.Random()
        return CutSet.from_cuts(
            cut.perturb_volume(
                factor=self.random.uniform(self.scale_low, self.scale_high),
                affix_id=not self.preserve_id,
            )
            if self.random.random() <= self.p
            else cut
            for cut in cuts
        )
14 | """ 15 | 16 | def __init__( 17 | self, 18 | rir_recordings: Optional[RecordingSet] = None, 19 | p: float = 0.5, 20 | normalize_output: bool = True, 21 | randgen: random.Random = None, 22 | preserve_id: bool = False, 23 | early_only: bool = False, 24 | rir_channels: List[int] = [0], 25 | ) -> None: 26 | self.rir_recordings = list(rir_recordings) if rir_recordings is not None else [] 27 | self.p = p 28 | self.normalize_output = normalize_output 29 | self.random = randgen 30 | self.preserve_id = preserve_id 31 | self.early_only = early_only 32 | self.rir_channels = rir_channels 33 | 34 | def __call__(self, cuts: CutSet) -> CutSet: 35 | if self.random is None: 36 | self.random = random.Random() 37 | return CutSet.from_cuts( 38 | cut.reverb_rir( 39 | rir_recording=self.random.choice(self.rir_recordings) 40 | if self.rir_recordings 41 | else None, 42 | normalize_output=self.normalize_output, 43 | early_only=self.early_only, 44 | affix_id=not self.preserve_id, 45 | rir_channels=self.rir_channels, 46 | ) 47 | if self.random.random() <= self.p 48 | else cut 49 | for cut in cuts 50 | ) 51 | -------------------------------------------------------------------------------- /lhotse/dataset/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | SamplingConstraint, 3 | SamplingDiagnostics, 4 | TimeConstraint, 5 | TokenConstraint, 6 | ) 7 | from .bucketing import BucketingSampler 8 | from .cut_pairs import CutPairsSampler 9 | from .dynamic import DynamicCutSampler 10 | from .dynamic_bucketing import DynamicBucketingSampler 11 | from .round_robin import RoundRobinSampler 12 | from .simple import SimpleCutSampler 13 | from .stateless import StatelessSampler 14 | from .utils import find_pessimistic_batches, report_padding_ratio_estimate 15 | from .weighted_simple import WeightedSimpleCutSampler 16 | from .zip import ZipSampler 17 | 18 | __all__ = [ 19 | "TokenConstraint", 20 | "TimeConstraint", 21 | 
"SamplingDiagnostics", 22 | "SamplingConstraint", 23 | "BucketingSampler", 24 | "CutPairsSampler", 25 | "DynamicCutSampler", 26 | "DynamicBucketingSampler", 27 | "RoundRobinSampler", 28 | "SimpleCutSampler", 29 | "WeightedSimpleCutSampler", 30 | "StatelessSampler", 31 | "ZipSampler", 32 | "find_pessimistic_batches", 33 | "report_padding_ratio_estimate", 34 | ] 35 | -------------------------------------------------------------------------------- /lhotse/dataset/vad.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, Sequence 2 | 3 | import torch 4 | 5 | from lhotse import validate 6 | from lhotse.cut import CutSet 7 | from lhotse.dataset.input_strategies import BatchIO, PrecomputedFeatures 8 | from lhotse.utils import ifnone 9 | 10 | 11 | class VadDataset(torch.utils.data.Dataset): 12 | """ 13 | The PyTorch Dataset for the voice activity detection task. 14 | Each item in this dataset is a dict of: 15 | 16 | .. code-block:: 17 | 18 | { 19 | 'inputs': (B x T x F) tensor 20 | 'input_lens': (B,) tensor 21 | 'is_voice': (T x 1) tensor 22 | 'cut': List[Cut] 23 | } 24 | """ 25 | 26 | def __init__( 27 | self, 28 | input_strategy: BatchIO = PrecomputedFeatures(), 29 | cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None, 30 | input_transforms: Sequence[Callable[[torch.Tensor], torch.Tensor]] = None, 31 | ) -> None: 32 | super().__init__() 33 | self.input_strategy = input_strategy 34 | self.cut_transforms = ifnone(cut_transforms, []) 35 | self.input_transforms = ifnone(input_transforms, []) 36 | 37 | def __getitem__(self, cuts: CutSet) -> Dict[str, torch.Tensor]: 38 | validate(cuts) 39 | cuts = cuts.sort_by_duration() 40 | for tfnm in self.cut_transforms: 41 | cuts = tfnm(cuts) 42 | inputs, input_lens = self.input_strategy(cuts) 43 | for tfnm in self.input_transforms: 44 | inputs = tfnm(inputs) 45 | return { 46 | "inputs": inputs, 47 | "input_lens": input_lens, 48 | "is_voice": 
self.input_strategy.supervision_masks(cuts), 49 | "cut": cuts, 50 | } 51 | -------------------------------------------------------------------------------- /lhotse/dataset/video.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import torch 4 | 5 | from lhotse import CutSet 6 | from lhotse.dataset.collation import collate_video 7 | 8 | 9 | class UnsupervisedAudioVideoDataset(torch.utils.data.Dataset): 10 | """ 11 | A basic dataset that loads, pads, collates, and returns video and audio tensors. 12 | 13 | Returns: 14 | 15 | .. code-block:: 16 | 17 | { 18 | 'video': (B x NumFrames x Color x Height x Width) uint8 tensor 19 | 'video_lens': (B, ) int32 tensor 20 | 'audio': (B x NumChannels x NumSamples) float32 tensor 21 | 'audio_lens': (B, ) int32 tensor 22 | 'cuts': CutSet of length B 23 | } 24 | """ 25 | 26 | def __getitem__(self, cuts: CutSet) -> Dict[str, Any]: 27 | video, video_lens, audio, audio_lens, cuts = collate_video( 28 | cuts, fault_tolerant=True 29 | ) 30 | return { 31 | "cuts": cuts, 32 | "video": video, 33 | "video_lens": video_lens, 34 | "audio": audio, 35 | "audio_lens": audio_lens, 36 | } 37 | -------------------------------------------------------------------------------- /lhotse/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | FeatureExtractor, 3 | Features, 4 | FeatureSet, 5 | FeatureSetBuilder, 6 | create_default_feature_extractor, 7 | ) 8 | from .fbank import TorchaudioFbank, TorchaudioFbankConfig 9 | from .io import ( 10 | ChunkedLilcomHdf5Reader, 11 | ChunkedLilcomHdf5Writer, 12 | FeaturesReader, 13 | FeaturesWriter, 14 | KaldiReader, 15 | LilcomChunkyReader, 16 | LilcomChunkyWriter, 17 | LilcomFilesReader, 18 | LilcomFilesWriter, 19 | LilcomHdf5Reader, 20 | LilcomHdf5Writer, 21 | LilcomURLReader, 22 | LilcomURLWriter, 23 | NumpyFilesReader, 24 | NumpyFilesWriter, 25 | NumpyHdf5Reader, 26 
def lilcom_compress_chunked(
    data: np.ndarray,
    tick_power: int = -5,
    do_regression: bool = True,
    chunk_size: int = 100,
    temporal_dim: int = 0,
) -> List[bytes]:
    """
    Compress ``data`` with lilcom in fixed-size chunks along ``temporal_dim``.

    :param data: the array to compress.
    :param tick_power: lilcom quantization parameter (forwarded as-is).
    :param do_regression: lilcom regression parameter (forwarded as-is).
    :param chunk_size: number of frames per compressed chunk.
    :param temporal_dim: which axis is the time/frame axis (negative values allowed).
    :return: a list of compressed byte strings, one per chunk.
    """
    # Normalize a negative axis index the way NumPy would.
    if temporal_dim < 0:
        temporal_dim += data.ndim
    assert 0 <= temporal_dim < data.ndim
    num_frames = data.shape[temporal_dim]
    compressed = []
    for begin in range(0, num_frames, chunk_size):
        # Slice along temporal_dim. The previous implementation always sliced
        # axis 0 (``data[begin:begin + chunk_size]``) even though num_frames was
        # read from ``shape[temporal_dim]``, yielding wrong chunks whenever
        # temporal_dim != 0.
        chunk_index = (slice(None),) * temporal_dim + (
            slice(begin, begin + chunk_size),
        )
        compressed.append(
            lilcom.compress(
                data[chunk_index],
                tick_power=tick_power,
                do_regression=do_regression,
            )
        )
    return compressed
@dataclass
class TorchaudioMfccConfig:
    """
    Configuration for :class:`TorchaudioMfcc`.

    The spectrogram-related fields are forwarded to torchaudio's Kaldi-compliant
    feature computation; the MFCC-related fields control the mel filterbank and
    cepstral stages.
    """

    # Spectrogram-related part
    dither: float = 0.0
    window_type: str = "povey"
    # Note that frame_length and frame_shift will be converted to milliseconds before torchaudio/Kaldi sees them
    frame_length: Seconds = 0.025
    frame_shift: Seconds = 0.01
    remove_dc_offset: bool = True
    round_to_power_of_two: bool = True
    energy_floor: float = EPSILON
    min_duration: float = 0.0
    preemphasis_coefficient: float = 0.97
    raw_energy: bool = True

    # MFCC-related part
    low_freq: float = 20.0
    # NOTE(review): high_freq and vtln_high are negative — in the Kaldi
    # convention a negative value presumably means an offset from the Nyquist
    # frequency; confirm against torchaudio.compliance.kaldi docs.
    high_freq: float = -400.0
    num_mel_bins: int = 23
    use_energy: bool = False
    vtln_low: float = 100.0
    vtln_high: float = -500.0
    vtln_warp: float = 1.0
    cepstral_lifter: float = 22.0
    num_ceps: int = 13

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this config into a plain dict of its dataclass fields."""
        return asdict(self)

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> "TorchaudioMfccConfig":
        """Re-create a config from a dict produced by :meth:`to_dict`."""
        return TorchaudioMfccConfig(**data)


@register_extractor
class TorchaudioMfcc(TorchaudioFeatureExtractor):
    """MFCC feature extractor based on ``torchaudio.compliance.kaldi.mfcc`` function."""

    name = "mfcc"
    config_type = TorchaudioMfccConfig

    def _feature_fn(self, *args, **kwargs):
        # Imported lazily so torchaudio is only required when features
        # are actually computed.
        from torchaudio.compliance.kaldi import mfcc

        return mfcc(*args, **kwargs)

    def feature_dim(self, sampling_rate: int) -> int:
        # The MFCC output dimension equals the number of cepstral coefficients,
        # independent of the sampling rate.
        return self.config.num_ceps
@pytest.fixture
def deterministic_rng(request):
    """
    Pytest fixture that ensures deterministic RNG behavior.
    After the test finishes, it restores the previous RNG state.

    Example usage::

        >>> def my_test(deterministic_rng):
        ...     x = torch.randn(10, 5)  # always has the same values

    You can also set random seed like this::

        >>> @pytest.mark.seed(1337)
        ... def my_test(deterministic_rng):
        ...     x = torch.randn(10, 5)

    .. note: Learn more about pytest fixtures setup/teardown here:
        https://docs.pytest.org/en/latest/how-to/fixtures.html#teardown-cleanup-aka-fixture-finalization
    """
    # Pytest markers let tests optionally set a seed other than the default 0.
    # See: https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#using-markers-to-pass-data-to-fixtures
    marker = request.node.get_closest_marker("seed")
    seed = 0 if marker is None else marker.args[0]

    # Snapshot every RNG we are about to reseed so we can restore it afterwards.
    saved_torch = torch.get_rng_state()
    saved_numpy = np.random.get_state()
    saved_python = random.getstate()

    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    yield seed

    # Teardown: put all three generators back exactly as we found them.
    random.setstate(saved_python)
    np.random.set_state(saved_numpy)
    torch.set_rng_state(saved_torch)
def install_sph2pipe(
    where: Optional[Pathlike] = None,
    download_from: str = SPH2PIPE_URL,
    force: bool = False,
) -> None:
    """
    Install the sph2pipe program to handle sphere (.sph) audio files with
    "shorten" codec compression (needed for older LDC data).

    It downloads an archive and then decompresses and compiles the contents.

    :param where: installation directory; defaults to lhotse's tools cache dir.
    :param download_from: URL of the sph2pipe source archive.
    :param force: when True, re-download and re-extract even if already present.
    """
    if where is None:
        where = default_tools_cachedir(force_mkdir=True)
    where = Path(where)
    # Download + extract sources (no-op if already present and not forced).
    download_and_untar_sph2pipe(where, url=download_from, force_download=force)
    # Compile. Pass argv as a list without shell=True: the previous
    # f-string + shell=True invocation would break (or worse, execute
    # unintended commands) if the install path contained spaces or shell
    # metacharacters, e.g. in some home directories.
    subprocess.run(
        ["make", "-C", str(where / "sph2pipe-2.5")],
        check=True,
    )
    logging.info("Finished installing sph2pipe.")


def download_and_untar_sph2pipe(
    target_dir: Pathlike,
    url: str,
    force_download: bool = False,
) -> Path:
    """
    Download the sph2pipe source tarball into ``target_dir`` and extract it.

    Skips both steps if the sources look already extracted, unless
    ``force_download`` is set.

    :return: path to the extracted ``sph2pipe-2.5`` directory.
    """
    target_dir = Path(target_dir)
    sph2pipe_dir = target_dir / "sph2pipe-2.5"
    # The presence of the Makefile marks a previously successful extraction.
    if (sph2pipe_dir / "Makefile").is_file() and not force_download:
        return sph2pipe_dir
    target_dir.mkdir(parents=True, exist_ok=True)
    tar_name = "sph2pipe-2.5.tar.gz"
    tar_path = target_dir / tar_name
    resumable_download(url, filename=tar_path, force_download=force_download)
    # safe_extract guards against path-traversal entries in the tarball.
    with tarfile.open(tar_path) as tar:
        safe_extract(tar, path=target_dir)
    return sph2pipe_dir
-------------------------------------------------------------------------------- 1 | from .base import Activity, ActivityDetector 2 | from .silero_vad import SileroVAD8k, SileroVAD16k 3 | -------------------------------------------------------------------------------- /lhotse/workflows/forced_alignment/__init__.py: -------------------------------------------------------------------------------- 1 | from .asr_aligner import * 2 | from .base import * 3 | from .mms_aligner import * 4 | from .workflow import * 5 | -------------------------------------------------------------------------------- /lhotse/workflows/meeting_simulation/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseMeetingSimulator 2 | from .conversational import ConversationalMeetingSimulator 3 | from .speaker_independent import SpeakerIndependentMeetingSimulator 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | skip = ["lhotse/__init__.py"] 4 | 5 | [tool.black] 6 | force-exclude = ''' 7 | /( 8 | \.git 9 | | \.github 10 | )/ 11 | ''' 12 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/__init__.py -------------------------------------------------------------------------------- /test/audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/audio/__init__.py -------------------------------------------------------------------------------- /test/audio/test_resample_randomized.py: 
class TestResample(RandomCutTestCase):
    @settings(deadline=None, print_blob=True)
    @given(
        st.one_of(
            st.just(8000),
            st.just(16000),
            st.just(22050),
            st.just(44100),
            st.just(48000),
        ),
        st.one_of(
            st.just(8000),
            st.just(16000),
            st.just(22050),
            st.just(44100),
            st.just(48000),
        ),
        st.data(),
    )
    def test_resample(self, source_sampling_rate, target_sampling_rate, data):
        """Resampling keeps the recording ID and (approximately) its duration."""
        # Draw a number of samples between 0.9 - 1.1 times the sampling rate.
        lo = round(source_sampling_rate * 0.9)
        hi = round(source_sampling_rate * 1.1)
        num_samples = data.draw(
            st.integers(lo, hi),
            label="Numbers of samples for Recordings",
        )
        # Generate a random recording and resample it.
        recording = self.with_recording(
            sampling_rate=source_sampling_rate, num_samples=num_samples
        )
        resampled = recording.resample(target_sampling_rate)
        assert resampled.id == recording.id
        # Tolerance of one sample in the resampled domain.
        assert isclose(
            resampled.duration, recording.duration, abs_tol=1 / target_sampling_rate
        )
        audio = resampled.load_audio()
        assert audio.shape[0] == resampled.num_channels
        assert audio.shape[1] == resampled.num_samples
        # Cleanup open file handles.
        self.cleanup()
@pytest.fixture
def dummy_cut_set():
    """Libri test cuts augmented with a synthetic ``audio_event`` attribute."""

    def _attach_audio_event(cut):
        cut.supervisions[0].audio_event = "Speech; Whisper"
        return cut

    return CutSet.from_json("test/fixtures/libri/cuts.json").map(_attach_audio_event)


def test_audio_tagging_dataset(dummy_cut_set):
    # The dataset must expose the audio_event field through its supervisions.
    dataset = AudioTaggingDataset()
    batch = dataset[dummy_cut_set]
    assert "audio_event" in batch["supervisions"]
    print("Pass the test")
class IdentityDataset(torch.utils.data.Dataset):
    """A no-op dataset that returns whatever the sampler yields, unchanged."""

    def __getitem__(self, item):
        return item


@pytest.mark.parametrize("persistent_workers", [False, True])
def test_iterable_dataset_wrapper(persistent_workers):
    cuts = DummyManifest(CutSet, begin_id=0, end_id=10)
    # max_cuts=10 means every epoch fits in a single mini-batch.
    sampler = SimpleCutSampler(cuts, max_cuts=10, shuffle=True)
    loader = torch.utils.data.DataLoader(
        IterableDatasetWrapper(
            IdentityDataset(), sampler, auto_increment_epoch=persistent_workers
        ),
        batch_size=None,
        num_workers=1,
        persistent_workers=persistent_workers,
    )

    epoch_cut_sets = []
    for epoch in (0, 1):
        loader.dataset.set_epoch(epoch)
        batches = list(loader)
        epoch_cut_sets.append(
            CutSet.from_cuts(cut for batch in batches for cut in batch)
        )

    # The shuffle order must differ between epochs.
    assert list(epoch_cut_sets[0].ids) != list(epoch_cut_sets[1].ids)
SpeechSynthesisDataset(feature_transforms=transform) 24 | example = dataset[cut_set] 25 | assert example["audio"].shape[1] > 0 26 | assert example["features"].shape[1] > 0 27 | assert len(example["text"]) > 0 28 | assert len(example["text"][0]) > 0 29 | 30 | assert example["audio"].ndim == 2 31 | assert example["features"].ndim == 3 32 | 33 | assert isinstance(example["audio_lens"], torch.IntTensor) 34 | assert isinstance(example["features_lens"], torch.IntTensor) 35 | 36 | assert example["audio_lens"].ndim == 1 37 | assert example["features_lens"].ndim == 1 38 | -------------------------------------------------------------------------------- /test/dataset/test_surt_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torch.utils.data import DataLoader 3 | 4 | from lhotse.cut import CutSet 5 | from lhotse.dataset.sampling import SimpleCutSampler 6 | from lhotse.dataset.surt import K2SurtDataset 7 | 8 | 9 | @pytest.fixture 10 | def cut_set(): 11 | return CutSet.from_shar(in_dir="test/fixtures/lsmix") 12 | 13 | 14 | @pytest.mark.parametrize("num_workers", [0, 1]) 15 | @pytest.mark.parametrize("return_sources", [True, False]) 16 | def test_surt_iterable_dataset(cut_set, num_workers, return_sources): 17 | dataset = K2SurtDataset(return_sources=return_sources, return_cuts=True) 18 | sampler = SimpleCutSampler(cut_set, shuffle=False, max_cuts=10000) 19 | # Note: "batch_size=None" disables the automatic batching mechanism, 20 | # which is required when Dataset takes care of the collation itself. 
21 | dloader = DataLoader( 22 | dataset, batch_size=None, sampler=sampler, num_workers=num_workers 23 | ) 24 | batch = next(iter(dloader)) 25 | assert batch["inputs"].shape == (2, 2238, 80) 26 | assert batch["input_lens"].tolist() == [2238, 985] 27 | 28 | assert len(batch["supervisions"][1]) == 2 29 | assert len(batch["text"][1]) == 2 30 | assert batch["text"][1] == [ 31 | "BY THIS MANOEUVRE WE DON'T LET ANYBODY IN THE CAR AND WE TRY AND KEEP THEM CLEAR OF THE CAR SHORT OF SHOOTING THEM THAT IS CARRIED NO OTHER MESSAGE", 32 | "THE AMERICAN INTERPOSED BRUSQUELY BETWEEN PAROXYSMS AND THEY CAUGHT HIM AT IT EH", 33 | ] 34 | if return_sources: 35 | assert len(batch["source_feats"]) == 2 36 | assert all( 37 | len(batch["source_feats"][i]) == len(batch["cuts"][i].supervisions) 38 | for i in range(2) 39 | ) 40 | -------------------------------------------------------------------------------- /test/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/features/__init__.py -------------------------------------------------------------------------------- /test/features/test_chunky_writer.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from lhotse import ChunkedLilcomHdf5Writer, LilcomChunkyWriter 7 | from lhotse.features.io import get_reader 8 | from lhotse.utils import is_module_available 9 | 10 | 11 | @pytest.mark.parametrize( 12 | ["writer_type", "ext"], 13 | [ 14 | (LilcomChunkyWriter, ".lca"), 15 | pytest.param( 16 | ChunkedLilcomHdf5Writer, 17 | ".h5", 18 | marks=pytest.mark.skipif( 19 | not is_module_available("h5py"), 20 | reason="Requires h5py to run HDF5 tests.", 21 | ), 22 | ), 23 | ], 24 | ) 25 | def test_chunky_writer_left_right_offsets_equal(writer_type, ext): 26 | # Generate small 
@pytest.mark.parametrize(
    "feats,expected_num_frames,abs_tol",
    [
        (np.zeros((5, 2)), 5, 0),
        (np.zeros((5, 2)), 4, 1),
        (np.zeros((5, 2)), 6, 1),
        (np.zeros((5, 2)), 3, 2),
        (np.zeros((5, 2)), 7, 2),
    ],
)
def test_pad_or_truncate_features_shape(feats, expected_num_frames, abs_tol):
    """Within tolerance, the output is padded/truncated to the requested length."""
    adjusted = pad_or_truncate_features(feats, expected_num_frames, abs_tol)
    num_features = feats.shape[-1]
    assert adjusted.shape == (expected_num_frames, num_features)
abs_tol) 38 | 39 | 40 | @pytest.mark.skipif( 41 | not is_module_available("librosa"), reason="Librosa is an optional dependency." 42 | ) 43 | @pytest.mark.parametrize("audio_len", [22050, 11025, 1024, 512, 24000, 16000]) 44 | def test_librosa_fbank_with_different_audio_lengths(audio_len): 45 | 46 | extractor = LibrosaFbank() 47 | 48 | kernel_size = extractor.config.fft_size 49 | stride = extractor.config.hop_size 50 | pad = stride 51 | expected_n_frames = ceil((audio_len - kernel_size + 2 * pad) / stride + 1) 52 | 53 | n_frames = len(extractor.extract(np.zeros(audio_len), 22050)) 54 | assert abs(n_frames - expected_n_frames) <= 1 55 | -------------------------------------------------------------------------------- /test/features/test_whisper_fbank.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from lhotse.features.whisper_fbank import WhisperFbank, WhisperFbankConfig 7 | from lhotse.utils import is_module_available 8 | 9 | 10 | @pytest.mark.skipif( 11 | not is_module_available("librosa"), reason="Librosa is an optional dependency." 
12 | ) 13 | @pytest.mark.parametrize("audio_len", [22050, 11025, 1024, 512, 24000, 16000]) 14 | def test_whisper_fbank_with_different_audio_lengths(audio_len): 15 | 16 | extractor = WhisperFbank(WhisperFbankConfig(device="cpu")) 17 | 18 | kernel_size = 400 19 | stride = extractor.hop_length 20 | pad = stride 21 | expected_n_frames = ceil((audio_len - kernel_size + 2 * pad) / stride + 1) 22 | 23 | n_frames = len(extractor.extract(np.zeros(audio_len, dtype=np.float32), 16000)) 24 | assert abs(n_frames - expected_n_frames) <= 1 25 | -------------------------------------------------------------------------------- /test/fixtures/ami/350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc -------------------------------------------------------------------------------- /test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav -------------------------------------------------------------------------------- /test/fixtures/ami/ES2011a_sups.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/ES2011a_sups.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/ami/cuts.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "channel": 0, 4 | "duration": 6.0, 5 | "features": { 6 | "channels": 0, 7 | "duration": 6.0, 8 | "frame_shift": 0.01, 9 | "num_features": 23, 10 | "num_frames": 600, 11 | 
"recording_id": "ES2011a.Headset-0-40s-46s.wav", 12 | "sampling_rate": 16000, 13 | "start": 0.0, 14 | "storage_path": "test/fixtures/ami", 15 | "storage_key": "350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc", 16 | "storage_type": "lilcom_files", 17 | "type": "fbank" 18 | }, 19 | "id": "a7889ee6-1703-4d0d-98b3-91f1d45a790d", 20 | "recording": { 21 | "duration": 6, 22 | "id": "ES2011a.Headset-0-40s-46s.wav", 23 | "num_samples": 96000, 24 | "sampling_rate": 16000, 25 | "sources": [ 26 | { 27 | "channels": [ 28 | 0 29 | ], 30 | "source": "test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav", 31 | "type": "file" 32 | } 33 | ] 34 | }, 35 | "start": 0.0, 36 | "supervisions": [ 37 | { 38 | "channel": 0, 39 | "duration": 1.36, 40 | "id": "ES2011a.Headset-0-40s-46s-0-3", 41 | "language": "English", 42 | "recording_id": "ES2011a.Headset-0-40s-46s.wav", 43 | "speaker": "ES2011a.Headset-1", 44 | "start": 1.46, 45 | "text": "I'M ABIGAIL CLAFLIN" 46 | }, 47 | { 48 | "channel": 0, 49 | "duration": 1.0, 50 | "id": "ES2011a.Headset-0-40s-46s-0-4", 51 | "language": "English", 52 | "recording_id": "ES2011a.Headset-0-40s-46s.wav", 53 | "speaker": "ES2011a.Headset-2", 54 | "start": 3.36, 55 | "text": "YOU CAN CALL ME ABBIE" 56 | } 57 | ], 58 | "type": "MonoCut" 59 | } 60 | ] 61 | -------------------------------------------------------------------------------- /test/fixtures/audio.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "recording-1", 4 | "sampling_rate": 8000, 5 | "num_samples": 4000, 6 | "duration": 0.5, 7 | "sources": [ 8 | { 9 | "type": "file", 10 | "channels": [ 11 | 0 12 | ], 13 | "source": "test/fixtures/mono_c0.wav" 14 | }, 15 | { 16 | "type": "command", 17 | "channels": [ 18 | 1 19 | ], 20 | "source": "cat test/fixtures/mono_c1.wav | cat" 21 | } 22 | ] 23 | }, 24 | { 25 | "id": "recording-2", 26 | "sampling_rate": 8000, 27 | "num_samples": 8000, 28 | "duration": 1.0, 29 | "sources": [ 30 | { 31 | "type": "file", 32 | 
"channels": [ 33 | 0, 34 | 1 35 | ], 36 | "source": "test/fixtures/stereo.wav" 37 | } 38 | ] 39 | }, 40 | { 41 | "id": "recording-3", 42 | "sampling_rate": 8000, 43 | "num_samples": 8000, 44 | "duration": 1.0, 45 | "sources": [ 46 | { 47 | "type": "file", 48 | "channels": [ 49 | 0, 50 | 1 51 | ], 52 | "source": "test/fixtures/stereo.sph" 53 | } 54 | ] 55 | }, 56 | { 57 | "id": "recording-4", 58 | "sampling_rate": 8000, 59 | "num_samples": 4444, 60 | "duration": 0.56, 61 | "sources": [ 62 | { 63 | "type": "file", 64 | "channels": [ 65 | 0 66 | ], 67 | "source": "test/fixtures/mono_c0.wav" 68 | }, 69 | { 70 | "type": "command", 71 | "channels": [ 72 | 1 73 | ], 74 | "source": "sox test/fixtures/mono_c1.wav -t wav - speed 0.9 | cat" 75 | } 76 | ] 77 | } 78 | ] 79 | -------------------------------------------------------------------------------- /test/fixtures/big_buck_bunny_small.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/big_buck_bunny_small.mp4 -------------------------------------------------------------------------------- /test/fixtures/common_voice_en_651325.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/common_voice_en_651325.mp3 -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/feature_manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "channels": 0, 4 | "duration": 0.5, 5 | "frame_shift": 0.01, 6 | "num_features": 23, 7 | "num_frames": 50, 8 | "recording_id": "recording-1", 9 | "sampling_rate": 16000, 10 | "start": 0.0, 11 | "storage_path": "test/fixtures/dummy_feats/storage", 12 | "storage_key": "89739de9-308c-4487-9fa5-1c690d44e718.llc", 13 | 
"storage_type": "lilcom_files", 14 | "type": "fbank" 15 | }, 16 | { 17 | "channels": 1, 18 | "duration": 0.5, 19 | "frame_shift": 0.01, 20 | "num_features": 23, 21 | "num_frames": 50, 22 | "recording_id": "recording-1", 23 | "sampling_rate": 16000, 24 | "start": 0.0, 25 | "storage_path": "test/fixtures/dummy_feats/storage", 26 | "storage_key": "25959652-8816-4810-a88a-0b022d6b9b6d.llc", 27 | "storage_type": "lilcom_files", 28 | "type": "fbank" 29 | }, 30 | { 31 | "channels": 0, 32 | "duration": 1.0, 33 | "frame_shift": 0.01, 34 | "num_features": 23, 35 | "num_frames": 100, 36 | "recording_id": "recording-2", 37 | "sampling_rate": 16000, 38 | "start": 0.0, 39 | "storage_path": "test/fixtures/dummy_feats/storage", 40 | "storage_key": "dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc", 41 | "storage_type": "lilcom_files", 42 | "type": "fbank" 43 | }, 44 | { 45 | "channels": 1, 46 | "duration": 1.0, 47 | "frame_shift": 0.01, 48 | "num_features": 23, 49 | "num_frames": 100, 50 | "recording_id": "recording-2", 51 | "sampling_rate": 16000, 52 | "start": 0.0, 53 | "storage_path": "test/fixtures/dummy_feats/storage", 54 | "storage_key": "d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc", 55 | "storage_type": "lilcom_files", 56 | "type": "fbank" 57 | } 58 | ] -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/25959652-8816-4810-a88a-0b022d6b9b6d.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/25959652-8816-4810-a88a-0b022d6b9b6d.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/89739de9-308c-4487-9fa5-1c690d44e718.llc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/89739de9-308c-4487-9fa5-1c690d44e718.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc -------------------------------------------------------------------------------- /test/fixtures/feature_config.yml: -------------------------------------------------------------------------------- 1 | feature_extractor: 2 | fbank_config: 3 | use_log_fbank: true 4 | mfcc_config: 5 | cepstral_lifter: 22.0 6 | num_ceps: 13 7 | mfcc_fbank_common_config: 8 | high_freq: 0.0 9 | low_freq: 20.0 10 | num_mel_bins: 23 11 | use_energy: false 12 | vtln_high: -500.0 13 | vtln_low: 100.0 14 | vtln_warp: 1.0 15 | spectrogram_config: 16 | dither: 0.0 17 | energy_floor: 0.0 18 | frame_length: 25.0 19 | frame_shift: 10.0 20 | min_duration: 0.0 21 | preemphasis_coefficient: 0.97 22 | raw_energy: true 23 | remove_dc_offset: true 24 | round_to_power_of_two: true 25 | snip_edges: false 26 | window_type: povey 27 | type: mfcc 28 | -------------------------------------------------------------------------------- /test/fixtures/libri/audio.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "recording-1", 4 | "sampling_rate": 16000, 5 | "num_samples": 256640, 6 | "duration": 16.04, 7 | "sources": [ 8 | { 9 | "type": "file", 10 | "channels": [ 11 | 0 12 | ], 13 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 14 | } 15 | ] 16 | } 17 | ] -------------------------------------------------------------------------------- /test/fixtures/libri/cuts.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [ 8 | { 9 | "id": "sup-1", 10 | "recording_id": "recording-1", 11 | "start": 0, 12 | "duration": 10.0, 13 | "channel": 0, 14 | "text": "EXAMPLE OF TEXT", 15 | "speaker": "libri-spk1" 16 | } 17 | ], 18 | "features": { 19 | "type": "fbank", 20 | "num_frames": 1604, 21 | "num_features": 40, 22 | "frame_shift": 0.01, 23 | "sampling_rate": 16000, 24 | "start": 0, 25 | "duration": 16.04, 26 | "storage_type": "lilcom_files", 27 | "storage_path": "test/fixtures/libri/storage", 28 | "storage_key": "30c2440c-93cb-4e83-b382-f2a59b3859b4.llc", 29 | "recording_id": "recording-1", 30 | "channels": 0 31 | }, 32 | "recording": { 33 | "id": "recording-1", 34 | "sources": [ 35 | { 36 | "type": "file", 37 | "channels": [ 38 | 0 39 | ], 40 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 41 | } 42 | ], 43 | "sampling_rate": 16000, 44 | "num_samples": 256640, 45 | "duration": 16.04 46 | }, 47 | "type": "MonoCut" 48 | } 49 | ] 50 | -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_multi.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-7gcfe93d18de", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [ 8 | { 9 | "id": 
"sup-1", 10 | "recording_id": "recording-1", 11 | "start": 0, 12 | "duration": 10.0, 13 | "channel": 0, 14 | "text": "EXAMPLE OF TEXT" 15 | } 16 | ], 17 | "recording": { 18 | "id": "recording-1", 19 | "sources": [ 20 | { 21 | "type": "file", 22 | "channels": [ 23 | 0 24 | ], 25 | "source": "test/fixtures/libri/libri-1088-134315-0000_8ch.wav" 26 | } 27 | ], 28 | "sampling_rate": 16000, 29 | "num_samples": 256640, 30 | "duration": 16.04 31 | }, 32 | "type": "MultiCut" 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_no_feats.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [], 8 | "recording": { 9 | "id": "recording-1", 10 | "sources": [ 11 | { 12 | "type": "file", 13 | "channels": [ 14 | 0 15 | ], 16 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 17 | } 18 | ], 19 | "sampling_rate": 16000, 20 | "num_samples": 256640, 21 | "duration": 16.04 22 | }, 23 | "type": "Cut" 24 | } 25 | ] -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_no_recording.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [], 8 | "features": { 9 | "type": "fbank", 10 | "num_frames": 1604, 11 | "num_features": 40, 12 | "frame_shift": 0.01, 13 | "sampling_rate": 16000, 14 | "start": 0, 15 | "duration": 16.04, 16 | "storage_type": "lilcom_files", 17 | "storage_path": "test/fixtures/libri/storage", 18 | "storage_key": "30c2440c-93cb-4e83-b382-f2a59b3859b4.llc", 19 | "recording_id": "recording-1", 20 | "channels": 0 21 | }, 22 | "type": "Cut" 23 | } 24 | ] 
-------------------------------------------------------------------------------- /test/fixtures/libri/feature_manifest.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/feature_manifest.json.gz -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000.wav -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000_8ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000_8ch.wav -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000_rvb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000_rvb.wav -------------------------------------------------------------------------------- /test/fixtures/libri/recreate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=test/fixtures/libri 4 | if [ ! -f $dir/audio.json ]; then 5 | echo "Expected to run this script in the main Lhotse repo directory." 6 | exit 1 7 | fi 8 | 9 | rm $dir/cuts* 10 | rm $dir/feature_manifest.json.gz 11 | rm -rf $dir/storage 12 | 13 | lhotse feat extract $dir/audio.json $dir 14 | # Create three variants of cut manifests. 
15 | # Seed 0 ensures the RNG always picks the same ID for the cuts. 16 | lhotse --seed 0 cut simple -r $dir/audio.json -f $dir/feature_manifest.json.gz $dir/cuts.json 17 | lhotse --seed 0 cut simple -r $dir/audio.json $dir/cuts_no_feats.json 18 | lhotse --seed 0 cut simple -f $dir/feature_manifest.json.gz $dir/cuts_no_recording.json 19 | 20 | for f in $dir/cuts*; do 21 | lhotse cut truncate -d 10.0 --preserve-id $f $f 22 | done 23 | -------------------------------------------------------------------------------- /test/fixtures/libri/storage/30c2440c-93cb-4e83-b382-f2a59b3859b4.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/storage/30c2440c-93cb-4e83-b382-f2a59b3859b4.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/feats/5bb/5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/feats/5bb/5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/feats/d39/d39cf273-a42d-433a-a63c-ba6357f1669e.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/feats/d39/d39cf273-a42d-433a-a63c-ba6357f1669e.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/storage/LJ002-0020.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/storage/LJ002-0020.wav -------------------------------------------------------------------------------- /test/fixtures/ljspeech/storage/LJ002-0035.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/storage/LJ002-0035.wav -------------------------------------------------------------------------------- /test/fixtures/lsmix/cuts.000000.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/cuts.000000.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/lsmix/features.000000.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/features.000000.tar -------------------------------------------------------------------------------- /test/fixtures/lsmix/source_feats.000000.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/source_feats.000000.tar -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false # only non-default option. 
2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse-b/recordings.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse-b/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse-b/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse-b/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse/recordings.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/reco2dur: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 10.885 2 | lbi-1272-141231-0000 4.650 3 | lbi-1462-170142-0000 4.715 4 | lbi-1462-170145-0000 15.404999732971191 5 | lbi-174-168635-0000 4.53000020980835 6 | lbi-1988-147956-0000 14.949999809265137 7 | lbi-1988-24833-0000 3.319999933242798 8 | 
lbi-1993-147964-0000 8.420000076293945 9 | lbi-2035-147960-0000 9.020000457763672 10 | lbi-2035-147961-0000 15.069999694824219 11 | lbi-2035-152373-0000 7.875 12 | lbi-2412-153948-0000 11.65999984741211 13 | lbi-2428-83699-0000 13.305000305175781 14 | lbi-251-118436-0000 6.260000228881836 15 | lbi-251-136532-0000 9.8100004196167 16 | lbi-2803-154320-0000 11.479999542236328 17 | lbi-2803-161169-0000 11.699999809265137 18 | lbi-3000-15664-0000 3.130000114440918 19 | lbi-3536-23268-0000 19.889999389648438 20 | lbi-3576-138058-0000 14.140000343322754 21 | lbi-3752-4944-0000 3.3350000381469727 22 | lbi-5338-24640-0000 3.450000047683716 23 | lbi-5338-284437-0000 4.550000190734863 24 | lbi-5694-64038-0000 2.5950000286102295 25 | lbi-5895-34615-0000 3.3350000381469727 26 | lbi-5895-34622-0000 3.369999885559082 27 | lbi-5895-34629-0000 2.259999990463257 28 | lbi-6241-61943-0000 6.949999809265137 29 | lbi-6241-61946-0000 6.235000133514404 30 | lbi-6295-244435-0000 3.1050000190734863 31 | lbi-6319-57405-0000 7.295000076293945 32 | lbi-777-126732-0000 2.740000009536743 33 | lbi-7850-281318-0000 4.175000190734863 34 | lbi-7850-286674-0000 8.454999923706055 35 | lbi-7976-110523-0000 15.220000267028809 36 | lbi-8297-275156-0000 3.5799999237060547 37 | lbi-84-121550-0000 8.4350004196167 38 | lbi-8842-304647-0000 9.710000038146973 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/segments: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 lbi-1272-135031-0000 0 10.885 2 | lbi-1272-141231-0000 lbi-1272-141231-0000 0 4.65 3 | lbi-1462-170142-0000 lbi-1462-170142-0000 0 4.715 4 | lbi-1462-170145-0000 lbi-1462-170145-0000 0 15.405 5 | lbi-174-168635-0000 lbi-174-168635-0000 0 4.53 6 | lbi-1988-147956-0000 lbi-1988-147956-0000 0 14.95 7 | lbi-1988-24833-0000 lbi-1988-24833-0000 0 3.32 8 | lbi-1993-147964-0000 lbi-1993-147964-0000 0 8.42 9 | lbi-2035-147960-0000 
lbi-2035-147960-0000 0 9.02 10 | lbi-2035-147961-0000 lbi-2035-147961-0000 0 15.07 11 | lbi-2035-152373-0000 lbi-2035-152373-0000 0 7.875 12 | lbi-2412-153948-0000 lbi-2412-153948-0000 0 11.66 13 | lbi-2428-83699-0000 lbi-2428-83699-0000 0 13.305 14 | lbi-251-118436-0000 lbi-251-118436-0000 0 6.26 15 | lbi-251-136532-0000 lbi-251-136532-0000 0 9.81 16 | lbi-2803-154320-0000 lbi-2803-154320-0000 0 11.48 17 | lbi-2803-161169-0000 lbi-2803-161169-0000 0 11.7 18 | lbi-3000-15664-0000 lbi-3000-15664-0000 0 3.13 19 | lbi-3536-23268-0000 lbi-3536-23268-0000 0 19.89 20 | lbi-3576-138058-0000 lbi-3576-138058-0000 0 14.14 21 | lbi-3752-4944-0000 lbi-3752-4944-0000 0 3.335 22 | lbi-5338-24640-0000 lbi-5338-24640-0000 0 3.45 23 | lbi-5338-284437-0000 lbi-5338-284437-0000 0 4.55 24 | lbi-5694-64038-0000 lbi-5694-64038-0000 0 2.595 25 | lbi-5895-34615-0000 lbi-5895-34615-0000 0 3.335 26 | lbi-5895-34622-0000 lbi-5895-34622-0000 0 3.37 27 | lbi-5895-34629-0000 lbi-5895-34629-0000 0 2.26 28 | lbi-6241-61943-0000 lbi-6241-61943-0000 0 6.95 29 | lbi-6241-61946-0000 lbi-6241-61946-0000 0 6.235 30 | lbi-6295-244435-0000 lbi-6295-244435-0000 0 3.105 31 | lbi-6319-57405-0000 lbi-6319-57405-0000 0 7.295 32 | lbi-777-126732-0000 lbi-777-126732-0000 0 2.74 33 | lbi-7850-281318-0000 lbi-7850-281318-0000 0 4.175 34 | lbi-7850-286674-0000 lbi-7850-286674-0000 0 8.455 35 | lbi-7976-110523-0000 lbi-7976-110523-0000 0 15.22 36 | lbi-8297-275156-0000 lbi-8297-275156-0000 0 3.58 37 | lbi-84-121550-0000 lbi-84-121550-0000 0 8.435 38 | lbi-8842-304647-0000 lbi-8842-304647-0000 0 9.71 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/spk2gender: -------------------------------------------------------------------------------- 1 | lbi-1272-135031 m 2 | lbi-1272-141231 m 3 | lbi-1462-170142 f 4 | lbi-1462-170145 f 5 | lbi-174-168635 m 6 | lbi-1988-147956 f 7 | lbi-1988-24833 f 8 | lbi-1993-147964 f 9 | lbi-2035-147960 f 10 | 
lbi-2035-147961 f 11 | lbi-2035-152373 f 12 | lbi-2412-153948 f 13 | lbi-2428-83699 m 14 | lbi-251-118436 m 15 | lbi-251-136532 m 16 | lbi-2803-154320 m 17 | lbi-2803-161169 m 18 | lbi-3000-15664 m 19 | lbi-3536-23268 f 20 | lbi-3576-138058 f 21 | lbi-3752-4944 m 22 | lbi-5338-24640 f 23 | lbi-5338-284437 f 24 | lbi-5694-64038 m 25 | lbi-5895-34615 f 26 | lbi-5895-34622 f 27 | lbi-5895-34629 f 28 | lbi-6241-61943 m 29 | lbi-6241-61946 m 30 | lbi-6295-244435 m 31 | lbi-6319-57405 f 32 | lbi-777-126732 m 33 | lbi-7850-281318 f 34 | lbi-7850-286674 f 35 | lbi-7976-110523 m 36 | lbi-8297-275156 m 37 | lbi-84-121550 f 38 | lbi-8842-304647 f 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/spk2utt: -------------------------------------------------------------------------------- 1 | lbi-1272-135031 lbi-1272-135031-0000 2 | lbi-1272-141231 lbi-1272-141231-0000 3 | lbi-1462-170142 lbi-1462-170142-0000 4 | lbi-1462-170145 lbi-1462-170145-0000 5 | lbi-174-168635 lbi-174-168635-0000 6 | lbi-1988-147956 lbi-1988-147956-0000 7 | lbi-1988-24833 lbi-1988-24833-0000 8 | lbi-1993-147964 lbi-1993-147964-0000 9 | lbi-2035-147960 lbi-2035-147960-0000 10 | lbi-2035-147961 lbi-2035-147961-0000 11 | lbi-2035-152373 lbi-2035-152373-0000 12 | lbi-2412-153948 lbi-2412-153948-0000 13 | lbi-2428-83699 lbi-2428-83699-0000 14 | lbi-251-118436 lbi-251-118436-0000 15 | lbi-251-136532 lbi-251-136532-0000 16 | lbi-2803-154320 lbi-2803-154320-0000 17 | lbi-2803-161169 lbi-2803-161169-0000 18 | lbi-3000-15664 lbi-3000-15664-0000 19 | lbi-3536-23268 lbi-3536-23268-0000 20 | lbi-3576-138058 lbi-3576-138058-0000 21 | lbi-3752-4944 lbi-3752-4944-0000 22 | lbi-5338-24640 lbi-5338-24640-0000 23 | lbi-5338-284437 lbi-5338-284437-0000 24 | lbi-5694-64038 lbi-5694-64038-0000 25 | lbi-5895-34615 lbi-5895-34615-0000 26 | lbi-5895-34622 lbi-5895-34622-0000 27 | lbi-5895-34629 lbi-5895-34629-0000 28 | lbi-6241-61943 lbi-6241-61943-0000 29 | 
lbi-6241-61946 lbi-6241-61946-0000 30 | lbi-6295-244435 lbi-6295-244435-0000 31 | lbi-6319-57405 lbi-6319-57405-0000 32 | lbi-777-126732 lbi-777-126732-0000 33 | lbi-7850-281318 lbi-7850-281318-0000 34 | lbi-7850-286674 lbi-7850-286674-0000 35 | lbi-7976-110523 lbi-7976-110523-0000 36 | lbi-8297-275156 lbi-8297-275156-0000 37 | lbi-84-121550 lbi-84-121550-0000 38 | lbi-8842-304647 lbi-8842-304647-0000 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2dur: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 10.885 2 | lbi-1272-141231-0000 4.65 3 | lbi-1462-170142-0000 4.715 4 | lbi-1462-170145-0000 15.405 5 | lbi-174-168635-0000 4.53 6 | lbi-1988-147956-0000 14.95 7 | lbi-1988-24833-0000 3.32 8 | lbi-1993-147964-0000 8.42 9 | lbi-2035-147960-0000 9.02 10 | lbi-2035-147961-0000 15.07 11 | lbi-2035-152373-0000 7.875 12 | lbi-2412-153948-0000 11.66 13 | lbi-2428-83699-0000 13.305 14 | lbi-251-118436-0000 6.26 15 | lbi-251-136532-0000 9.81 16 | lbi-2803-154320-0000 11.48 17 | lbi-2803-161169-0000 11.7 18 | lbi-3000-15664-0000 3.13 19 | lbi-3536-23268-0000 19.89 20 | lbi-3576-138058-0000 14.14 21 | lbi-3752-4944-0000 3.335 22 | lbi-5338-24640-0000 3.45 23 | lbi-5338-284437-0000 4.55 24 | lbi-5694-64038-0000 2.595 25 | lbi-5895-34615-0000 3.335 26 | lbi-5895-34622-0000 3.37 27 | lbi-5895-34629-0000 2.26 28 | lbi-6241-61943-0000 6.95 29 | lbi-6241-61946-0000 6.235 30 | lbi-6295-244435-0000 3.105 31 | lbi-6319-57405-0000 7.295 32 | lbi-777-126732-0000 2.74 33 | lbi-7850-281318-0000 4.175 34 | lbi-7850-286674-0000 8.455 35 | lbi-7976-110523-0000 15.22 36 | lbi-8297-275156-0000 3.58 37 | lbi-84-121550-0000 8.435 38 | lbi-8842-304647-0000 9.71 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2num_frames: 
-------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 1087 2 | lbi-1272-141231-0000 463 3 | lbi-1462-170142-0000 470 4 | lbi-1462-170145-0000 1539 5 | lbi-174-168635-0000 451 6 | lbi-1988-147956-0000 1493 7 | lbi-1988-24833-0000 330 8 | lbi-1993-147964-0000 840 9 | lbi-2035-147960-0000 900 10 | lbi-2035-147961-0000 1505 11 | lbi-2035-152373-0000 786 12 | lbi-2412-153948-0000 1164 13 | lbi-2428-83699-0000 1329 14 | lbi-251-118436-0000 624 15 | lbi-251-136532-0000 979 16 | lbi-2803-154320-0000 1146 17 | lbi-2803-161169-0000 1168 18 | lbi-3000-15664-0000 311 19 | lbi-3536-23268-0000 1987 20 | lbi-3576-138058-0000 1412 21 | lbi-3752-4944-0000 332 22 | lbi-5338-24640-0000 343 23 | lbi-5338-284437-0000 453 24 | lbi-5694-64038-0000 258 25 | lbi-5895-34615-0000 332 26 | lbi-5895-34622-0000 335 27 | lbi-5895-34629-0000 224 28 | lbi-6241-61943-0000 693 29 | lbi-6241-61946-0000 622 30 | lbi-6295-244435-0000 309 31 | lbi-6319-57405-0000 728 32 | lbi-777-126732-0000 272 33 | lbi-7850-281318-0000 416 34 | lbi-7850-286674-0000 844 35 | lbi-7976-110523-0000 1520 36 | lbi-8297-275156-0000 356 37 | lbi-84-121550-0000 842 38 | lbi-8842-304647-0000 969 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2spk: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 lbi-1272-135031 2 | lbi-1272-141231-0000 lbi-1272-141231 3 | lbi-1462-170142-0000 lbi-1462-170142 4 | lbi-1462-170145-0000 lbi-1462-170145 5 | lbi-174-168635-0000 lbi-174-168635 6 | lbi-1988-147956-0000 lbi-1988-147956 7 | lbi-1988-24833-0000 lbi-1988-24833 8 | lbi-1993-147964-0000 lbi-1993-147964 9 | lbi-2035-147960-0000 lbi-2035-147960 10 | lbi-2035-147961-0000 lbi-2035-147961 11 | lbi-2035-152373-0000 lbi-2035-152373 12 | lbi-2412-153948-0000 lbi-2412-153948 13 | lbi-2428-83699-0000 lbi-2428-83699 14 | lbi-251-118436-0000 lbi-251-118436 15 | 
lbi-251-136532-0000 lbi-251-136532 16 | lbi-2803-154320-0000 lbi-2803-154320 17 | lbi-2803-161169-0000 lbi-2803-161169 18 | lbi-3000-15664-0000 lbi-3000-15664 19 | lbi-3536-23268-0000 lbi-3536-23268 20 | lbi-3576-138058-0000 lbi-3576-138058 21 | lbi-3752-4944-0000 lbi-3752-4944 22 | lbi-5338-24640-0000 lbi-5338-24640 23 | lbi-5338-284437-0000 lbi-5338-284437 24 | lbi-5694-64038-0000 lbi-5694-64038 25 | lbi-5895-34615-0000 lbi-5895-34615 26 | lbi-5895-34622-0000 lbi-5895-34622 27 | lbi-5895-34629-0000 lbi-5895-34629 28 | lbi-6241-61943-0000 lbi-6241-61943 29 | lbi-6241-61946-0000 lbi-6241-61946 30 | lbi-6295-244435-0000 lbi-6295-244435 31 | lbi-6319-57405-0000 lbi-6319-57405 32 | lbi-777-126732-0000 lbi-777-126732 33 | lbi-7850-281318-0000 lbi-7850-281318 34 | lbi-7850-286674-0000 lbi-7850-286674 35 | lbi-7976-110523-0000 lbi-7976-110523 36 | lbi-8297-275156-0000 lbi-8297-275156 37 | lbi-84-121550-0000 lbi-84-121550 38 | lbi-8842-304647-0000 lbi-8842-304647 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false # only non-default option. 
2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.ark -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.scp: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 data/raw_mfcc_mini_librispeech2.1.ark:20 2 | lbi-6241-61943-0000 data/raw_mfcc_mini_librispeech2.1.ark:25996 3 | lbi-8842-304647-0000 data/raw_mfcc_mini_librispeech2.1.ark:35151 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/feats.scp: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 data/raw_mfcc_mini_librispeech2.1.ark:20 2 | lbi-6241-61943-0000 data/raw_mfcc_mini_librispeech2.1.ark:25996 3 | lbi-8842-304647-0000 data/raw_mfcc_mini_librispeech2.1.ark:35151 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/frame_shift: -------------------------------------------------------------------------------- 1 | 0.01 2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/features.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/features.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/recordings.jsonl.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/reco2dur: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 19.889999389648438 2 | lbi-6241-61943-0000 6.949999809265137 3 | lbi-8842-304647-0000 9.710000038146973 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/segments: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 lbi-3536-23268-0000 1.0 20.89 2 | lbi-6241-61943-0000 lbi-6241-61943-0000 0 6.95 3 | lbi-8842-304647-0000 lbi-8842-304647-0000 0 9.71 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/spk2gender: -------------------------------------------------------------------------------- 1 | lbi-3536-23268 f 2 | lbi-6241-61943 m 3 | lbi-8842-304647 f 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/spk2utt: -------------------------------------------------------------------------------- 1 | lbi-3536-23268 lbi-3536-23268-0000 2 | lbi-6241-61943 lbi-6241-61943-0000 3 | lbi-8842-304647 lbi-8842-304647-0000 4 | 
-------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/text: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 SIR EDWARD NOT WHOLLY DISCOURAGED BY THE DENIAL WITH WHICH DORRIFORTH HAD WITH DELICACY ACQUAINTED HIM STILL HOPED FOR A KIND RECEPTION AND WAS SO OFTEN AT THE HOUSE OF MISSUS HORTON THAT LORD FREDERICK'S JEALOUSY WAS EXCITED AND THE TORTURES HE SUFFERED IN CONSEQUENCE CONVINCED HIM BEYOND A DOUBT OF THE SINCERITY OF HIS AFFECTION 2 | lbi-6241-61943-0000 ON THE SECOND OF THE MONTH AT TWO IN THE MORNING OUR PRECIOUS CARGO OF LUGGAGE WAS TAKEN ON BOARD THE GOOD SHIP VALKYRIE 3 | lbi-8842-304647-0000 HE LIVES THY LOSS HE DIES FROM EVERY LIMB MANGLED BY THEE LIGHTNINGS OF GODHEAD SHINE FROM WHICH THY DARKNESS HATH NOT WHERE TO HIDE 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2dur: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 19.89 2 | lbi-6241-61943-0000 6.95 3 | lbi-8842-304647-0000 9.71 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2num_frames: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 1987 2 | lbi-6241-61943-0000 693 3 | lbi-8842-304647-0000 969 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2spk: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 lbi-3536-23268 2 | lbi-6241-61943-0000 lbi-6241-61943 3 | lbi-8842-304647-0000 lbi-8842-304647 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/wav.scp: -------------------------------------------------------------------------------- 1 | 
lbi-3536-23268-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 318240s sine 300-3300 | 2 | lbi-6241-61943-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 111200s sine 300-3300 | 3 | lbi-8842-304647-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 155360s sine 300-3300 | 4 | -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/audio/storage/2412-153948-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/audio/storage/2412-153948-0000.flac -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/audio/storage/2412-153948-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/audio/storage/2412-153948-0001.flac -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/feats/storage/5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/feats/storage/5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/feats/storage/9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/feats/storage/9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc -------------------------------------------------------------------------------- 
/test/fixtures/mono_c0.opus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c0.opus -------------------------------------------------------------------------------- /test/fixtures/mono_c0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c0.wav -------------------------------------------------------------------------------- /test/fixtures/mono_c1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c1.wav -------------------------------------------------------------------------------- /test/fixtures/rir/real_8ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/rir/real_8ch.wav -------------------------------------------------------------------------------- /test/fixtures/rir/sim_1ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/rir/sim_1ch.wav -------------------------------------------------------------------------------- /test/fixtures/stereo.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.mp3 -------------------------------------------------------------------------------- /test/fixtures/stereo.opus: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.opus -------------------------------------------------------------------------------- /test/fixtures/stereo.sph: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.sph -------------------------------------------------------------------------------- /test/fixtures/stereo.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.wav -------------------------------------------------------------------------------- /test/fixtures/supervision.ctm: -------------------------------------------------------------------------------- 1 | recording-1 0 0.10 0.08 transcript 2 | recording-1 0 0.18 0.02 of 3 | recording-1 0 0.20 0.03 the 4 | recording-1 0 0.23 0.07 first 5 | recording-1 0 0.30 0.10 segment 6 | -------------------------------------------------------------------------------- /test/fixtures/supervision.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "segment-1", 4 | "recording_id": "recording-1", 5 | "channel": 0, 6 | "start": 0.1, 7 | "duration": 0.3, 8 | "text": "transcript of the first segment", 9 | "language": "english", 10 | "speaker": "Norman Dyhrentfurth" 11 | }, 12 | { 13 | "id": "segment-2", 14 | "recording_id": "recording-1", 15 | "start": 0.5, 16 | "duration": 0.4 17 | }, 18 | { 19 | "id": "segment-3", 20 | "recording_id": "recording-2", 21 | "start": 0.1, 22 | "duration": 0.2 23 | }, 24 | { 25 | "id": "segment-4", 26 | "recording_id": "recording-2", 27 | "start": 0.3, 28 | "duration": 0.2 29 | } 30 | ] -------------------------------------------------------------------------------- 
/test/fixtures/supervision_with_scores.ctm: -------------------------------------------------------------------------------- 1 | recording-1 0 0.10 0.08 transcript 0.9 2 | recording-1 0 0.18 0.02 of 0.8 3 | recording-1 0 0.20 0.03 the 0.85 4 | recording-1 0 0.23 0.07 first 0.7 5 | recording-1 0 0.30 0.10 segment 0.98 6 | -------------------------------------------------------------------------------- /test/known_issues/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/known_issues/__init__.py -------------------------------------------------------------------------------- /test/known_issues/test_lazy_cuts_issues.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile, TemporaryDirectory 2 | 3 | from lhotse import CutSet, combine, load_manifest_lazy 4 | from lhotse.testing.dummies import DummyManifest 5 | 6 | 7 | def test_lazy_cuts_combine_split_issue(): 8 | # Test for lack of exception 9 | cuts = DummyManifest(CutSet, begin_id=0, end_id=1000) 10 | with TemporaryDirectory() as d, NamedTemporaryFile(suffix=".jsonl.gz") as f: 11 | cuts.to_file(f.name) 12 | f.flush() 13 | 14 | cuts_lazy = load_manifest_lazy(f.name) 15 | cuts_lazy = combine(cuts_lazy, cuts_lazy.perturb_speed(0.9)) 16 | cuts_lazy.split_lazy(d, chunk_size=100) 17 | -------------------------------------------------------------------------------- /test/known_issues/test_mixing_zero_energy_cuts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from lhotse import CutSet 5 | from lhotse.dataset.collation import collate_audio 6 | from lhotse.testing.fixtures import RandomCutTestCase 7 | from lhotse.utils import NonPositiveEnergyError 8 | 9 | 10 | class TestMixZeroEnergyCuts(RandomCutTestCase): 11 | 
@pytest.mark.parametrize("snr", [None, 10]) 12 | def test_mix_zero_energy_cut_raises(self, snr): 13 | sr = 16000 14 | zero_cut = self.with_cut( 15 | sampling_rate=sr, num_samples=sr, features=False, use_zeroes=True 16 | ) 17 | rand_cut = self.with_cut(sampling_rate=sr, num_samples=sr, features=False) 18 | 19 | mixed = zero_cut.mix(rand_cut, snr=snr) 20 | 21 | mix_cut_samples = mixed.load_audio() 22 | np.testing.assert_equal(rand_cut.load_audio(), mix_cut_samples) 23 | -------------------------------------------------------------------------------- /test/recipes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/recipes/__init__.py -------------------------------------------------------------------------------- /test/recipes/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from lhotse.audio import RecordingSet 8 | from lhotse.recipes.utils import read_manifests_if_cached 9 | from lhotse.supervision import SupervisionSet 10 | 11 | 12 | @pytest.fixture 13 | def recording_set() -> RecordingSet: 14 | return RecordingSet.from_json("test/fixtures/audio.json") 15 | 16 | 17 | @pytest.fixture 18 | def supervision_set() -> SupervisionSet: 19 | return SupervisionSet.from_json( 20 | "test/fixtures/supervision.json" 21 | ).with_alignment_from_ctm("test/fixtures/supervision.ctm") 22 | 23 | 24 | def test_read_manifests_if_cached( 25 | recording_set: RecordingSet, supervision_set: SupervisionSet 26 | ): 27 | tmp_test_dir = Path(f"{tempfile.gettempdir()}/lhotse_test_read_manifests_if_cached") 28 | if not tmp_test_dir.exists(): 29 | tmp_test_dir.mkdir() 30 | data_part = "dev" 31 | suffix = "jsonl.gz" 32 | tmp_recording_set_file = tmp_test_dir / f"recordings_{data_part}.{suffix}" 33 | 
tmp_supervision_set_file = tmp_test_dir / f"supervisions_{data_part}.{suffix}" 34 | recording_set.to_jsonl(tmp_recording_set_file) 35 | supervision_set.to_jsonl(tmp_supervision_set_file) 36 | 37 | try: 38 | cached_manifests = read_manifests_if_cached( 39 | [data_part], output_dir=tmp_test_dir 40 | ) 41 | assert data_part in cached_manifests 42 | assert cached_manifests[data_part]["recordings"] == recording_set 43 | 44 | cached_manifests = read_manifests_if_cached(data_part, output_dir=tmp_test_dir) 45 | assert data_part in cached_manifests 46 | assert cached_manifests[data_part]["recordings"] == recording_set 47 | finally: 48 | os.remove(tmp_recording_set_file) 49 | os.remove(tmp_supervision_set_file) 50 | -------------------------------------------------------------------------------- /test/shar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/shar/__init__.py -------------------------------------------------------------------------------- /test/shar/test_missing_values.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lhotse import CutSet 4 | from lhotse.testing.dummies import DummyManifest 5 | 6 | 7 | @pytest.mark.parametrize("drop_everything", [True, False]) 8 | def test_cut_set_from_shar(tmp_path, drop_everything): 9 | # Prepare data -- it needs to have missing values for some cuts 10 | cuts = DummyManifest(CutSet, begin_id=0, end_id=20, with_data=True) 11 | cuts[0].recording = None 12 | cuts[0].features = None 13 | cuts[0].custom_indexes = None 14 | cuts[0].custom_recording = None 15 | cuts[0].custom_features = None 16 | if drop_everything: 17 | cuts[0].custom_embedding = None 18 | 19 | # Prepare system under test 20 | cuts.to_shar( 21 | tmp_path, 22 | fields={ 23 | "recording": "wav", 24 | "features": "lilcom", 25 | "custom_embedding": "numpy", 26 | 
"custom_features": "lilcom", 27 | "custom_indexes": "numpy", 28 | "custom_recording": "wav", 29 | }, 30 | shard_size=10, 31 | ) 32 | cuts_shar = CutSet.from_shar(in_dir=tmp_path).to_eager() 33 | 34 | assert not cuts_shar[0].has_recording 35 | assert not cuts_shar[0].has_features 36 | assert not cuts_shar[0].has_custom("custom_indexes") 37 | assert not cuts_shar[0].has_custom("custom_recording") 38 | assert not cuts_shar[0].has_custom("custom_features") 39 | assert cuts_shar[0].has_custom("custom_embedding") == (not drop_everything) 40 | for cut in cuts_shar.subset(last=19): 41 | assert cut.has_recording 42 | assert cut.has_features 43 | assert cut.has_custom("custom_indexes") 44 | assert cut.has_custom("custom_recording") 45 | assert cut.has_custom("custom_features") 46 | assert cut.has_custom("custom_embedding") 47 | -------------------------------------------------------------------------------- /test/test_parallel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lhotse.parallel import parallel_map 4 | 5 | 6 | def pow2(x): 7 | return x**2 8 | 9 | 10 | def mul(x, y): 11 | return x * y 12 | 13 | 14 | @pytest.mark.parametrize("num_jobs", [1, 2]) 15 | def test_parallel_map_num_jobs(num_jobs): 16 | squares = list(map(pow2, range(100))) 17 | squares_parallel = list(parallel_map(pow2, range(100), num_jobs=num_jobs)) 18 | assert squares == squares_parallel 19 | 20 | 21 | def test_parallel_map_threads(): 22 | squares = list(map(pow2, range(100))) 23 | squares_parallel = list(parallel_map(pow2, range(100), num_jobs=2, threads=True)) 24 | assert squares == squares_parallel 25 | 26 | 27 | def test_parallel_map_two_iterables(): 28 | squares = list(map(mul, range(100), range(100))) 29 | squares_parallel = list(parallel_map(mul, range(100), range(100), num_jobs=2)) 30 | assert squares == squares_parallel 31 | -------------------------------------------------------------------------------- 
/test/video/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/video/__init__.py -------------------------------------------------------------------------------- /test/video/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from lhotse import Recording 6 | from lhotse.audio.backend import torchaudio_ffmpeg_backend_available 7 | 8 | # Disable video tests for PyTorch/Torchaudio < 2.0 9 | collect_ignore = [] 10 | if not torchaudio_ffmpeg_backend_available(): 11 | collect_ignore_glob = ["test_video_*.py"] 12 | 13 | 14 | @pytest.fixture(scope="session") 15 | def video_path() -> Path: 16 | return Path("test/fixtures/big_buck_bunny_small.mp4") 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def video_recording(video_path) -> Recording: 21 | return Recording.from_file(video_path) 22 | -------------------------------------------------------------------------------- /test/video/test_video_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torch.utils.data import DataLoader 3 | 4 | from lhotse import CutSet, MultiCut 5 | from lhotse.dataset import DynamicCutSampler 6 | from lhotse.dataset.collation import collate_video 7 | from lhotse.dataset.video import UnsupervisedAudioVideoDataset 8 | 9 | COLOR = 3 10 | HEIGHT = 240 11 | WIDTH = 320 12 | FPS = 25.0 13 | FRAMES = 132 14 | AUDIO_CHANNELS = 6 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def video_cut(video_recording) -> MultiCut: 19 | return video_recording.to_cut() 20 | 21 | 22 | @pytest.fixture(scope="session") 23 | def video_cut_set(video_cut) -> CutSet: 24 | return ( 25 | CutSet.from_cuts([video_cut]) 26 | .resample(16000) 27 | .cut_into_windows(duration=1.0, hop=0.48) 28 | .filter(lambda c: 
c.duration > 1 / FPS) 29 | .repeat(100) 30 | ) 31 | 32 | 33 | def test_collate_video(video_cut): 34 | cuts = CutSet.from_cuts([video_cut]).repeat(2) 35 | video, video_lens, audio, audio_lens = collate_video(cuts) 36 | assert video.shape == (2, FRAMES, COLOR, HEIGHT, WIDTH) 37 | assert video_lens.tolist() == [FRAMES, FRAMES] 38 | assert audio.shape == (2, AUDIO_CHANNELS, 253440) 39 | assert audio_lens.tolist() == [253440, 253440] 40 | 41 | 42 | def test_video_dataloading(video_cut_set): 43 | dataset = UnsupervisedAudioVideoDataset() 44 | sampler = DynamicCutSampler(video_cut_set, max_duration=2.0, shuffle=True) 45 | dloader = DataLoader(dataset, sampler=sampler, batch_size=None) 46 | 47 | for step, batch in enumerate(dloader): 48 | if step == 10: 49 | break 50 | 51 | for k in "cuts video audio video_lens audio_lens".split(): 52 | assert k in batch 53 | 54 | # Mostly just test that it runs without exceptions for a few steps. 55 | -------------------------------------------------------------------------------- /tools/make_release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eou pipefail # "strict" mode 4 | 5 | set -x # show executed commands 6 | 7 | # Clean up old builds. 8 | rm -rf dist/ build/ lhotse.egg_info/ 9 | 10 | export LHOTSE_PREPARING_RELEASE=1 11 | 12 | # Build wheels and package current source code 13 | python setup.py sdist bdist_wheel 14 | 15 | set +x # stop showing the executed commands 16 | 17 | echo 18 | echo "Lhotse is packaged SUCCESSFULLY!" 19 | echo 20 | echo "To upload a TEST RELEASE to testpypi (recommended):" 21 | echo " twine upload -r testpypi dist/*" 22 | echo 23 | echo "To upload a PUBLIC RELEASE to pypi:" 24 | echo " twine upload dist/*" 25 | --------------------------------------------------------------------------------