├── .git-blame-ignore-revs ├── .github ├── codecov.yml └── workflows │ ├── black.yml │ ├── flake8.yml │ ├── isort.yml │ └── unit_tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── VERSION ├── docs ├── Makefile ├── api.rst ├── cli.rst ├── conf.py ├── corpus.rst ├── cuts.rst ├── datasets.rst ├── features.rst ├── getting-started.rst ├── index.rst ├── kaldi.rst ├── lhotse-concept-graph.png ├── lhotse-cut-illustration.png ├── logo.png ├── make.bat ├── parallelism.rst ├── requirements.txt └── vad_sample.png ├── examples ├── 00-basic-workflow.ipynb ├── 01-cut-python-api.ipynb ├── 02-webdataset-integration.ipynb ├── 03-combining-datasets.ipynb ├── 04-lhotse-shar.ipynb └── 05-image-and-video-loading.ipynb ├── lhotse ├── __init__.py ├── array.py ├── audio │ ├── __init__.py │ ├── backend.py │ ├── mixer.py │ ├── recording.py │ ├── recording_set.py │ ├── source.py │ └── utils.py ├── augmentation │ ├── __init__.py │ ├── common.py │ ├── loudness.py │ ├── resample.py │ ├── rir.py │ ├── torchaudio.py │ ├── transform.py │ ├── utils.py │ └── wpe.py ├── bin │ ├── __init__.py │ ├── lhotse.py │ └── modes │ │ ├── __init__.py │ │ ├── cli_base.py │ │ ├── cut.py │ │ ├── features.py │ │ ├── install_tools.py │ │ ├── kaldi.py │ │ ├── manipulation.py │ │ ├── recipes │ │ ├── __init__.py │ │ ├── adept.py │ │ ├── aidatatang_200zh.py │ │ ├── aishell.py │ │ ├── aishell2.py │ │ ├── aishell3.py │ │ ├── aishell4.py │ │ ├── ali_meeting.py │ │ ├── ami.py │ │ ├── aspire.py │ │ ├── atcosim.py │ │ ├── audio_mnist.py │ │ ├── babel.py │ │ ├── baker_zh.py │ │ ├── bengaliai_speech.py │ │ ├── broadcast_news.py │ │ ├── but_reverb_db.py │ │ ├── bvcc.py │ │ ├── callhome_egyptian.py │ │ ├── callhome_english.py │ │ ├── cdsd.py │ │ ├── chime6.py │ │ ├── cmu_arctic.py │ │ ├── cmu_indic.py │ │ ├── cmu_kids.py │ │ ├── commonvoice.py │ │ ├── csj.py │ │ ├── cslu_kids.py │ │ ├── daily_talk.py │ │ ├── dihard3.py │ │ ├── dipco.py 
│ │ ├── earnings21.py │ │ ├── earnings22.py │ │ ├── ears.py │ │ ├── edacc.py │ │ ├── emilia.py │ │ ├── eval2000.py │ │ ├── fisher_english.py │ │ ├── fisher_spanish.py │ │ ├── fleurs.py │ │ ├── gale_arabic.py │ │ ├── gale_mandarin.py │ │ ├── gigaspeech.py │ │ ├── gigast.py │ │ ├── grid.py │ │ ├── heroico.py │ │ ├── hifitts.py │ │ ├── himia.py │ │ ├── icmcasr.py │ │ ├── icsi.py │ │ ├── iwslt22_ta.py │ │ ├── kespeech.py │ │ ├── ksponspeech.py │ │ ├── l2_arctic.py │ │ ├── libricss.py │ │ ├── librilight.py │ │ ├── librimix.py │ │ ├── librispeech.py │ │ ├── libritts.py │ │ ├── ljspeech.py │ │ ├── magicdata.py │ │ ├── mdcc.py │ │ ├── medical.py │ │ ├── mgb2.py │ │ ├── mls.py │ │ ├── mtedx.py │ │ ├── musan.py │ │ ├── must_c.py │ │ ├── nsc.py │ │ ├── peoples_speech.py │ │ ├── primewords.py │ │ ├── radio.py │ │ ├── reazonspeech.py │ │ ├── rir_noise.py │ │ ├── sbcsae.py │ │ ├── slu.py │ │ ├── spatial_librispeech.py │ │ ├── speechcommands.py │ │ ├── speechio.py │ │ ├── spgispeech.py │ │ ├── stcmds.py │ │ ├── switchboard.py │ │ ├── tal_asr.py │ │ ├── tal_csasr.py │ │ ├── tedlium.py │ │ ├── tedlium2.py │ │ ├── thchs_30.py │ │ ├── this_american_life.py │ │ ├── timit.py │ │ ├── uwb_atcc.py │ │ ├── vctk.py │ │ ├── voxceleb.py │ │ ├── voxconverse.py │ │ ├── voxpopuli.py │ │ ├── wenet_speech.py │ │ ├── wenetspeech4tts.py │ │ ├── xbmu_amdo31.py │ │ └── yesno.py │ │ ├── shar.py │ │ ├── supervision.py │ │ ├── utils.py │ │ ├── validate.py │ │ └── workflows.py ├── caching.py ├── custom.py ├── cut │ ├── __init__.py │ ├── base.py │ ├── data.py │ ├── describe.py │ ├── mixed.py │ ├── mono.py │ ├── multi.py │ ├── padding.py │ ├── set.py │ └── text.py ├── dataset │ ├── __init__.py │ ├── audio_tagging.py │ ├── collation.py │ ├── cut_transforms │ │ ├── __init__.py │ │ ├── concatenate.py │ │ ├── extra_padding.py │ │ ├── mix.py │ │ ├── perturb_speed.py │ │ ├── perturb_tempo.py │ │ ├── perturb_volume.py │ │ └── reverberate.py │ ├── dataloading.py │ ├── diarization.py │ ├── input_strategies.py │ ├── 
iterable_dataset.py │ ├── sampling │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bucketing.py │ │ ├── cut_pairs.py │ │ ├── data_source.py │ │ ├── dynamic.py │ │ ├── dynamic_bucketing.py │ │ ├── round_robin.py │ │ ├── simple.py │ │ ├── stateless.py │ │ ├── utils.py │ │ ├── weighted_simple.py │ │ └── zip.py │ ├── signal_transforms.py │ ├── source_separation.py │ ├── speech_recognition.py │ ├── speech_synthesis.py │ ├── speech_translation.py │ ├── surt.py │ ├── unsupervised.py │ ├── vad.py │ ├── video.py │ ├── vis.py │ └── webdataset.py ├── features │ ├── __init__.py │ ├── base.py │ ├── compression.py │ ├── fbank.py │ ├── io.py │ ├── kaldi │ │ ├── __init__.py │ │ ├── extractors.py │ │ └── layers.py │ ├── kaldifeat.py │ ├── librosa_fbank.py │ ├── mfcc.py │ ├── mixer.py │ ├── opensmile.py │ ├── spectrogram.py │ ├── ssl.py │ └── whisper_fbank.py ├── hf.py ├── image │ ├── __init__.py │ ├── image.py │ └── io.py ├── kaldi.py ├── lazy.py ├── manipulation.py ├── parallel.py ├── qa.py ├── recipes │ ├── __init__.py │ ├── adept.py │ ├── aidatatang_200zh.py │ ├── aishell.py │ ├── aishell2.py │ ├── aishell3.py │ ├── aishell4.py │ ├── ali_meeting.py │ ├── ami.py │ ├── aspire.py │ ├── atcosim.py │ ├── audio_mnist.py │ ├── babel.py │ ├── baker_zh.py │ ├── bengaliai_speech.py │ ├── broadcast_news.py │ ├── but_reverb_db.py │ ├── bvcc.py │ ├── callhome_egyptian.py │ ├── callhome_english.py │ ├── cdsd.py │ ├── chime6.py │ ├── cmu_arctic.py │ ├── cmu_indic.py │ ├── cmu_kids.py │ ├── commonvoice.py │ ├── csj.py │ ├── cslu_kids.py │ ├── daily_talk.py │ ├── dihard3.py │ ├── dipco.py │ ├── earnings21.py │ ├── earnings22.py │ ├── ears.py │ ├── edacc.py │ ├── emilia.py │ ├── eval2000.py │ ├── fisher_english.py │ ├── fisher_spanish.py │ ├── fleurs.py │ ├── gale_arabic.py │ ├── gale_mandarin.py │ ├── gigaspeech.py │ ├── gigast.py │ ├── grid.py │ ├── heroico.py │ ├── hifitts.py │ ├── himia.py │ ├── icmcasr.py │ ├── icsi.py │ ├── iwslt22_ta.py │ ├── kespeech.py │ ├── ksponspeech.py │ ├── l2_arctic.py 
│ ├── libricss.py │ ├── librilight.py │ ├── librimix.py │ ├── librispeech.py │ ├── libritts.py │ ├── ljspeech.py │ ├── magicdata.py │ ├── mdcc.py │ ├── medical.py │ ├── mgb2.py │ ├── mls.py │ ├── mobvoihotwords.py │ ├── mtedx.py │ ├── musan.py │ ├── must_c.py │ ├── nsc.py │ ├── peoples_speech.py │ ├── primewords.py │ ├── radio.py │ ├── reazonspeech.py │ ├── rir_noise.py │ ├── sbcsae.py │ ├── slu.py │ ├── spatial_librispeech.py │ ├── speechcommands.py │ ├── speechio.py │ ├── spgispeech.py │ ├── stcmds.py │ ├── switchboard.py │ ├── tal_asr.py │ ├── tal_csasr.py │ ├── tedlium.py │ ├── tedlium2.py │ ├── thchs_30.py │ ├── this_american_life.py │ ├── timit.py │ ├── utils.py │ ├── uwb_atcc.py │ ├── vctk.py │ ├── voxceleb.py │ ├── voxconverse.py │ ├── voxpopuli.py │ ├── wenet_speech.py │ ├── wenetspeech4tts.py │ ├── xbmu_amdo31.py │ └── yesno.py ├── serialization.py ├── shar │ ├── __init__.py │ ├── readers │ │ ├── __init__.py │ │ ├── lazy.py │ │ ├── tar.py │ │ └── utils.py │ ├── utils.py │ └── writers │ │ ├── __init__.py │ │ ├── array.py │ │ ├── audio.py │ │ ├── cut.py │ │ ├── shar.py │ │ └── tar.py ├── supervision.py ├── testing │ ├── __init__.py │ ├── dummies.py │ ├── fixtures.py │ └── random.py ├── tools │ ├── __init__.py │ ├── env.py │ └── sph2pipe.py ├── utils.py ├── workarounds.py └── workflows │ ├── __init__.py │ ├── activity_detection │ ├── README.md │ ├── __init__.py │ ├── base.py │ └── silero_vad.py │ ├── dnsmos.py │ ├── forced_alignment │ ├── __init__.py │ ├── asr_aligner.py │ ├── base.py │ ├── mms_aligner.py │ └── workflow.py │ ├── meeting_simulation │ ├── __init__.py │ ├── base.py │ ├── conversational.py │ └── speaker_independent.py │ └── whisper.py ├── pyproject.toml ├── setup.py ├── test ├── __init__.py ├── audio │ ├── __init__.py │ ├── test_audio_backend.py │ ├── test_audio_reads.py │ ├── test_recording_set.py │ └── test_resample_randomized.py ├── augmentation │ ├── __init__.py │ └── test_torchaudio.py ├── cut │ ├── __init__.py │ ├── conftest.py │ ├── 
test_copy_data.py │ ├── test_custom_attrs.py │ ├── test_custom_attrs_randomized.py │ ├── test_cut.py │ ├── test_cut_augmentation.py │ ├── test_cut_drop_attributes.py │ ├── test_cut_extend_by.py │ ├── test_cut_fill_supervision.py │ ├── test_cut_merge_supervisions.py │ ├── test_cut_mixing.py │ ├── test_cut_ops_preserve_id.py │ ├── test_cut_set.py │ ├── test_cut_set_mix.py │ ├── test_cut_trim_to_supervisions.py │ ├── test_cut_truncate.py │ ├── test_cut_with_in_memory_data.py │ ├── test_feature_extraction.py │ ├── test_invariants_randomized.py │ ├── test_masks.py │ ├── test_multi_cut_augmentation.py │ └── test_padding_cut.py ├── dataset │ ├── __init__.py │ ├── sampling │ │ ├── __init__.py │ │ ├── test_dynamic_bucketing.py │ │ ├── test_sampler_pickling.py │ │ ├── test_sampler_restoring.py │ │ ├── test_sampling.py │ │ ├── test_stateless_sampler.py │ │ └── test_text_sampling.py │ ├── test_audio_chunk_dataset.py │ ├── test_audio_tagging.py │ ├── test_batch_io.py │ ├── test_collation.py │ ├── test_controllable_weights.py │ ├── test_cut_transforms.py │ ├── test_diarization.py │ ├── test_iterable_dataset.py │ ├── test_signal_transforms.py │ ├── test_speech_recognition_dataset.py │ ├── test_speech_recognition_dataset_randomized.py │ ├── test_speech_synthesis_dataset.py │ ├── test_surt_dataset.py │ ├── test_unsupervised_dataset.py │ ├── test_vad_dataset.py │ ├── test_webdataset.py │ └── test_webdataset_ddp.py ├── features │ ├── __init__.py │ ├── test_array.py │ ├── test_chunky_writer.py │ ├── test_copy_feats.py │ ├── test_feature_writer.py │ ├── test_kaldi_features.py │ ├── test_kaldi_layers.py │ ├── test_kaldifeat_features.py │ ├── test_librosa_fbank.py │ ├── test_opensmile.py │ ├── test_s3prl.py │ ├── test_temporal_array.py │ ├── test_torchaudio_features.py │ ├── test_whisper_fbank.py │ └── test_writer_append.py ├── fixtures │ ├── ami │ │ ├── 350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc │ │ ├── ES2011a.Headset-0-40s-46s.wav │ │ ├── ES2011a_sups.jsonl.gz │ │ └── cuts.json │ ├── 
audio.json │ ├── big_buck_bunny_small.mp4 │ ├── common_voice_en_651325.mp3 │ ├── dummy_feats │ │ ├── feature_manifest.json │ │ └── storage │ │ │ ├── 25959652-8816-4810-a88a-0b022d6b9b6d.llc │ │ │ ├── 89739de9-308c-4487-9fa5-1c690d44e718.llc │ │ │ ├── d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc │ │ │ └── dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc │ ├── feature_config.yml │ ├── libri │ │ ├── audio.json │ │ ├── cuts.json │ │ ├── cuts_multi.json │ │ ├── cuts_no_feats.json │ │ ├── cuts_no_recording.json │ │ ├── feature_manifest.json.gz │ │ ├── libri-1088-134315-0000.wav │ │ ├── libri-1088-134315-0000_8ch.wav │ │ ├── libri-1088-134315-0000_rvb.wav │ │ ├── recreate.sh │ │ └── storage │ │ │ └── 30c2440c-93cb-4e83-b382-f2a59b3859b4.llc │ ├── ljspeech │ │ ├── cuts.json │ │ ├── feats │ │ │ ├── 5bb │ │ │ │ └── 5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc │ │ │ └── d39 │ │ │ │ └── d39cf273-a42d-433a-a63c-ba6357f1669e.llc │ │ └── storage │ │ │ ├── LJ002-0020.wav │ │ │ └── LJ002-0035.wav │ ├── lsmix │ │ ├── cuts.000000.jsonl.gz │ │ ├── features.000000.tar │ │ ├── recording.000000.tar │ │ └── source_feats.000000.tar │ ├── mini_librispeech │ │ ├── conf │ │ │ └── mfcc.conf │ │ ├── lhotse-b │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── lhotse │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── reco2dur │ │ ├── segments │ │ ├── spk2gender │ │ ├── spk2utt │ │ ├── text │ │ ├── utt2dur │ │ ├── utt2num_frames │ │ ├── utt2spk │ │ └── wav.scp │ ├── mini_librispeech2 │ │ ├── conf │ │ │ └── mfcc.conf │ │ ├── data │ │ │ ├── raw_mfcc_mini_librispeech2.1.ark │ │ │ └── raw_mfcc_mini_librispeech2.1.scp │ │ ├── feats.scp │ │ ├── frame_shift │ │ ├── lhotse │ │ │ ├── features.jsonl.gz │ │ │ ├── recordings.jsonl.gz │ │ │ └── supervisions.jsonl.gz │ │ ├── reco2dur │ │ ├── segments │ │ ├── spk2gender │ │ ├── spk2utt │ │ ├── text │ │ ├── utt2dur │ │ ├── utt2num_frames │ │ ├── utt2spk │ │ └── wav.scp │ ├── mix_cut_test │ │ ├── audio │ │ │ └── storage │ │ │ │ ├── 
2412-153948-0000.flac │ │ │ │ └── 2412-153948-0001.flac │ │ ├── feats │ │ │ └── storage │ │ │ │ ├── 5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc │ │ │ │ └── 9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc │ │ ├── offseted_audio_cut_manifest.json │ │ ├── overlayed_audio_cut_manifest.json │ │ └── overlayed_cut_manifest.json │ ├── mono_c0.opus │ ├── mono_c0.wav │ ├── mono_c1.wav │ ├── rir │ │ ├── real_8ch.wav │ │ └── sim_1ch.wav │ ├── stereo.mp3 │ ├── stereo.opus │ ├── stereo.sph │ ├── stereo.wav │ ├── supervision.ctm │ ├── supervision.json │ └── supervision_with_scores.ctm ├── known_issues │ ├── __init__.py │ ├── test_augment_with_executor.py │ ├── test_cut_consistency.py │ ├── test_lazy_cuts_issues.py │ ├── test_mixed_cut_num_frames.py │ └── test_mixing_zero_energy_cuts.py ├── recipes │ ├── __init__.py │ └── test_utils.py ├── shar │ ├── __init__.py │ ├── conftest.py │ ├── test_dataloading.py │ ├── test_missing_values.py │ ├── test_read_lazy.py │ └── test_write.py ├── test_feature_set.py ├── test_image.py ├── test_kaldi_dirs.py ├── test_lazy.py ├── test_manipulation.py ├── test_missing_torchaudio.py ├── test_multiplexing_iterables.py ├── test_parallel.py ├── test_qa.py ├── test_serialization.py ├── test_supervision_set.py ├── test_utils.py ├── test_workflows.py ├── video │ ├── __init__.py │ ├── conftest.py │ ├── test_video_cut.py │ ├── test_video_dataset.py │ └── test_video_recording.py └── workflows │ └── test_activity_detection.py └── tools └── make_release.sh /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # Migrate code style to Black 2 | b3c4db1cd7e22ee4dbfd8a5c3bfca6851605c76a 3 | 7b9fe724f570a6df86466b7bc0a19e9caef7b86c 4 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | ignore: 3 | # Recipe code is not subject to testing. 
4 | - "lhotse/recipes/**/*" 5 | - "lhotse/recipes/*" 6 | # Testing utilities shouldn't count. 7 | - "lhotse/testing/**/*" 8 | - "lhotse/testing/*" 9 | # Unit tests code artificially increases the coverage. 10 | - "test/**/*" 11 | - "test/*" 12 | -------------------------------------------------------------------------------- /.github/workflows/black.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: black 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | black: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: psf/black@stable 18 | with: 19 | options: "--check --diff --color" 20 | version: "22.3.0" 21 | -------------------------------------------------------------------------------- /.github/workflows/flake8.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: flake8 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | flake8: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [ 3.12 ] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install flake8 27 | run: | 28 | python -m pip install --upgrade pip flake8==7.1.1 29 | - name: Lint with flake8 30 | run: | 31 | # 
stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | -------------------------------------------------------------------------------- /.github/workflows/isort.yml: -------------------------------------------------------------------------------- 1 | name: isort 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | isort: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [ 3.12 ] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install isort 24 | run: | 25 | python -m pip install --upgrade pip isort==5.10.1 26 | - name: Check that imports are sorted 27 | run: | 28 | isort --check --diff lhotse 29 | isort --check --diff test 30 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.2.0 4 | hooks: 5 | - id: check-executables-have-shebangs 6 | - id: end-of-file-fixer 7 | - id: mixed-line-ending 8 | - id: trailing-whitespace 9 | 10 | - repo: https://github.com/PyCQA/flake8 11 | rev: 7.1.1 12 | hooks: 13 | - id: flake8 14 | args: ['--select=E9,F63,F7,F82'] 15 | 16 | - repo: https://github.com/pycqa/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | args: [--profile=black] 21 | 22 | - repo: https://github.com/psf/black 23 | rev: 22.3.0 24 | hooks: 25 | - id: black 26 | additional_dependencies: ['click==8.0.1'] 27 | 
-------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | build: 8 | os: "ubuntu-22.04" 9 | tools: 10 | python: "3.10" 11 | apt_packages: 12 | - "libsndfile-dev" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: "docs/conf.py" 17 | 18 | # Optionally set the version of Python and requirements required to build your docs 19 | python: 20 | install: 21 | - requirements: "docs/requirements.txt" # install this first to get numpy 22 | - method: "pip" 23 | path: "." 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Żelasko" 5 | given-names: "Piotr" 6 | orcid: "https://orcid.org/0000-0002-8245-0413" 7 | - family-names: "Povey" 8 | given-names: "Daniel" 9 | orcid: "https://orcid.org/0000-0002-0611-3634" 10 | - family-names: "Trmal" 11 | given-names: "Jan" 12 | - family-names: "Khudanpur" 13 | given-names: "Sanjeev" 14 | license: Apache-2.0 License 15 | title: "Lhotse: a speech data representation library for the modern deep learning ecosystem" 16 | date-released: 2020-04-24 17 | url: "https://github.com/lhotse-speech/lhotse" 18 | preferred-citation: 19 | type: proceedings 20 | authors: 21 | - family-names: "Żelasko" 22 | given-names: "Piotr" 23 | orcid: "https://orcid.org/0000-0002-8245-0413" 24 | - family-names: "Povey" 25 | given-names: "Daniel" 26 | orcid: "https://orcid.org/0000-0002-0611-3634" 27 | - family-names: "Trmal" 28 | given-names: "Jan" 29 | - family-names: "Khudanpur" 30 | given-names: "Sanjeev" 31 | conference: 32 | name: "NeurIPS Data-Centric AI Workshop" 33 | title: "Lhotse: a speech data representation library for the modern deep learning ecosystem" 34 | url: "https://arxiv.org/abs/2110.12561" 35 | year: 2021 36 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.md VERSION 2 | include docs/requirements.txt 3 | recursive-exclude * __pycache__ 4 | recursive-exclude * *.pyc 5 | recursive-exclude * *.pyo 6 | recursive-exclude * *.orig 7 | prune test* 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Lhotse 2 | Copyright 2020-2024 Piotr Żelasko 3 | Copyright 2020-2024 Johns Hopkins University 4 | Copyright 2020-2024 Xiaomi Corporation 5 | Copyright 2022-2023 Meaning.Team Inc. 
6 | Copyright 2023-2024 NVIDIA Corporation 7 | 8 | This repository includes software developed by: 9 | - Johns Hopkins University 10 | - Xiaomi Corporation 11 | - Meaning.Team Inc. 12 | - NVIDIA Corporation 13 | - other organizations and individuals. 14 | 15 | This project includes contributions from various organizations and individuals. 16 | Only major copyright holders are listed here. 17 | For a complete list of contributors, please refer to the project's version control history. 18 | 19 | Licensed under the Apache License, Version 2.0 (the "License"). 20 | See the LICENSE file for the full contents of the license. 21 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.31.0 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command-line interface 2 | ====================== 3 | 4 | .. 
click:: lhotse.bin:cli 5 | :prog: lhotse 6 | :nested: full 7 | -------------------------------------------------------------------------------- /docs/cuts.rst: -------------------------------------------------------------------------------- 1 | Cuts 2 | ==== 3 | 4 | Overview 5 | ******** 6 | 7 | Audio cuts are one of the main Lhotse features. 8 | Cut is a part of a recording, but it can be longer than a supervision segment, or even span multiple segments. 9 | The regions without a supervision are just audio that we don't assume we know anything about - there may be silence, 10 | noise, non-transcribed speech, etc. 11 | Task-specific datasets can leverage this information to generate masks for such regions. 12 | 13 | .. autoclass:: lhotse.cut.Cut 14 | :no-members: 15 | :no-special-members: 16 | :noindex: 17 | 18 | .. autoclass:: lhotse.cut.CutSet 19 | :no-members: 20 | :no-special-members: 21 | :noindex: 22 | 23 | Types of cuts 24 | ************* 25 | 26 | There are three cut classes: :class:`~lhotse.cut.MonoCut`, :class:`~lhotse.cut.MixedCut`, and :class:`~lhotse.cut.PaddingCut` that are described below in more detail: 27 | 28 | .. autoclass:: lhotse.cut.MonoCut 29 | :no-members: 30 | :no-special-members: 31 | :noindex: 32 | 33 | .. autoclass:: lhotse.cut.MixedCut 34 | :no-members: 35 | :no-special-members: 36 | :noindex: 37 | 38 | 39 | .. autoclass:: lhotse.cut.PaddingCut 40 | :no-members: 41 | :no-special-members: 42 | :noindex: 43 | 44 | CLI 45 | *** 46 | 47 | We provide a limited CLI to manipulate Lhotse manifests. 48 | Some examples of how to perform manipulations in the terminal: 49 | 50 | .. code-block:: bash 51 | 52 | # Reject short segments 53 | lhotse filter 'duration>=3.0' cuts.jsonl cuts-3s.jsonl 54 | # Pad short segments to 5 seconds. 55 | lhotse cut pad --duration 5.0 cuts-3s.jsonl cuts-5s-pad.jsonl 56 | # Truncate longer segments to 5 seconds. 
57 | lhotse cut truncate --max-duration 5.0 --offset-type random cuts-5s-pad.jsonl cuts-5s.jsonl 58 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. lhotse documentation master file, created by 2 | sphinx-quickstart on Thu Jul 2 08:36:51 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to lhotse's documentation! 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | getting-started.rst 14 | corpus.rst 15 | cuts.rst 16 | features.rst 17 | parallelism.rst 18 | datasets.rst 19 | kaldi.rst 20 | cli.rst 21 | api.rst 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | * :ref:`modindex` 29 | * :ref:`search` 30 | -------------------------------------------------------------------------------- /docs/lhotse-concept-graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/lhotse-concept-graph.png -------------------------------------------------------------------------------- /docs/lhotse-cut-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/lhotse-cut-illustration.png -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/logo.png -------------------------------------------------------------------------------- /docs/make.bat: 
-------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.18.1 2 | sphinx_rtd_theme==2.0.0 3 | sphinx==7.1.2 4 | sphinx-click==5.1.0 5 | sphinx-autodoc-typehints==2.0.0 6 | -------------------------------------------------------------------------------- /docs/vad_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/docs/vad_sample.png -------------------------------------------------------------------------------- /lhotse/audio/__init__.py: -------------------------------------------------------------------------------- 1 | from .backend import ( 2 | audio_backend, 3 | available_audio_backends, 4 | get_current_audio_backend, 5 | get_default_audio_backend, 6 | get_ffmpeg_torchaudio_info_enabled, 7 | info, 8 | 
read_audio, 9 | save_audio, 10 | set_current_audio_backend, 11 | set_ffmpeg_torchaudio_info_enabled, 12 | ) 13 | from .recording import Recording 14 | from .recording_set import RecordingSet 15 | from .source import AudioSource 16 | from .utils import ( 17 | AudioLoadingError, 18 | DurationMismatchError, 19 | VideoInfo, 20 | get_audio_duration_mismatch_tolerance, 21 | null_result_on_audio_loading_error, 22 | set_audio_duration_mismatch_tolerance, 23 | suppress_audio_loading_errors, 24 | ) 25 | 26 | __all__ = [ 27 | "AudioSource", 28 | "Recording", 29 | "RecordingSet", 30 | "AudioLoadingError", 31 | "DurationMismatchError", 32 | "VideoInfo", 33 | "audio_backend", 34 | "available_audio_backends", 35 | "get_current_audio_backend", 36 | "get_default_audio_backend", 37 | "get_audio_duration_mismatch_tolerance", 38 | "get_ffmpeg_torchaudio_info_enabled", 39 | "info", 40 | "read_audio", 41 | "save_audio", 42 | "set_current_audio_backend", 43 | "set_audio_duration_mismatch_tolerance", 44 | "set_ffmpeg_torchaudio_info_enabled", 45 | "null_result_on_audio_loading_error", 46 | "suppress_audio_loading_errors", 47 | ] 48 | -------------------------------------------------------------------------------- /lhotse/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import AugmentFn 2 | from .loudness import LoudnessNormalization 3 | from .rir import ReverbWithImpulseResponse 4 | from .torchaudio import * 5 | from .transform import AudioTransform 6 | from .utils import FastRandomRIRGenerator, convolve1d 7 | from .wpe import DereverbWPE, dereverb_wpe_numpy, dereverb_wpe_torch 8 | -------------------------------------------------------------------------------- /lhotse/augmentation/common.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import numpy as np 4 | 5 | # def augment_fn(audio: np.ndarray, sampling_rate: int) -> np.ndarray 6 | 
AugmentFn = Callable[[np.ndarray, int], np.ndarray] 7 | -------------------------------------------------------------------------------- /lhotse/bin/__init__.py: -------------------------------------------------------------------------------- 1 | from lhotse.bin.modes import * 2 | -------------------------------------------------------------------------------- /lhotse/bin/lhotse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Use this script like: https://lhotse.readthedocs.io/en/latest/cli.html 4 | """ 5 | 6 | # Note: we import all the CLI modes here so they get auto-registered 7 | # in Lhotse's main CLI entry-point. Then, setuptools is told to 8 | # invoke the "cli()" method from this script. 9 | from lhotse.bin.modes import * 10 | -------------------------------------------------------------------------------- /lhotse/bin/modes/__init__.py: -------------------------------------------------------------------------------- 1 | from .cli_base import * 2 | from .cut import * 3 | from .features import * 4 | from .install_tools import * 5 | from .kaldi import * 6 | from .manipulation import * 7 | from .recipes import * 8 | from .shar import * 9 | from .supervision import * 10 | from .utils import * 11 | from .validate import * 12 | from .workflows import * 13 | -------------------------------------------------------------------------------- /lhotse/bin/modes/cli_base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | 6 | @click.group() 7 | @click.option("-s", "--seed", type=int, help="Random seed.") 8 | def cli(seed): 9 | """ 10 | The shell entry point to Lhotse, a tool and a library for audio data manipulation in high altitudes. 
11 | """ 12 | logging.basicConfig( 13 | format="%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s", 14 | level=logging.INFO, 15 | ) 16 | if seed is not None: 17 | from lhotse.utils import fix_random_seed 18 | 19 | fix_random_seed(seed) 20 | 21 | 22 | @cli.group() 23 | def prepare(): 24 | """Command group with data preparation recipes.""" 25 | pass 26 | 27 | 28 | @cli.group() 29 | def download(): 30 | """Command group for download and extract data.""" 31 | pass 32 | -------------------------------------------------------------------------------- /lhotse/bin/modes/install_tools.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from ...tools.env import default_tools_cachedir 4 | from ...tools.sph2pipe import SPH2PIPE_URL 5 | from .cli_base import cli 6 | 7 | 8 | @cli.command(context_settings=dict(show_default=True)) 9 | @click.option( 10 | "--install-dir", 11 | type=click.Path(), 12 | default=default_tools_cachedir(), 13 | help="Directory where sph2pipe will be downloaded and installed.", 14 | ) 15 | @click.option( 16 | "--url", default=SPH2PIPE_URL, help="URL from which to download sph2pipe." 17 | ) 18 | def install_sph2pipe(install_dir: str, url: str): 19 | """ 20 | Install the sph2pipe program to handle sphere (.sph) audio files with 21 | "shorten" codec compression (needed for older LDC data). 22 | 23 | It downloads an archive and then decompresses and compiles the contents. 
24 | """ 25 | from lhotse.tools.sph2pipe import install_sph2pipe 26 | 27 | install_sph2pipe(where=install_dir, download_from=url) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/adept.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_adept, prepare_adept 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def adept( 12 | corpus_dir: Pathlike, 13 | output_dir: Pathlike, 14 | ): 15 | """ADEPT prosody transfer evaluation corpus data preparation.""" 16 | prepare_adept(corpus_dir, output_dir=output_dir) 17 | 18 | 19 | @download.command(context_settings=dict(show_default=True)) 20 | @click.argument("target_dir", type=click.Path()) 21 | def adept( 22 | target_dir: Pathlike, 23 | ): 24 | """ADEPT prosody transfer evaluation corpus download.""" 25 | download_adept(target_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aidatatang_200zh.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aidatatang_200zh import ( 5 | download_aidatatang_200zh, 6 | prepare_aidatatang_200zh, 7 | ) 8 | from lhotse.utils import Pathlike 9 | 10 | __all__ = ["aidatatang_200zh"] 11 | 12 | 13 | @prepare.command(context_settings=dict(show_default=True)) 14 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 15 | @click.argument("output_dir", type=click.Path()) 16 | def aidatatang_200zh(corpus_dir: Pathlike, output_dir: Pathlike): 17 | """aidatatang_200zh ASR data preparation. 
18 | Args: 19 | corpus_dir: 20 | It should contain a subdirectory "aidatatang_200zh" 21 | output_dir: 22 | The output directory. 23 | """ 24 | prepare_aidatatang_200zh(corpus_dir, output_dir=output_dir) 25 | 26 | 27 | @download.command(context_settings=dict(show_default=True)) 28 | @click.argument( 29 | "target_dir", 30 | type=click.Path(), 31 | ) 32 | def aidatatang_200zh(target_dir: Pathlike): 33 | """aidatatang_200zh download. 34 | Args: 35 | target_dir: 36 | It will create a dir aidatatang_200zh to contain all 37 | downloaded/extracted files 38 | """ 39 | download_aidatatang_200zh(target_dir) 40 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell import download_aishell, prepare_aishell 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aishell"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def aishell(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Aishell ASR data preparation.""" 15 | prepare_aishell(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def aishell(target_dir: Pathlike): 21 | """Aishell download.""" 22 | download_aishell(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell2.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.aishell2 import prepare_aishell2 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = 
["aishell2"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def aishell2(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 21 | """Aishell2 ASR data preparation.""" 22 | prepare_aishell2(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell3.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell3 import download_aishell3, prepare_aishell3 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aishell3"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def aishell3(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """aishell3 data preparation.""" 15 | prepare_aishell3(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path(), default=".") 20 | def aishell3(target_dir: Pathlike): 21 | """aishell3 download.""" 22 | download_aishell3(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aishell4.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.aishell4 import download_aishell4, prepare_aishell4 5 | from lhotse.utils 
import Pathlike 6 | 7 | __all__ = ["aishell4"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--normalize-text", 15 | is_flag=True, 16 | default=False, 17 | help="Conduct text normalization (remove punctuation, uppercase, etc.)", 18 | ) 19 | def aishell4(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 20 | """AISHELL-4 data preparation.""" 21 | prepare_aishell4(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 22 | 23 | 24 | @download.command(context_settings=dict(show_default=True)) 25 | @click.argument("target_dir", type=click.Path()) 26 | def aishell4(target_dir: Pathlike): 27 | """AISHELL-4 download.""" 28 | download_aishell4(target_dir) 29 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ali_meeting.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.ali_meeting import download_ali_meeting, prepare_ali_meeting 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["ali_meeting"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--mic", type=click.Choice(["near", "far", "ihm", "sdm", "mdm"]), default="far" 15 | ) 16 | @click.option( 17 | "--normalize-text", 18 | type=click.Choice(["none", "m2met"], case_sensitive=False), 19 | default="none", 20 | help="Type of text normalization to apply (M2MeT style is from the official challenge)", 21 | ) 22 | @click.option( 23 | "--save-mono", 24 | is_flag=True, 25 | default=False, 26 | help="If True and `mic` is sdm, extract first channel and 
save as new recording.", 27 | ) 28 | def ali_meeting( 29 | corpus_dir: Pathlike, 30 | output_dir: Pathlike, 31 | mic: str, 32 | normalize_text: str, 33 | save_mono: bool, 34 | ): 35 | """AliMeeting data preparation.""" 36 | prepare_ali_meeting( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | mic=mic, 40 | normalize_text=normalize_text, 41 | save_mono=save_mono, 42 | ) 43 | 44 | 45 | @download.command(context_settings=dict(show_default=True)) 46 | @click.argument("target_dir", type=click.Path()) 47 | @click.option("--force-download", is_flag=True, default=False) 48 | def ali_meeting(target_dir: Pathlike, force_download: bool): 49 | """AliMeeting download.""" 50 | download_ali_meeting(target_dir, force_download=force_download) 51 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/aspire.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.aspire import prepare_aspire 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["aspire"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("--mic", type=click.Choice(["single", "multi"]), default="single") 14 | def aspire(corpus_dir: Pathlike, output_dir: Pathlike, mic: str): 15 | """ASpIRE data preparation.""" 16 | prepare_aspire(corpus_dir, output_dir=output_dir, mic=mic) 17 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/atcosim.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.atcosim import download_atcosim, prepare_atcosim 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = 
["atcosim"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | def atcosim(target_dir: Pathlike): 13 | """ATCOSIM download.""" 14 | download_atcosim(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | @click.option("--silence-sym", type=str, default="") 21 | @click.option("--breath-sym", type=str, default="") 22 | @click.option("--foreign-sym", type=str, default="") 23 | @click.option("--partial-sym", type=str, default="") 24 | @click.option("--unknown-sym", type=str, default="") 25 | def atcosim( 26 | corpus_dir: Pathlike, 27 | output_dir: Pathlike, 28 | silence_sym: str, 29 | breath_sym: str, 30 | foreign_sym: str, 31 | partial_sym: str, 32 | unknown_sym: str, 33 | ): 34 | """ATCOSIM data preparation.""" 35 | prepare_atcosim( 36 | corpus_dir, 37 | output_dir=output_dir, 38 | silence_sym=silence_sym, 39 | breath_sym=breath_sym, 40 | foreign_sym=foreign_sym, 41 | partial_sym=partial_sym, 42 | unknown_sym=unknown_sym, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/audio_mnist.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.audio_mnist import download_audio_mnist, prepare_audio_mnist 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | @click.option( 13 | "--force-download", 14 | type=bool, 15 | default=False, 16 | help="If True, download even if file is present.", 17 | ) 18 | def audio_mnist(target_dir: Pathlike, force_download: bool): 19 | """AudioMNIST dataset download.""" 20 
| download_audio_mnist(target_dir, force_download) 21 | 22 | 23 | @prepare.command(context_settings=dict(show_default=True)) 24 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 25 | @click.argument("output_dir", type=click.Path()) 26 | def audio_mnist(corpus_dir: Pathlike, output_dir: Pathlike): 27 | """AudioMNIST corpus data preparation.""" 28 | logging.basicConfig(level=logging.INFO) 29 | prepare_audio_mnist( 30 | corpus_dir, 31 | output_dir=output_dir, 32 | ) 33 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/babel.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.babel import prepare_single_babel_language 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def babel( 12 | corpus_dir: Pathlike, 13 | output_dir: Pathlike, 14 | ): 15 | """ 16 | This is a data preparation recipe for the IARPA BABEL corpus 17 | (see: https://www.iarpa.gov/index.php/research-programs/babel). 18 | It should support all of the languages available in BABEL. 19 | It will prepare the data from the "conversational" part of BABEL. 
20 | 21 | This script should be invoked separately for each language you want to prepare, e.g.: 22 | $ lhotse prepare babel /export/corpora5/Babel/IARPA_BABEL_BP_101 data/cantonese 23 | $ lhotse prepare babel /export/corpora5/Babel/BABEL_OP1_103 data/bengali 24 | """ 25 | prepare_single_babel_language(corpus_dir, output_dir=output_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/baker_zh.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.baker_zh import download_baker_zh, prepare_baker_zh 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["baker_zh"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path(), default=".") 12 | def baker_zh(target_dir: Pathlike): 13 | """bazker_zh download.""" 14 | download_baker_zh(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | def baker_zh( 21 | corpus_dir: Pathlike, 22 | output_dir: Pathlike, 23 | ): 24 | """bazker_zh data preparation.""" 25 | prepare_baker_zh(corpus_dir, output_dir=output_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/bengaliai_speech.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.bengaliai_speech import prepare_bengaliai_speech 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | 
@click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def bengaliai_speech( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """Bengali.AI Speech data preparation.""" 26 | prepare_bengaliai_speech( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/broadcast_news.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.broadcast_news import prepare_broadcast_news 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("audio_dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript_dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output_dir", type=click.Path()) 12 | def broadcast_news(audio_dir: Pathlike, transcript_dir: Pathlike, output_dir: Pathlike): 13 | """ 14 | English Broadcast News 1997 data preparation. 15 | It will output three manifests: for recordings, topic sections, and speech segments. 16 | It supports the following LDC distributions: 17 | 18 | \b 19 | * 1997 English Broadcast News Train (HUB4) 20 | Speech LDC98S71 21 | Transcripts LDC98T28 22 | 23 | This data is not available for free - your institution needs to have an LDC subscription. 
24 | """ 25 | prepare_broadcast_news( 26 | audio_dir=audio_dir, transcripts_dir=transcript_dir, output_dir=output_dir 27 | ) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/but_reverb_db.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes import download_but_reverb_db, prepare_but_reverb_db 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["but_reverb_db"] 10 | 11 | 12 | @prepare.command() 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--parts", 17 | "-p", 18 | type=str, 19 | multiple=True, 20 | default=["silence", "rir"], 21 | show_default=True, 22 | help="Parts to prepare.", 23 | ) 24 | def but_reverb_db( 25 | corpus_dir: Pathlike, output_dir: Pathlike, parts: Union[str, Sequence[str]] 26 | ): 27 | """BUT Reverb DB data preparation.""" 28 | prepare_but_reverb_db(corpus_dir, output_dir=output_dir, parts=parts) 29 | 30 | 31 | @download.command() 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option( 34 | "--force-download", 35 | type=bool, 36 | default=False, 37 | help="If True, download even if file is present.", 38 | ) 39 | def but_reverb_db(target_dir: Pathlike, force_download: bool): 40 | """BUT Reverb DB download.""" 41 | download_but_reverb_db(target_dir, force_download=force_download) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/bvcc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_bvcc, prepare_bvcc 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["bvcc"] 8 | 9 | 10 | 
@prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("-nj", "--num_jobs", type=int, default=1) 14 | def bvcc(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs): 15 | """BVCC data preparation. 16 | 17 | CORPUS_DIR should contain the following dir structure 18 | 19 | ./phase1-main/README 20 | ./phase1-main/DATA/sets/* 21 | ./phase1-main/DATA/wav/* 22 | ... 23 | 24 | ./phase1-ood/README 25 | ./phase1-ood/DATA/sets/ 26 | ./phase1-ood/DATA/wav/ 27 | ... 28 | 29 | Check the READMEs for details. 30 | 31 | See 'lhotse download bvcc' for links to instructions how to obtain the corpus. 32 | """ 33 | prepare_bvcc(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 34 | 35 | 36 | @download.command() 37 | def bvcc(): 38 | """BVCC/VoiceMOS challange data cannot be downloaded. 39 | 40 | See info and instructions how to obtain BVCC dataset used for VoiceMOS challange: 41 | - https://arxiv.org/abs/2105.02373 42 | - https://nii-yamagishilab.github.io/ecooper-demo/VoiceMOS2022/index.html 43 | - https://codalab.lisn.upsaclay.fr/competitions/695 44 | """ 45 | download_bvcc( 46 | target_dir="Not needed - just prints the docstring. Hopefully the license will be lifted." 
47 | ) 48 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/callhome_egyptian.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_callhome_egyptian 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("audio-dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript-dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output-dir", type=click.Path()) 12 | @click.option( 13 | "--absolute-paths", 14 | default=False, 15 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 16 | ) 17 | def callhome_egyptian( 18 | audio_dir: Pathlike, 19 | transcript_dir: Pathlike, 20 | output_dir: Pathlike, 21 | absolute_paths: bool, 22 | ): 23 | """ 24 | About the Callhome Egyptian Arabic Corpus 25 | 26 | The CALLHOME Egyptian Arabic corpus of telephone speech consists of 120 unscripted 27 | telephone conversations between native speakers of Egyptian Colloquial Arabic (ECA), 28 | the spoken variety of Arabic found in Egypt. The dialect of ECA that this 29 | dictionary represents is Cairene Arabic. 30 | 31 | This recipe uses the speech and transcripts available through LDC. In addition, 32 | an Egyptian arabic phonetic lexicon (available via LDC) is used to get word to 33 | phoneme mappings for the vocabulary. This datasets are: 34 | 35 | Speech : LDC97S45 36 | Transcripts : LDC97T19 37 | Lexicon : LDC99L22 (unused here) 38 | 39 | To actually read the audio, you will need the SPH2PIPE binary: you can provide its path, 40 | so that we will add it in the manifests (otherwise you might need to modify your PATH 41 | environment variable to find sph2pipe). 
42 | """ 43 | prepare_callhome_egyptian( 44 | audio_dir=audio_dir, 45 | transcript_dir=transcript_dir, 46 | output_dir=output_dir, 47 | absolute_paths=absolute_paths, 48 | ) 49 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cdsd.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.cdsd import prepare_cdsd 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["cdsd"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def cdsd(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """CDSD ASR data preparation.""" 15 | prepare_cdsd(corpus_dir, output_dir=output_dir) 16 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_arctic.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.cmu_arctic import download_cmu_arctic, prepare_cmu_arctic 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["cmu_arctic"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def cmu_arctic(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """CMU Arctic data preparation.""" 15 | prepare_cmu_arctic(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command() 19 | @click.argument("target_dir", type=click.Path()) 20 | def cmu_arctic(target_dir: Pathlike): 21 | """CMU Arctic download.""" 22 | download_cmu_arctic(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_indic.py: 
-------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.cmu_indic import download_cmu_indic, prepare_cmu_indic 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | def cmu_indic(corpus_dir: Pathlike, output_dir: Pathlike): 12 | """CMU Indic data preparation.""" 13 | prepare_cmu_indic(corpus_dir, output_dir=output_dir) 14 | 15 | 16 | @download.command() 17 | @click.argument("target_dir", type=click.Path()) 18 | def cmu_indic(target_dir: Pathlike): 19 | """CMU Indic download.""" 20 | download_cmu_indic(target_dir) 21 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cmu_kids.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.cmu_kids import prepare_cmu_kids 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["cmu_kids"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--absolute-paths", 17 | type=bool, 18 | default=True, 19 | help="Use absolute paths for recordings", 20 | ) 21 | def cmu_kids( 22 | corpus_dir: Pathlike, 23 | output_dir: Pathlike, 24 | absolute_paths: Optional[bool] = False, 25 | ): 26 | """CMU Kids corpus data preparation.""" 27 | prepare_cmu_kids( 28 | corpus_dir, 29 | output_dir=output_dir, 30 | absolute_paths=absolute_paths, 31 | ) 32 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/csj.py: 
-------------------------------------------------------------------------------- 1 | from typing import Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.csj import prepare_csj 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["csj"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("manifest_dir", type=click.Path()) 15 | @click.option( 16 | "-t", 17 | "--transcript-dir", 18 | type=click.Path(), 19 | default=None, 20 | help=( 21 | "Directory to save parsed transcripts in txt format, with " 22 | "valid and eval sets created from the core and noncore datasets. " 23 | "If not provided, this script will not create valid and eval " 24 | "sets." 25 | ), 26 | ) 27 | @click.option( 28 | "-p", 29 | "--dataset-parts", 30 | type=str, 31 | default=None, 32 | multiple=True, 33 | help=( 34 | "List of dataset parts to prepare. 
" 35 | "To prepare multiple parts, pass each with `-p` " 36 | "Example: `-p eval1 -p eval2`" 37 | ), 38 | ) 39 | @click.option( 40 | "-j", 41 | "--num-jobs", 42 | type=int, 43 | default=1, 44 | help="How many threads to use (can give good speed-ups with slow disks).", 45 | ) 46 | def csj( 47 | corpus_dir: Pathlike, 48 | manifest_dir: Pathlike, 49 | dataset_parts: Union[str, Sequence[str]], 50 | transcript_dir: Pathlike, 51 | num_jobs: int, 52 | ): 53 | "Prepare Corpus of Spontaneous Japanese" 54 | 55 | prepare_csj( 56 | corpus_dir=corpus_dir, 57 | manifest_dir=manifest_dir, 58 | dataset_parts=dataset_parts, 59 | transcript_dir=transcript_dir, 60 | nj=num_jobs, 61 | ) 62 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/cslu_kids.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.cslu_kids import prepare_cslu_kids 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["cslu_kids"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--absolute-paths", 17 | type=bool, 18 | default=True, 19 | help="Use absolute paths for recordings", 20 | ) 21 | @click.option( 22 | "--normalize-text", 23 | type=bool, 24 | default=True, 25 | help="Remove noise tags (, ) from spontaneous speech transcripts", 26 | ) 27 | def cslu_kids( 28 | corpus_dir: Pathlike, 29 | output_dir: Pathlike, 30 | absolute_paths: Optional[bool] = False, 31 | normalize_text: Optional[bool] = True, 32 | ): 33 | """CSLU Kids corpus data preparation.""" 34 | prepare_cslu_kids( 35 | corpus_dir, 36 | output_dir=output_dir, 37 | absolute_paths=absolute_paths, 38 | normalize_text=normalize_text, 39 | ) 40 | 
-------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/daily_talk.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.daily_talk import download_daily_talk, prepare_daily_talk 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option("--num-jobs", type=int, default=1, help="Number of parallel workers.") 12 | def daily_talk(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 13 | """ 14 | DailyTalk recording and supervision manifest preparation. 15 | """ 16 | prepare_daily_talk(corpus_dir, output_dir, num_jobs=num_jobs) 17 | 18 | 19 | @download.command() 20 | @click.argument("target_dir", type=click.Path()) 21 | @click.option("--force-download", is_flag=True, help="Force download.") 22 | def daily_talk(target_dir: Pathlike, force_download: bool = False): 23 | """ 24 | Download DailyTalk dataset. 
25 | """ 26 | download_daily_talk(target_dir, force_download) 27 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/dihard3.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.dihard3 import prepare_dihard3 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["dihard3"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option("--dev", type=click.Path(exists=True, dir_okay=True)) 15 | @click.option("--eval", type=click.Path(exists=True, dir_okay=True)) 16 | @click.option( 17 | "--uem/--no-uem", 18 | default=True, 19 | help="Specify whether or not to create UEM supervision", 20 | ) 21 | @click.option( 22 | "-j", 23 | "--num-jobs", 24 | type=int, 25 | default=1, 26 | help="Number of jobs to scan corpus directory for recordings.", 27 | ) 28 | def dihard3( 29 | output_dir: Pathlike, 30 | dev: Optional[Pathlike], 31 | eval: Optional[Pathlike], 32 | uem: Optional[float] = True, 33 | num_jobs: Optional[int] = 1, 34 | ): 35 | """DIHARD3 data preparation.""" 36 | prepare_dihard3( 37 | dev, eval, output_dir=output_dir, uem_manifest=uem, num_jobs=num_jobs 38 | ) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/dipco.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.dipco import download_dipco, prepare_dipco 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["dipco"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | 
@click.option( 14 | "--mic", 15 | type=click.Choice(["ihm", "mdm"], case_sensitive=False), 16 | default="mdm", 17 | help="DiPCo microphone setting.", 18 | ) 19 | @click.option( 20 | "--normalize-text", 21 | type=click.Choice(["none", "upper", "kaldi"], case_sensitive=False), 22 | default="kaldi", 23 | help="Text normalization method.", 24 | show_default=True, 25 | ) 26 | @click.option( 27 | "--use-chime7-offset", 28 | is_flag=True, 29 | default=False, 30 | help="If True, offset session IDs (from CHiME-7 challenge).", 31 | ) 32 | def dipco( 33 | corpus_dir: Pathlike, 34 | output_dir: Pathlike, 35 | mic: str, 36 | normalize_text: str, 37 | use_chime7_offset: bool, 38 | ): 39 | """DiPCo data preparation.""" 40 | prepare_dipco( 41 | corpus_dir, 42 | output_dir=output_dir, 43 | mic=mic, 44 | normalize_text=normalize_text, 45 | use_chime7_offset=use_chime7_offset, 46 | ) 47 | 48 | 49 | @download.command(context_settings=dict(show_default=True)) 50 | @click.argument("target_dir", type=click.Path()) 51 | @click.option( 52 | "--force-download", 53 | type=bool, 54 | default=False, 55 | help="If True, download even if file is present.", 56 | ) 57 | def dipco( 58 | target_dir: Pathlike, 59 | force_download: bool, 60 | ): 61 | """DiPCo download.""" 62 | download_dipco( 63 | target_dir, 64 | force_download=force_download, 65 | ) 66 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/earnings21.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.earnings21 import download_earnings21, prepare_earnings21 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @download.command(context_settings=dict(show_default=True)) 9 | @click.argument("target_dir", type=click.Path()) 10 | def earnings21(target_dir: Pathlike): 11 | """Earnings21 dataset download.""" 12 | download_earnings21(target_dir) 13 | 14 | 15 | 
@prepare.command(context_settings=dict(show_default=True)) 16 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 17 | @click.argument("output_dir", type=click.Path()) 18 | @click.option( 19 | "--normalize-text/--no-normalize-text", default=False, help="Normalize the text." 20 | ) 21 | def earnings21(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 22 | """Earnings21 data preparation.""" 23 | prepare_earnings21(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 24 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/earnings22.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.earnings22 import download_earnings22, prepare_earnings22 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @download.command(context_settings=dict(show_default=True)) 9 | def earnings22(): 10 | """Earnings22 dataset download.""" 11 | download_earnings22(None) 12 | 13 | 14 | @prepare.command(context_settings=dict(show_default=True)) 15 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 16 | @click.argument("output_dir", type=click.Path()) 17 | @click.option( 18 | "--normalize-text/--no-normalize-text", default=False, help="Normalize the text." 
19 | ) 20 | def earnings22(corpus_dir: Pathlike, output_dir: Pathlike, normalize_text: bool): 21 | """Earnings22 data preparation.""" 22 | prepare_earnings22(corpus_dir, output_dir=output_dir, normalize_text=normalize_text) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ears.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.ears import download_ears, prepare_ears 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def ears( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """EARS data preparation.""" 26 | prepare_ears( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | 32 | 33 | @download.command(context_settings=dict(show_default=True)) 34 | @click.argument("target_dir", type=click.Path()) 35 | def ears( 36 | target_dir: Pathlike, 37 | ): 38 | """EARS data download.""" 39 | download_ears( 40 | target_dir=target_dir, 41 | ) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/edacc.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.edacc import download_edacc, prepare_edacc 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["edacc"] 10 | 
11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | def edacc(corpus_dir: Pathlike, output_dir: Pathlike): 16 | """The Edinburgh International Accents of English Corpus (EDACC) data preparation.""" 17 | prepare_edacc(corpus_dir, output_dir=output_dir) 18 | 19 | 20 | @download.command(context_settings=dict(show_default=True)) 21 | @click.argument("target_dir", type=click.Path()) 22 | def edacc(target_dir: Pathlike): 23 | """The Edinburgh International Accents of English Corpus (EDACC) download.""" 24 | download_edacc(target_dir) 25 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/emilia.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.emilia import prepare_emilia 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-l", 13 | "--lang", 14 | type=str, 15 | help="The language to process. 
Valid values: zh, en, ja, ko, de, fr", 16 | ) 17 | @click.option( 18 | "-j", 19 | "--num-jobs", 20 | type=int, 21 | default=1, 22 | help="How many threads to use (can give good speed-ups with slow disks).", 23 | ) 24 | def emilia( 25 | corpus_dir: Pathlike, 26 | output_dir: Pathlike, 27 | lang: str, 28 | num_jobs: int = 1, 29 | ): 30 | """Prepare the Emilia corpus manifests.""" 31 | prepare_emilia( 32 | corpus_dir=corpus_dir, 33 | output_dir=output_dir, 34 | lang=lang, 35 | num_jobs=num_jobs, 36 | ) 37 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/eval2000.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes import prepare_eval2000 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus-dir", type=click.Path(exists=True, file_okay=False)) 12 | @click.argument("output-dir", type=click.Path()) 13 | @click.option( 14 | "--transcript-dir", 15 | type=click.Path(exists=True, file_okay=False), 16 | default=None, 17 | required=False, 18 | ) 19 | @click.option( 20 | "--absolute-paths", 21 | default=False, 22 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 23 | ) 24 | def eval2000( 25 | corpus_dir: Pathlike, 26 | output_dir: Pathlike, 27 | absolute_paths: bool, 28 | transcript_dir: Optional[Pathlike] = None, 29 | ): 30 | """ 31 | The Eval2000 corpus preparation. 32 | 33 | \b 34 | This is conversational telephone speech collected as 2-channel, 8kHz-sampled data. 35 | The catalog number LDC2002S09 for audio corpora and LDC2002T43 for transcripts. 36 | 37 | This data is not available for free - your institution needs to have an LDC subscription. 
38 | """ 39 | 40 | prepare_eval2000( 41 | corpus_dir=corpus_dir, 42 | output_dir=output_dir, 43 | absolute_paths=absolute_paths, 44 | transcript_path=transcript_dir, 45 | ) 46 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/fisher_spanish.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_fisher_spanish 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("audio-dir", type=click.Path(exists=True, file_okay=False)) 10 | @click.argument("transcript-dir", type=click.Path(exists=True, file_okay=False)) 11 | @click.argument("output-dir", type=click.Path()) 12 | @click.option( 13 | "--absolute-paths", 14 | default=False, 15 | help="Whether to return absolute or relative (to the corpus dir) paths for recordings.", 16 | ) 17 | def fisher_spanish( 18 | audio_dir: Pathlike, 19 | transcript_dir: Pathlike, 20 | output_dir: Pathlike, 21 | absolute_paths: bool, 22 | ): 23 | """ 24 | The Fisher Spanish corpus preparation. 25 | 26 | \b 27 | This is conversational telephone speech collected as 2-channel μ-law, 8kHz-sampled data. 28 | The catalog number LDC2010S01 for audio corpus and LDC2010T04 for transcripts. 29 | 30 | This data is not available for free - your institution needs to have an LDC subscription. 
31 | """ 32 | prepare_fisher_spanish( 33 | audio_dir_path=audio_dir, 34 | transcript_dir_path=transcript_dir, 35 | output_dir=output_dir, 36 | absolute_paths=absolute_paths, 37 | ) 38 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/fleurs.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.fleurs import download_fleurs, prepare_fleurs 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["fleurs"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-j", 17 | "--num-jobs", 18 | type=int, 19 | default=1, 20 | help="How many threads to use (can give good speed-ups with slow disks).", 21 | ) 22 | @click.option( 23 | "-l", 24 | "--lang", 25 | multiple=True, 26 | default=["all"], 27 | help="Specify which languages to prepare, e.g., " 28 | " lhoste prepare librispeech mtedx_corpus data -l de -l fr -l es ", 29 | ) 30 | def fleurs( 31 | corpus_dir: Pathlike, 32 | output_dir: Pathlike, 33 | num_jobs: int, 34 | lang: Optional[Union[str, Sequence[str]]], 35 | ): 36 | """Fleurs ASR data preparation.""" 37 | prepare_fleurs(corpus_dir, output_dir=output_dir, num_jobs=num_jobs, languages=lang) 38 | 39 | 40 | @download.command(context_settings=dict(show_default=True)) 41 | @click.argument("target_dir", type=click.Path()) 42 | @click.option( 43 | "-l", 44 | "--lang", 45 | multiple=True, 46 | default=["all"], 47 | help="Specify which languages to download, e.g., " 48 | " lhotse download fleurs . 
-l hi_in -l en_us " 49 | " lhotse download fleurs", 50 | ) 51 | @click.option( 52 | "--force-download", 53 | type=bool, 54 | is_flag=True, 55 | default=False, 56 | help="Specify whether to overwrite an existing archive", 57 | ) 58 | def fleurs( 59 | target_dir: Pathlike, 60 | lang: Optional[Union[str, Sequence[str]]], 61 | force_download: bool = False, 62 | ): 63 | """FLEURS download.""" 64 | download_fleurs( 65 | target_dir, 66 | languages=lang, 67 | force_download=force_download, 68 | ) 69 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/gale_arabic.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.gale_arabic import prepare_gale_arabic 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["gale_arabic"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-s", 16 | "--audio", 17 | type=click.Path(exists=True, dir_okay=True), 18 | multiple=True, 19 | help="Paths to audio dirs, e.g., LDC2013S02. Multiple corpora can be provided by repeating `-s`.", 20 | ) 21 | @click.option( 22 | "-t", 23 | "--transcript", 24 | type=click.Path(exists=True, dir_okay=True), 25 | multiple=True, 26 | help="Paths to transcript dirs, e.g., LDC2013T17. 
Multiple corpora can be provided by repeating `-t`", 27 | ) 28 | @click.option( 29 | "--absolute-paths", 30 | type=bool, 31 | default=False, 32 | help="Use absolute paths for recordings", 33 | ) 34 | def gale_arabic( 35 | output_dir: Pathlike, 36 | audio: Optional[List[Pathlike]] = None, 37 | transcript: Optional[List[Pathlike]] = None, 38 | absolute_paths: Optional[bool] = False, 39 | ): 40 | """GALE Arabic Phases 1 to 4 Broadcast news and conversation data preparation.""" 41 | prepare_gale_arabic( 42 | audio, 43 | transcript, 44 | output_dir=output_dir, 45 | absolute_paths=absolute_paths, 46 | ) 47 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/gale_mandarin.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.gale_mandarin import prepare_gale_mandarin 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["gale_mandarin"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-s", 16 | "--audio", 17 | type=click.Path(exists=True, dir_okay=True), 18 | multiple=True, 19 | help="Paths to audio dirs, e.g., LDC2013S08. Multiple corpora can be provided by repeating `-s`.", 20 | ) 21 | @click.option( 22 | "-t", 23 | "--transcript", 24 | type=click.Path(exists=True, dir_okay=True), 25 | multiple=True, 26 | help="Paths to transcript dirs, e.g., LDC2013T20. 
Multiple corpora can be provided by repeating `-t`", 27 | ) 28 | @click.option( 29 | "--absolute-paths", 30 | type=bool, 31 | default=False, 32 | help="Use absolute paths for recordings", 33 | ) 34 | @click.option( 35 | "--segment-words", 36 | type=bool, 37 | default=False, 38 | help="Use 'jieba' package to perform word segmentation on the text", 39 | ) 40 | def gale_mandarin( 41 | output_dir: Pathlike, 42 | audio: Optional[List[Pathlike]] = None, 43 | transcript: Optional[List[Pathlike]] = None, 44 | absolute_paths: Optional[bool] = False, 45 | segment_words: Optional[bool] = False, 46 | ): 47 | """GALE Mandarin Broadcast speech data preparation.""" 48 | prepare_gale_mandarin( 49 | audio, 50 | transcript, 51 | output_dir=output_dir, 52 | absolute_paths=absolute_paths, 53 | segment_words=segment_words, 54 | ) 55 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/grid.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_grid, prepare_grid 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "--with-supervisions/--no-supervisions", 13 | default=True, 14 | help="Note: using supervisions might discard some recordings that do not have them.", 15 | ) 16 | @click.option("-j", "--jobs", default=1, type=int, help="The number of parallel jobs.") 17 | def grid( 18 | corpus_dir: Pathlike, 19 | output_dir: Pathlike, 20 | with_supervisions: bool, 21 | jobs: int, 22 | ): 23 | """Grid audio-visual speech corpus preparation.""" 24 | prepare_grid( 25 | corpus_dir, 26 | output_dir=output_dir, 27 | with_supervisions=with_supervisions, 28 | num_jobs=jobs, 29 | ) 30 | 31 | 32 | 
@download.command(context_settings=dict(show_default=True)) 33 | @click.argument("target_dir", type=click.Path()) 34 | def grid( 35 | target_dir: Pathlike, 36 | ): 37 | """Grid audio-visual speech corpus download.""" 38 | download_grid(target_dir) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/heroico.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.heroico import download_heroico, prepare_heroico 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["heroico"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("speech_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("transcript_dir", type=click.Path(exists=True, dir_okay=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | def heroico(speech_dir: Pathlike, transcript_dir: Pathlike, output_dir: Pathlike): 15 | """heroico Answers ASR data preparation.""" 16 | prepare_heroico(speech_dir, transcript_dir, output_dir) 17 | 18 | 19 | @download.command(context_settings=dict(show_default=True)) 20 | @click.argument("target_dir", type=click.Path()) 21 | def heroico(target_dir: Pathlike): 22 | """heroico download.""" 23 | download_heroico(target_dir) 24 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/hifitts.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes import download_hifitts, prepare_hifitts 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | 
"-j", 13 | "--num-jobs", 14 | type=int, 15 | default=1, 16 | help="How many jobs to use (can give good speed-ups with slow disks).", 17 | ) 18 | def hifitts(corpus_dir: Pathlike, output_dir: Pathlike, num_jobs: int): 19 | """HiFiTTS data preparation.""" 20 | prepare_hifitts(corpus_dir, output_dir=output_dir, num_jobs=num_jobs) 21 | 22 | 23 | @download.command(context_settings=dict(show_default=True)) 24 | @click.argument("target_dir", type=click.Path()) 25 | def hifitts( 26 | target_dir: Pathlike, 27 | ): 28 | """HiFiTTS data download.""" 29 | download_hifitts(target_dir) 30 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/himia.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.himia import download_himia, prepare_himia 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["himia"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-p", 17 | "--dataset-parts", 18 | type=str, 19 | default=["auto"], 20 | multiple=True, 21 | help="List of dataset parts to prepare. 
To prepare multiple parts, pass each with `-p` " 22 | "Example: `-p test -p cw_test` " 23 | "Prepare both HI_MIA and HI_MIA_CW by default " 24 | "All possible data parts are train, dev, test and cw_test", 25 | ) 26 | def himia( 27 | corpus_dir: Pathlike, 28 | output_dir: Pathlike, 29 | dataset_parts: Sequence[str], 30 | ): 31 | """HI_MIA and HI_MIA_CW data preparation.""" 32 | if len(dataset_parts) == 1: 33 | dataset_parts = dataset_parts[0] 34 | prepare_himia( 35 | corpus_dir=corpus_dir, 36 | output_dir=output_dir, 37 | dataset_parts=dataset_parts, 38 | ) 39 | 40 | 41 | @download.command(context_settings=dict(show_default=True)) 42 | @click.argument("target_dir", type=click.Path()) 43 | @click.option( 44 | "-p", 45 | "--dataset-parts", 46 | type=str, 47 | default=["auto"], 48 | multiple=True, 49 | help="List of dataset parts to download. To download multiple parts, pass each with `-p` " 50 | "Example: `-p test -p cw_test` " 51 | "Download both HI_MIA and HI_MIA_CW by default " 52 | "All possible data parts are train, dev, test and cw_test", 53 | ) 54 | def himia( 55 | target_dir: Pathlike, 56 | dataset_parts: Sequence[str], 57 | ): 58 | """HI-MIA and HI_MIA_CW download.""" 59 | if len(dataset_parts) == 1: 60 | dataset_parts = dataset_parts[0] 61 | download_himia(target_dir, dataset_parts=dataset_parts) 62 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/icmcasr.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.icmcasr import prepare_icmcasr 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | 
"--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | @click.option( 21 | "--mic", 22 | type=click.Choice(["ihm", "sdm", "mdm"]), 23 | default="ihm", 24 | help="Microphone type.", 25 | ) 26 | def icmcasr( 27 | corpus_dir: Pathlike, 28 | output_dir: Optional[Pathlike] = None, 29 | mic: str = "ihm", 30 | num_jobs: int = 1, 31 | ): 32 | """ICMC-ASR data preparation.""" 33 | prepare_icmcasr( 34 | corpus_dir=corpus_dir, 35 | output_dir=output_dir, 36 | mic=mic, 37 | num_jobs=num_jobs, 38 | ) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/iwslt22_ta.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.iwslt22_ta import prepare_iwslt22_ta 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("splits", type=click.Path(exists=True, dir_okay=True)) 13 | @click.argument("output_dir", type=click.Path()) 14 | @click.option( 15 | "-j", 16 | "--num-jobs", 17 | type=int, 18 | default=1, 19 | help="How many threads to use (can give good speed-ups with slow disks).", 20 | ) 21 | @click.option( 22 | "--normalize-text", 23 | default=False, 24 | help="Whether to perform additional text cleaning and normalization from https://aclanthology.org/2022.iwslt-1.29.pdf.", 25 | ) 26 | @click.option( 27 | "--langs", 28 | default="", 29 | help="Comma-separated list of language abbreviations for source and target languages", 30 | ) 31 | def iwslt22_ta( 32 | corpus_dir: Pathlike, 33 | splits: Pathlike, 34 | output_dir: Pathlike, 35 | normalize_text: bool, 36 | langs: str, 37 | num_jobs: int, 38 | ): 39 | """ 40 | IWSLT_2022 data 
preparation. 41 | \b 42 | This is conversational telephone speech collected as 8kHz-sampled data. 43 | The catalog number LDC2022E01 corresponds to the train, dev, and test1 44 | splits of the iwslt2022 shared task. 45 | To obtaining this data your institution needs to have an LDC subscription. 46 | You also should download the predined splits with 47 | git clone https://github.com/kevinduh/iwslt22-dialect.git 48 | """ 49 | langs_list = langs.split(",") 50 | prepare_iwslt22_ta( 51 | corpus_dir, 52 | splits, 53 | output_dir=output_dir, 54 | num_jobs=num_jobs, 55 | clean=normalize_text, 56 | langs=langs_list, 57 | ) 58 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/kespeech.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.kespeech import prepare_kespeech 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-p", 15 | "--dataset-parts", 16 | type=str, 17 | default=["all"], 18 | multiple=True, 19 | help="List of dataset parts to prepare. To prepare multiple parts," 20 | "pass each with `-p` Example: `-p dev_phase1 -p dev_phase2`", 21 | ) 22 | @click.option( 23 | "-j", 24 | "--num-jobs", 25 | type=int, 26 | default=1, 27 | help="How many threads to use (can give good speed-ups with slow disks).", 28 | ) 29 | def kespeech( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | dataset_parts: Sequence[str], 33 | num_jobs: int, 34 | ): 35 | """ 36 | The KeSpeech corpus preparation. 
37 | """ 38 | prepare_kespeech( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_jobs=num_jobs, 42 | dataset_parts=dataset_parts, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/ksponspeech.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.ksponspeech import prepare_ksponspeech 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["ksponspeech"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-p", 17 | "--dataset-parts", 18 | type=str, 19 | default=["all"], 20 | multiple=True, 21 | help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` " 22 | "Example: `-p train -p test`", 23 | ) 24 | @click.option( 25 | "-j", 26 | "--num-jobs", 27 | type=int, 28 | default=1, 29 | help="How many threads to use (can give good speed-ups with slow disks).", 30 | ) 31 | @click.option( 32 | "--normalize-text", 33 | type=click.Choice(["none", "default"], case_sensitive=False), 34 | default="default", 35 | help="Type of text normalization to apply.", 36 | ) 37 | def ksponspeech( 38 | corpus_dir: Pathlike, 39 | output_dir: Pathlike, 40 | dataset_parts: Sequence[str], 41 | num_jobs: int, 42 | normalize_text: str, 43 | ): 44 | """KsponSpeech ASR data preparation.""" 45 | if len(dataset_parts) == 1: 46 | dataset_parts = dataset_parts[0] 47 | prepare_ksponspeech( 48 | corpus_dir, 49 | output_dir=output_dir, 50 | num_jobs=num_jobs, 51 | dataset_parts=dataset_parts, 52 | normalize_text=normalize_text, 53 | ) 54 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/l2_arctic.py: 
-------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.l2_arctic import prepare_l2_arctic 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["l2_arctic"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def l2_arctic(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """L2 Arctic data preparation.""" 15 | prepare_l2_arctic(corpus_dir, output_dir=output_dir) 16 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/libricss.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.libricss import download_libricss, prepare_libricss 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command() 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "--type", 13 | type=click.Choice(["ihm", "ihm-mix", "sdm", "mdm"]), 14 | default="mdm", 15 | help="Type of the corpus to prepare", 16 | show_default=True, 17 | ) 18 | @click.option( 19 | "--segmented/--no-segmented", 20 | default=False, 21 | help="If True, the manifest will contain Cuts corresponding to 1-minute segments.", 22 | show_default=True, 23 | ) 24 | def libricss(corpus_dir: Pathlike, output_dir: Pathlike, type: str, segmented: bool): 25 | """ 26 | LibriCSS recording and supervision manifest preparation. 
27 | """ 28 | prepare_libricss(corpus_dir, output_dir, type=type, segmented_cuts=segmented) 29 | 30 | 31 | @download.command() 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option("--force-download", is_flag=True, help="Force download") 34 | def libricss(target_dir: Pathlike, force_download: bool = False): 35 | """ 36 | Download LibriCSS dataset. 37 | """ 38 | download_libricss(target_dir, force_download) 39 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/librilight.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.librilight import prepare_librilight 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def librilight( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """LibriLight data preparation.""" 26 | prepare_librilight( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/librimix.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.librimix import download_librimix, prepare_librimix 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["librimix"] 8 | 9 | 10 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("librimix-csv", type=click.Path(exists=True, dir_okay=False))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--sampling-rate",
    type=int,
    default=16000,
    help="Sampling rate to set in the RecordingSet manifest.",
)
@click.option(
    "--min-segment-seconds",
    type=float,
    default=3.0,
    help="Remove segments shorter than MIN_SEGMENT_SECONDS.",
)
@click.option(
    "--with-precomputed-mixtures/--no-precomputed-mixtures",
    type=bool,
    default=False,
    help="Optionally create a RecordingSet manifest including the precomputed LibriMix mixtures.",
)
def librimix(
    librimix_csv: Pathlike,
    output_dir: Pathlike,
    sampling_rate: int,
    min_segment_seconds: float,
    with_precomputed_mixtures: bool,
):
    """
    LibriMix source separation data preparation.

    LIBRIMIX_CSV: path to the LibriMix metadata CSV file (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    # Fixed docstring typo ("LibrMix") and help-text grammar ("an RecordingSet").
    prepare_librimix(
        librimix_csv=librimix_csv,
        output_dir=output_dir,
        sampling_rate=sampling_rate,
        min_segment_seconds=min_segment_seconds,
        with_precomputed_mixtures=with_precomputed_mixtures,
    )


@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
def librimix(target_dir: Pathlike):
    """Mini LibriMix download."""
    # NOTE: same function name as the prepare command above is intentional —
    # click registers each command at decoration time, so both remain reachable.
    download_librimix(target_dir)
dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def ljspeech( 14 | corpus_dir: Pathlike, 15 | output_dir: Pathlike, 16 | ): 17 | """LJSpeech data preparation.""" 18 | prepare_ljspeech(corpus_dir, output_dir=output_dir) 19 | 20 | 21 | @download.command(context_settings=dict(show_default=True)) 22 | @click.argument("target_dir", type=click.Path(), default=".") 23 | def ljspeech(target_dir: Pathlike): 24 | """LJSpeech download.""" 25 | download_ljspeech(target_dir) 26 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/magicdata.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.magicdata import download_magicdata, prepare_magicdata 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["magicdata"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def magicdata(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Magicdata ASR data preparation.""" 15 | prepare_magicdata(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def magicdata(target_dir: Pathlike): 21 | """Magicdata download.""" 22 | download_magicdata(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mdcc.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.mdcc import download_mdcc, prepare_mdcc 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | 
@prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-p", 15 | "--dataset-parts", 16 | type=str, 17 | default=["all"], 18 | multiple=True, 19 | help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` " 20 | "Example: `-p train -p valid`", 21 | ) 22 | def MDCC( 23 | corpus_dir: Pathlike, 24 | dataset_parts: Sequence[str], 25 | output_dir: Optional[Pathlike] = None, 26 | ): 27 | """MDCC data preparation.""" 28 | prepare_mdcc( 29 | corpus_dir=corpus_dir, 30 | dataset_parts=dataset_parts, 31 | output_dir=output_dir, 32 | ) 33 | 34 | 35 | @download.command(context_settings=dict(show_default=True)) 36 | @click.argument("target_dir", type=click.Path()) 37 | @click.option( 38 | "--force-download", 39 | is_flag=True, 40 | default=False, 41 | help="if True, it will download the MDCC data even if it is already present.", 42 | ) 43 | def MDCC( 44 | target_dir: Pathlike, 45 | force_download: Optional[bool] = False, 46 | ): 47 | """MDCC download.""" 48 | download_mdcc( 49 | target_dir=target_dir, 50 | force_download=force_download, 51 | ) 52 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/medical.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.medical import download_medical, prepare_medical 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "-j", 15 | "--num-jobs", 16 | type=int, 17 | default=1, 18 | help="How many 
threads to use (can give good speed-ups with slow disks).", 19 | ) 20 | def medical( 21 | corpus_dir: Pathlike, 22 | output_dir: Optional[Pathlike] = None, 23 | num_jobs: int = 1, 24 | ): 25 | """Medical data preparation.""" 26 | prepare_medical( 27 | corpus_dir=corpus_dir, 28 | output_dir=output_dir, 29 | num_jobs=num_jobs, 30 | ) 31 | 32 | 33 | @download.command(context_settings=dict(show_default=True)) 34 | @click.argument("target_dir", type=click.Path()) 35 | @click.option("--force-download", is_flag=True, default=False, help="Force download") 36 | def medical( 37 | target_dir: Pathlike, 38 | force_download: Optional[bool] = False, 39 | ): 40 | """Medical download.""" 41 | download_medical( 42 | target_dir=target_dir, 43 | force_download=force_download, 44 | ) 45 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mgb2.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.mgb2 import prepare_mgb2 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["mgb2"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--text-cleaning/--no-text-cleaning", default=True, help="Basic text cleaning." 
15 | ) 16 | @click.option( 17 | "--buck-walter/--no-buck-walter", 18 | default=False, 19 | help="Use BuckWalter transliteration.", 20 | ) 21 | @click.option( 22 | "-j", 23 | "--num-jobs", 24 | type=int, 25 | default=1, 26 | help="How many threads to use (can give good speed-ups with slow disks).", 27 | ) 28 | @click.option( 29 | "--mer-thresh", 30 | default=80, 31 | help="filter out segments based on mer (Match Error Rate).", 32 | ) 33 | def mgb2( 34 | corpus_dir: Pathlike, 35 | output_dir: Pathlike, 36 | text_cleaning: bool, 37 | buck_walter: bool, 38 | num_jobs: int, 39 | mer_thresh: int, 40 | ): 41 | """mgb2 ASR data preparation.""" 42 | prepare_mgb2( 43 | corpus_dir, 44 | output_dir, 45 | text_cleaning=text_cleaning, 46 | buck_walter=buck_walter, 47 | num_jobs=num_jobs, 48 | mer_thresh=mer_thresh, 49 | ) 50 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/mls.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.mls import prepare_mls 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["mls"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--opus/--flac", 15 | type=bool, 16 | default=True, 17 | help="Which codec should be used (OPUS or FLAC)", 18 | ) 19 | @click.option( 20 | "-j", 21 | "--num-jobs", 22 | type=int, 23 | default=1, 24 | help="How many threads to use (can give good speed-ups with slow disks).", 25 | ) 26 | def mls(corpus_dir: Pathlike, output_dir: Pathlike, opus: bool, num_jobs: int): 27 | """ 28 | Multilingual Librispeech (MLS) data preparation. 29 | 30 | Multilingual LibriSpeech (MLS) dataset is a large multilingual corpus suitable for speech research. 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # Fixed help example: the binary is "lhotse" (was "lhoste") and the
    # subcommand for this recipe is "mtedx" (was "librispeech").
    help="Specify which languages to prepare, e.g., "
    " lhotse prepare mtedx mtedx_corpus data -l de -l fr -l es ",
)
def mtedx(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
    lang: Optional[Union[str, Sequence[str]]],
):
    """
    MTEDx ASR data preparation.

    CORPUS_DIR: root directory of the MTEDx corpus (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    prepare_mtedx(corpus_dir, output_dir=output_dir, num_jobs=num_jobs, languages=lang)
-l de -l fr -l es " 49 | " lhoste download mtedx", 50 | ) 51 | def mtedx( 52 | target_dir: Pathlike, 53 | lang: Optional[Union[str, Sequence[str]]], 54 | ): 55 | """MTEDx download.""" 56 | download_mtedx(target_dir, languages=lang) 57 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/musan.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.musan import download_musan, prepare_musan 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["musan"] 8 | 9 | 10 | @prepare.command() 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option( 14 | "--use-vocals/--no-vocals", 15 | default=True, 16 | help='Whether to include vocal music in "music" part.', 17 | ) 18 | def musan(corpus_dir: Pathlike, output_dir: Pathlike, use_vocals: bool): 19 | """MUSAN data preparation.""" 20 | prepare_musan(corpus_dir, output_dir=output_dir, use_vocals=use_vocals) 21 | 22 | 23 | @download.command() 24 | @click.argument("target_dir", type=click.Path()) 25 | def musan(target_dir: Pathlike): 26 | """MUSAN download.""" 27 | download_musan(target_dir) 28 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/must_c.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.must_c import prepare_must_c 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | @click.option("--tgt-lang", type=str, help="The target language, e.g., zh, de, fr.") 
14 | @click.option( 15 | "-j", 16 | "--num-jobs", 17 | type=int, 18 | default=1, 19 | help="How many threads to use (can give good speed-ups with slow disks).", 20 | ) 21 | def must_c(corpus_dir: Pathlike, output_dir: Pathlike, tgt_lang, num_jobs: int): 22 | """MUST-C speech translation data preparation.""" 23 | logging.basicConfig(level=logging.INFO) 24 | prepare_must_c( 25 | corpus_dir, 26 | output_dir=output_dir, 27 | tgt_lang=tgt_lang, 28 | num_jobs=num_jobs, 29 | ) 30 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/nsc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.nsc import NSC_PARTS, prepare_nsc 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-p", 13 | "--dataset-part", 14 | type=click.Choice(NSC_PARTS), 15 | default="PART3_SameCloseMic", 16 | help="Which part of NSC should be prepared", 17 | ) 18 | @click.option( 19 | "-j", 20 | "--num-jobs", 21 | type=int, 22 | default=1, 23 | help="How many threads to use (can give good speed-ups with slow disks).", 24 | ) 25 | def nsc(corpus_dir: Pathlike, output_dir: Pathlike, dataset_part: str, num_jobs: int): 26 | """ 27 | \b 28 | This is a data preparation recipe for the National Corpus of Speech in Singaporean English. 29 | CORPUS_DIR: root directory that contains all NSC shared folder. Eg. 
30 | ├── IMDA - National Speech Corpus 31 | │ ├── LEXICON 32 | │ ├── PART1 33 | │ ├── PART2 34 | │ └── PART3 35 | ├── IMDA - National Speech Corpus - Additional 36 | │ └── IMDA - National Speech Corpus (Additional) 37 | │ ├── PART4 38 | │ ├── PART5 39 | │ └── PART6 40 | """ 41 | prepare_nsc( 42 | corpus_dir, dataset_part=dataset_part, output_dir=output_dir, num_jobs=num_jobs 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/peoples_speech.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes.peoples_speech import prepare_peoples_speech 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 10 | @click.argument("output_dir", type=click.Path()) 11 | @click.option( 12 | "-j", 13 | "--num-jobs", 14 | type=int, 15 | default=1, 16 | help="How many threads to use (can give good speed-ups with slow disks).", 17 | ) 18 | def peoples_speech( 19 | corpus_dir: Pathlike, 20 | output_dir: Pathlike, 21 | num_jobs: int = 1, 22 | ): 23 | """Prepare The People's Speech corpus manifests.""" 24 | prepare_peoples_speech( 25 | corpus_dir=corpus_dir, 26 | output_dir=output_dir, 27 | num_jobs=num_jobs, 28 | ) 29 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/primewords.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.primewords import download_primewords, prepare_primewords 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["primewords"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, 
dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def primewords(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Primewords ASR data preparation.""" 15 | prepare_primewords(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def primewords(target_dir: Pathlike): 21 | """Primewords download.""" 22 | download_primewords(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/radio.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes.radio import prepare_radio 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["radio"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path(dir_okay=True)) 15 | @click.option( 16 | "-d", 17 | "--min-seg-dur", 18 | type=float, 19 | default=0.5, 20 | help="The minimum segment duration", 21 | ) 22 | @click.option( 23 | "-j", 24 | "--num-jobs", 25 | type=int, 26 | default=4, 27 | help="The number of parallel threads to use for data preparation", 28 | ) 29 | def radio( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | min_seg_dur: float = 0.5, 33 | num_jobs: int = 4, 34 | ): 35 | """Data preparation""" 36 | prepare_radio( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | num_jobs=num_jobs, 40 | min_segment_duration=min_seg_dur, 41 | ) 42 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/reazonspeech.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List 3 | 4 | import 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
def reazonspeech(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
):
    """
    ReazonSpeech ASR data preparation.

    CORPUS_DIR: root directory of the downloaded ReazonSpeech data (must exist).
    OUTPUT_DIR: where the recipe writes its output.
    """
    logging.basicConfig(level=logging.INFO)
    prepare_reazonspeech(corpus_dir, output_dir=output_dir, num_jobs=num_jobs)


@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
@click.option(
    "--subset",
    type=click.Choice(("auto",) + REAZONSPEECH),
    multiple=True,
    default=["auto"],
    # Fixed help text: the previous version claimed "(default: small-v1)" while
    # the actual default is "auto", and left an example backtick unterminated.
    help="List of dataset parts to download. To download multiple parts, "
    "pass each with `--subset`. Example: `--subset all`",
)
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
def reazonspeech(target_dir: Pathlike, subset: List[str], num_jobs: int):
    """ReazonSpeech download."""
    logging.basicConfig(level=logging.INFO)
    # "auto" is a sentinel meaning "let the recipe pick": collapse the
    # multi-value tuple to the plain string the recipe expects.
    if "auto" in subset:
        subset = "auto"
    download_reazonspeech(target_dir, dataset_parts=subset, num_jobs=num_jobs)
-------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.sbcsae import download_sbcsae, prepare_sbcsae 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["sbcsae"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "--geolocation", 17 | type=bool, 18 | is_flag=True, 19 | default=False, 20 | help="Include geographic coordinates of speakers' hometowns in the manifests.", 21 | ) 22 | @click.option( 23 | "--omit-realignments", 24 | type=bool, 25 | is_flag=True, 26 | default=False, 27 | help="Only output the original corpus segmentation without boundary improvements.", 28 | ) 29 | def sbcsae( 30 | corpus_dir: Pathlike, 31 | output_dir: Pathlike, 32 | geolocation: bool, 33 | omit_realignments: bool, 34 | ): 35 | """SBCSAE data preparation.""" 36 | prepare_sbcsae( 37 | corpus_dir, 38 | output_dir=output_dir, 39 | geolocation=geolocation, 40 | omit_realignments=omit_realignments, 41 | ) 42 | 43 | 44 | @download.command(context_settings=dict(show_default=True)) 45 | @click.argument("target_dir", type=click.Path()) 46 | @click.option( 47 | "--force-download", 48 | type=bool, 49 | is_flag=True, 50 | default=False, 51 | help="Force download.", 52 | ) 53 | def sbcsae( 54 | target_dir: Pathlike, 55 | force_download: bool, 56 | ): 57 | """SBCSAE download.""" 58 | download_sbcsae(target_dir, force_download=force_download) 59 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/slu.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes 
import prepare 6 | from lhotse.recipes.slu import prepare_slu 7 | from lhotse.utils import Pathlike 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path()) 12 | @click.argument("output_dir", type=click.Path()) 13 | def slu( 14 | corpus_dir: Pathlike, 15 | output_dir: Pathlike, 16 | ): 17 | prepare_slu(corpus_dir=corpus_dir, output_dir=output_dir) 18 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/speechcommands.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Tuple, Union 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import download, prepare 6 | from lhotse.recipes.speechcommands import ( 7 | download_speechcommands, 8 | prepare_speechcommands, 9 | ) 10 | from lhotse.utils import Pathlike 11 | 12 | 13 | @prepare.command(context_settings=dict(show_default=True)) 14 | @click.argument("speechcommands_version", type=str) 15 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 16 | @click.argument("output_dir", type=click.Path()) 17 | def speechcommands( 18 | speechcommands_version: str, 19 | corpus_dir: Pathlike, 20 | output_dir: Optional[Pathlike] = None, 21 | ): 22 | """Speech Commands v0.01 or v0.02 data preparation.""" 23 | prepare_speechcommands( 24 | speechcommands_version=speechcommands_version, 25 | corpus_dir=corpus_dir, 26 | output_dir=output_dir, 27 | ) 28 | 29 | 30 | @download.command(context_settings=dict(show_default=True)) 31 | @click.argument("speechcommands_version", type=str) 32 | @click.argument("target_dir", type=click.Path()) 33 | @click.option("--force-download", is_flag=True, default=False, help="Force download") 34 | def speechcommands( 35 | speechcommands_version: str, 36 | target_dir: Pathlike, 37 | force_download: Optional[bool] = False, 38 | ): 39 | """Speech Commands v0.01 or v0.02 download.""" 40 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def speechio(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None,
    # NOTE(review): num_jobs has no matching @click.option, so click can never
    # set it, and it is not forwarded to prepare_speechio below — dead
    # parameter; confirm with recipe author before removing or wiring it up.
    num_jobs: int = 1,
):
    """SpeechIO data preparation. See https://github.com/SpeechColab/Leaderboard"""
    prepare_speechio(
        corpus_dir=corpus_dir,
        output_dir=output_dir,
    )
22 | ) 23 | def spgispeech( 24 | corpus_dir: Pathlike, 25 | output_dir: Pathlike, 26 | num_jobs: int, 27 | normalize_text: bool, 28 | ): 29 | """SPGISpeech ASR data preparation.""" 30 | prepare_spgispeech( 31 | corpus_dir, 32 | output_dir, 33 | num_jobs=num_jobs, 34 | normalize_text=normalize_text, 35 | ) 36 | 37 | 38 | @download.command(context_settings=dict(show_default=True)) 39 | @click.argument("target_dir", type=click.Path()) 40 | def spgispeech(target_dir: Pathlike): 41 | """SPGISpeech download.""" 42 | download_spgispeech(target_dir) 43 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/stcmds.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.stcmds import download_stcmds, prepare_stcmds 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["stcmds"] 8 | 9 | 10 | @prepare.command(context_settings=dict(show_default=True)) 11 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 12 | @click.argument("output_dir", type=click.Path()) 13 | def stcmds(corpus_dir: Pathlike, output_dir: Pathlike): 14 | """Stcmds ASR data preparation.""" 15 | prepare_stcmds(corpus_dir, output_dir=output_dir) 16 | 17 | 18 | @download.command(context_settings=dict(show_default=True)) 19 | @click.argument("target_dir", type=click.Path()) 20 | def stcmds(target_dir: Pathlike): 21 | """Stcmds download.""" 22 | download_stcmds(target_dir) 23 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/switchboard.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import prepare 4 | from lhotse.recipes import prepare_switchboard 5 | from lhotse.utils import Pathlike 6 | 7 | 8 | @prepare.command(context_settings=dict(show_default=True)) 9 | 
@prepare.command(context_settings=dict(show_default=True))
@click.argument("audio-dir", type=click.Path(exists=True, file_okay=False))
@click.argument("output-dir", type=click.Path())
@click.option("--transcript-dir", type=click.Path(exists=True, file_okay=False))
@click.option(
    "--sentiment-dir",
    type=click.Path(exists=True, file_okay=False),
    help="Optional path to LDC2020T14 package with sentiment annotations for SWBD.",
)
@click.option(
    "--omit-silence/--retain-silence",
    default=True,
    # Fixed help text: the previous wording ("Should the [silence] segments be
    # kept.") described the opposite of what --omit-silence (the default) does.
    help="Whether to omit (default) or retain the [silence] segments.",
)
@click.option(
    "--absolute-paths",
    # NOTE(review): without is_flag=True, click infers a BOOL *value* option
    # from the default, i.e. usage is `--absolute-paths true`. Left unchanged
    # to avoid breaking existing invocations; confirm whether a flag was meant.
    default=False,
    help="Whether to return absolute or relative (to the corpus dir) paths for recordings.",
)
def switchboard(
    audio_dir: Pathlike,
    output_dir: Pathlike,
    transcript_dir: Pathlike,
    sentiment_dir: Pathlike,
    omit_silence: bool,
    absolute_paths: bool,
):
    """
    The Switchboard corpus preparation.

    \b
    This is conversational telephone speech collected as 2-channel, 8kHz-sampled
    data. We are using just the Switchboard-1 Phase 1 training data.
    The catalog number LDC97S62 (Switchboard-1 Release 2) corresponds, we believe,
    to what we have. We also use the Mississippi State transcriptions, which
    we download separately from
    http://www.isip.piconepress.com/projects/switchboard/releases/switchboard_word_alignments.tar.gz

    This data is not available for free - your institution needs to have an LDC subscription.
    """
    # Forward the parsed CLI arguments to the recipe implementation.
    prepare_switchboard(
        audio_dir=audio_dir,
        transcripts_dir=transcript_dir,
        sentiment_dir=sentiment_dir,
        output_dir=output_dir,
        omit_silence=omit_silence,
        absolute_paths=absolute_paths,
    )
@prepare.command()
@click.argument(
    "tedlium_dir", type=click.Path(exists=True, dir_okay=True, file_okay=False)
)
@click.argument("output_dir", type=click.Path())
@click.option(
    "--parts",
    "-p",
    type=click.Choice(TEDLIUM_PARTS),
    multiple=True,
    default=list(TEDLIUM_PARTS),
    help="Which parts of TED-LIUM v3 to prepare (by default all).",
)
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "--normalize-text",
    type=click.Choice(["none", "upper", "kaldi"], case_sensitive=False),
    default="none",
    help="Type of text normalization to apply (no normalization, by default). "
    "Selecting `kaldi` will remove tokens and join suffixes.",
)
def tedlium(
    tedlium_dir: Pathlike,
    output_dir: Pathlike,
    parts: List[str],
    num_jobs: int,
    normalize_text: str,
):
    """
    TED-LIUM v3 recording and supervision manifest preparation.
    """
    # Thin CLI wrapper: forward all parsed arguments to the recipe function.
    prepare_tedlium(
        tedlium_root=tedlium_dir,
        dataset_parts=parts,
        output_dir=output_dir,
        normalize_text=normalize_text,
        num_jobs=num_jobs,
    )
@download.command()
@click.argument("target_dir", type=click.Path())
# Fix: this function was named `tedlium`, which registered the download command
# as `tedlium` -- colliding with the TED-LIUM v3 download command defined in
# tedlium.py. Renamed to `tedlium2` to match this module's prepare command.
def tedlium2(target_dir: Pathlike):
    """TED-LIUM v2 download (approx. 35GB)."""
    download_tedlium2(target_dir)
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def this_american_life(corpus_dir: Pathlike, output_dir: Pathlike):
    """This American Life data preparation."""
    # Thin CLI wrapper around the recipe function.
    prepare_this_american_life(corpus_dir, output_dir=output_dir)
37 | """ 38 | prepare_timit( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_phones=num_phones, 42 | num_jobs=num_jobs, 43 | ) 44 | 45 | 46 | @download.command(context_settings=dict(show_default=True)) 47 | @click.argument("target_dir", type=click.Path()) 48 | def timit(target_dir: Pathlike): 49 | """TIMIT download.""" 50 | download_timit(target_dir) 51 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/uwb_atcc.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from lhotse.bin.modes import download, prepare 4 | from lhotse.recipes.uwb_atcc import download_uwb_atcc, prepare_uwb_atcc 5 | from lhotse.utils import Pathlike 6 | 7 | __all__ = ["uwb_atcc"] 8 | 9 | 10 | @download.command(context_settings=dict(show_default=True)) 11 | @click.argument("target_dir", type=click.Path()) 12 | def uwb_atcc(target_dir: Pathlike): 13 | """UWB-ATCC download.""" 14 | download_uwb_atcc(target_dir) 15 | 16 | 17 | @prepare.command(context_settings=dict(show_default=True)) 18 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 19 | @click.argument("output_dir", type=click.Path()) 20 | @click.option("--silence-sym", type=str, default="") 21 | @click.option("--breath-sym", type=str, default="") 22 | @click.option("--noise-sym", type=str, default="") 23 | @click.option("--foreign-sym", type=str, default="") 24 | @click.option("--partial-sym", type=str, default="") 25 | @click.option("--unintelligble-sym", type=str, default="") 26 | @click.option("--unknown-sym", type=str, default="") 27 | def uwb_atcc( 28 | corpus_dir: Pathlike, 29 | output_dir: Pathlike, 30 | silence_sym: str, 31 | breath_sym: str, 32 | noise_sym: str, 33 | foreign_sym: str, 34 | partial_sym: str, 35 | unintelligble_sym: str, 36 | unknown_sym: str, 37 | ): 38 | """UWB-ATCC data preparation.""" 39 | prepare_uwb_atcc( 40 | corpus_dir, 41 | output_dir=output_dir, 42 | 
@prepare.command()
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
# NOTE(review): without is_flag=True, click infers a BOOL *value* option from
# the default, i.e. usage is `--use-edinburgh-vctk-url true`. Left unchanged to
# avoid breaking existing invocations; confirm whether a flag was intended.
@click.option("--use-edinburgh-vctk-url", default=False)
def vctk(corpus_dir: Pathlike, output_dir: Pathlike, use_edinburgh_vctk_url: bool):
    """VCTK data preparation."""
    prepare_vctk(
        corpus_dir,
        output_dir=output_dir,
        use_edinburgh_vctk_url=use_edinburgh_vctk_url,
    )


@download.command()
@click.argument("target_dir", type=click.Path())
@click.option("--use-edinburgh-vctk-url", default=False)
def vctk(target_dir: Pathlike, use_edinburgh_vctk_url: bool):
    """VCTK download."""
    download_vctk(target_dir, use_edinburgh_vctk_url=use_edinburgh_vctk_url)
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--split-test",
    is_flag=True,
    default=False,
    help="Split test part into dev and test parts",
)
def voxconverse(corpus_dir: Pathlike, output_dir: Pathlike, split_test: bool = False):
    """VoxConverse data preparation."""
    # Forward the parsed CLI arguments to the recipe implementation.
    prepare_voxconverse(
        corpus_dir,
        output_dir=output_dir,
        split_test=split_test,
    )
37 | """ 38 | prepare_wenet_speech( 39 | corpus_dir, 40 | output_dir=output_dir, 41 | num_jobs=num_jobs, 42 | dataset_parts=dataset_parts, 43 | ) 44 | -------------------------------------------------------------------------------- /lhotse/bin/modes/recipes/wenetspeech4tts.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | 3 | import click 4 | 5 | from lhotse.bin.modes import prepare 6 | from lhotse.recipes import prepare_wenetspeech4tts 7 | from lhotse.utils import Pathlike 8 | 9 | __all__ = ["wenetspeech4tts"] 10 | 11 | 12 | @prepare.command(context_settings=dict(show_default=True)) 13 | @click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True)) 14 | @click.argument("output_dir", type=click.Path()) 15 | @click.option( 16 | "-j", 17 | "--num-jobs", 18 | type=int, 19 | default=1, 20 | help="How many jobs to use (can give good speed-ups with slow disks).", 21 | ) 22 | @click.option( 23 | "-p", 24 | "--dataset-parts", 25 | type=str, 26 | default=["all"], 27 | multiple=True, 28 | help="List of dataset parts to prepare. 
# XBMU-AMDO31 corpus: download first, then manifest preparation.
@download.command(context_settings={"show_default": True})
@click.argument("target_dir", type=click.Path())
def xbmu_amdo31(target_dir: Pathlike):
    """XBMU-AMDO31 download."""
    download_xbmu_amdo31(target_dir)


@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
def xbmu_amdo31(corpus_dir: Pathlike, output_dir: Pathlike):
    """XBMU-AMDO31 ASR data preparation."""
    prepare_xbmu_amdo31(corpus_dir, output_dir=output_dir)
@dataclass
class TextExample(CustomFieldMixin):
    """
    Represents a single text example. Useful e.g. for language modeling.
    """

    # The raw text of the example.
    text: str
    # Optional tokenized form of `text`; None until tokenization is performed.
    tokens: Optional[np.ndarray] = None
    # Free-form extra fields handled by CustomFieldMixin.
    custom: Optional[Dict[str, Any]] = None

    @property
    def num_tokens(self) -> Optional[int]:
        # Undefined (None) until the example has been tokenized.
        return None if self.tokens is None else len(self.tokens)
30 | """ 31 | 32 | source: TextExample 33 | target: TextExample 34 | custom: Optional[Dict[str, Any]] = None 35 | 36 | @property 37 | def num_tokens(self) -> Optional[int]: 38 | return self.source.num_tokens 39 | -------------------------------------------------------------------------------- /lhotse/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from . import cut_transforms, input_strategies, sampling, signal_transforms 2 | from .audio_tagging import AudioTaggingDataset 3 | from .cut_transforms import * 4 | from .dataloading import make_worker_init_fn 5 | from .diarization import DiarizationDataset 6 | from .input_strategies import AudioSamples, OnTheFlyFeatures, PrecomputedFeatures 7 | from .iterable_dataset import IterableDatasetWrapper 8 | from .sampling import * 9 | from .signal_transforms import GlobalMVN, RandomizedSmoothing, SpecAugment 10 | from .source_separation import ( 11 | DynamicallyMixedSourceSeparationDataset, 12 | PreMixedSourceSeparationDataset, 13 | SourceSeparationDataset, 14 | ) 15 | from .speech_recognition import K2SpeechRecognitionDataset 16 | from .speech_synthesis import SpeechSynthesisDataset 17 | from .surt import K2SurtDataset 18 | from .unsupervised import ( 19 | DynamicUnsupervisedDataset, 20 | UnsupervisedDataset, 21 | UnsupervisedWaveformDataset, 22 | ) 23 | from .vad import VadDataset 24 | from .vis import plot_batch 25 | from .webdataset import LazyWebdatasetIterator, WebdatasetWriter, export_to_webdataset 26 | -------------------------------------------------------------------------------- /lhotse/dataset/cut_transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .concatenate import CutConcatenate, concat_cuts 2 | from .extra_padding import ExtraPadding 3 | from .mix import CutMix 4 | from .perturb_speed import PerturbSpeed 5 | from .perturb_tempo import PerturbTempo 6 | from .perturb_volume import PerturbVolume 
class PerturbSpeed:
    """
    A transform on batch of cuts (``CutSet``) that perturbs the speed of the recordings
    with a given probability :attr:`p`.

    If the effect is applied, then one of the perturbation factors from the constructor's
    :attr:`factors` parameter is sampled with uniform probability.
    """

    def __init__(
        self,
        factors: Union[float, Sequence[float]],
        p: float,
        randgen: random.Random = None,
        preserve_id: bool = False,
    ) -> None:
        # A scalar factor is normalized into a single-element list.
        if isinstance(factors, Sequence):
            self.factors = factors
        else:
            self.factors = [factors]
        self.p = p
        self.random = randgen
        self.preserve_id = preserve_id

    def __call__(self, cuts: CutSet) -> CutSet:
        # When no RNG was supplied, create a dedicated one lazily on first use.
        if self.random is None:
            self.random = random.Random()

        def _maybe_perturb(cut):
            # First draw decides whether to apply the effect at all
            # (same RNG call order as drawing inside a conditional expression).
            if self.random.random() > self.p:
                return cut
            chosen = self.random.choice(self.factors)
            return cut.perturb_speed(factor=chosen, affix_id=not self.preserve_id)

        return CutSet.from_cuts(_maybe_perturb(cut) for cut in cuts)
class PerturbVolume:
    """
    A transform on batch of cuts (``CutSet``) that perturbs the volume of the recordings
    with a given probability :attr:`p`.

    If the effect is applied, the volume scaling factor is sampled uniformly from
    the range [:attr:`scale_low`, :attr:`scale_high`].
    """

    def __init__(
        self,
        p: float,
        scale_low: float = 0.125,
        scale_high: float = 2.0,
        randgen: random.Random = None,
        preserve_id: bool = False,
    ) -> None:
        # :param p: per-cut probability of applying the perturbation.
        # :param scale_low/scale_high: bounds of the uniform range the volume
        #     scaling factor is drawn from.
        # :param randgen: optional RNG for reproducibility; created lazily otherwise.
        # :param preserve_id: if True, keep original cut IDs (no affix).
        self.p = p
        self.scale_low = scale_low
        self.scale_high = scale_high
        self.random = randgen
        self.preserve_id = preserve_id

    def __call__(self, cuts: CutSet) -> CutSet:
        if self.random is None:
            # Fix: previously this assigned the `random` *module* rather than an
            # RNG instance, which is inconsistent with PerturbSpeed (which uses
            # a dedicated `random.Random()`) and coupled the transform to the
            # global RNG state. A dedicated instance keeps it self-contained.
            self.random = random.Random()
        return CutSet.from_cuts(
            cut.perturb_volume(
                factor=self.random.uniform(self.scale_low, self.scale_high),
                affix_id=not self.preserve_id,
            )
            if self.random.random() <= self.p
            else cut
            for cut in cuts
        )
14 | """ 15 | 16 | def __init__( 17 | self, 18 | rir_recordings: Optional[RecordingSet] = None, 19 | p: float = 0.5, 20 | normalize_output: bool = True, 21 | randgen: random.Random = None, 22 | preserve_id: bool = False, 23 | early_only: bool = False, 24 | rir_channels: List[int] = [0], 25 | ) -> None: 26 | self.rir_recordings = list(rir_recordings) if rir_recordings is not None else [] 27 | self.p = p 28 | self.normalize_output = normalize_output 29 | self.random = randgen 30 | self.preserve_id = preserve_id 31 | self.early_only = early_only 32 | self.rir_channels = rir_channels 33 | 34 | def __call__(self, cuts: CutSet) -> CutSet: 35 | if self.random is None: 36 | self.random = random.Random() 37 | return CutSet.from_cuts( 38 | cut.reverb_rir( 39 | rir_recording=self.random.choice(self.rir_recordings) 40 | if self.rir_recordings 41 | else None, 42 | normalize_output=self.normalize_output, 43 | early_only=self.early_only, 44 | affix_id=not self.preserve_id, 45 | rir_channels=self.rir_channels, 46 | ) 47 | if self.random.random() <= self.p 48 | else cut 49 | for cut in cuts 50 | ) 51 | -------------------------------------------------------------------------------- /lhotse/dataset/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | SamplingConstraint, 3 | SamplingDiagnostics, 4 | TimeConstraint, 5 | TokenConstraint, 6 | ) 7 | from .bucketing import BucketingSampler 8 | from .cut_pairs import CutPairsSampler 9 | from .dynamic import DynamicCutSampler 10 | from .dynamic_bucketing import DynamicBucketingSampler 11 | from .round_robin import RoundRobinSampler 12 | from .simple import SimpleCutSampler 13 | from .stateless import StatelessSampler 14 | from .utils import find_pessimistic_batches, report_padding_ratio_estimate 15 | from .weighted_simple import WeightedSimpleCutSampler 16 | from .zip import ZipSampler 17 | 18 | __all__ = [ 19 | "TokenConstraint", 20 | "TimeConstraint", 21 | 
"SamplingDiagnostics", 22 | "SamplingConstraint", 23 | "BucketingSampler", 24 | "CutPairsSampler", 25 | "DynamicCutSampler", 26 | "DynamicBucketingSampler", 27 | "RoundRobinSampler", 28 | "SimpleCutSampler", 29 | "WeightedSimpleCutSampler", 30 | "StatelessSampler", 31 | "ZipSampler", 32 | "find_pessimistic_batches", 33 | "report_padding_ratio_estimate", 34 | ] 35 | -------------------------------------------------------------------------------- /lhotse/dataset/vad.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Dict, Sequence 2 | 3 | import torch 4 | 5 | from lhotse import validate 6 | from lhotse.cut import CutSet 7 | from lhotse.dataset.input_strategies import BatchIO, PrecomputedFeatures 8 | from lhotse.utils import ifnone 9 | 10 | 11 | class VadDataset(torch.utils.data.Dataset): 12 | """ 13 | The PyTorch Dataset for the voice activity detection task. 14 | Each item in this dataset is a dict of: 15 | 16 | .. code-block:: 17 | 18 | { 19 | 'inputs': (B x T x F) tensor 20 | 'input_lens': (B,) tensor 21 | 'is_voice': (T x 1) tensor 22 | 'cut': List[Cut] 23 | } 24 | """ 25 | 26 | def __init__( 27 | self, 28 | input_strategy: BatchIO = PrecomputedFeatures(), 29 | cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None, 30 | input_transforms: Sequence[Callable[[torch.Tensor], torch.Tensor]] = None, 31 | ) -> None: 32 | super().__init__() 33 | self.input_strategy = input_strategy 34 | self.cut_transforms = ifnone(cut_transforms, []) 35 | self.input_transforms = ifnone(input_transforms, []) 36 | 37 | def __getitem__(self, cuts: CutSet) -> Dict[str, torch.Tensor]: 38 | validate(cuts) 39 | cuts = cuts.sort_by_duration() 40 | for tfnm in self.cut_transforms: 41 | cuts = tfnm(cuts) 42 | inputs, input_lens = self.input_strategy(cuts) 43 | for tfnm in self.input_transforms: 44 | inputs = tfnm(inputs) 45 | return { 46 | "inputs": inputs, 47 | "input_lens": input_lens, 48 | "is_voice": 
self.input_strategy.supervision_masks(cuts), 49 | "cut": cuts, 50 | } 51 | -------------------------------------------------------------------------------- /lhotse/dataset/video.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import torch 4 | 5 | from lhotse import CutSet 6 | from lhotse.dataset.collation import collate_video 7 | 8 | 9 | class UnsupervisedAudioVideoDataset(torch.utils.data.Dataset): 10 | """ 11 | A basic dataset that loads, pads, collates, and returns video and audio tensors. 12 | 13 | Returns: 14 | 15 | .. code-block:: 16 | 17 | { 18 | 'video': (B x NumFrames x Color x Height x Width) uint8 tensor 19 | 'video_lens': (B, ) int32 tensor 20 | 'audio': (B x NumChannels x NumSamples) float32 tensor 21 | 'audio_lens': (B, ) int32 tensor 22 | 'cuts': CutSet of length B 23 | } 24 | """ 25 | 26 | def __getitem__(self, cuts: CutSet) -> Dict[str, Any]: 27 | video, video_lens, audio, audio_lens, cuts = collate_video( 28 | cuts, fault_tolerant=True 29 | ) 30 | return { 31 | "cuts": cuts, 32 | "video": video, 33 | "video_lens": video_lens, 34 | "audio": audio, 35 | "audio_lens": audio_lens, 36 | } 37 | -------------------------------------------------------------------------------- /lhotse/features/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ( 2 | FeatureExtractor, 3 | Features, 4 | FeatureSet, 5 | FeatureSetBuilder, 6 | create_default_feature_extractor, 7 | ) 8 | from .fbank import TorchaudioFbank, TorchaudioFbankConfig 9 | from .io import ( 10 | ChunkedLilcomHdf5Reader, 11 | ChunkedLilcomHdf5Writer, 12 | FeaturesReader, 13 | FeaturesWriter, 14 | KaldiReader, 15 | LilcomChunkyReader, 16 | LilcomChunkyWriter, 17 | LilcomFilesReader, 18 | LilcomFilesWriter, 19 | LilcomHdf5Reader, 20 | LilcomHdf5Writer, 21 | LilcomURLReader, 22 | LilcomURLWriter, 23 | NumpyFilesReader, 24 | NumpyFilesWriter, 25 | NumpyHdf5Reader, 26 
def lilcom_compress_chunked(
    data: np.ndarray,
    tick_power: int = -5,
    do_regression: bool = True,
    chunk_size: int = 100,
    temporal_dim: int = 0,
) -> List[bytes]:
    """
    Compress ``data`` with lilcom in fixed-size chunks along ``temporal_dim``.

    :param data: the array to compress.
    :param tick_power: lilcom quantization parameter (forwarded as-is).
    :param do_regression: lilcom regression parameter (forwarded as-is).
    :param chunk_size: number of frames per compressed chunk.
    :param temporal_dim: which axis is the time/frame axis (negative values allowed).
    :return: a list of compressed byte strings, one per chunk.
    """
    # Normalize a negative axis index the way NumPy would.
    if temporal_dim < 0:
        temporal_dim += data.ndim
    assert 0 <= temporal_dim < data.ndim
    num_frames = data.shape[temporal_dim]
    compressed = []
    for begin in range(0, num_frames, chunk_size):
        # Slice along temporal_dim. The previous implementation always sliced
        # axis 0 (``data[begin:begin + chunk_size]``) even though num_frames was
        # read from ``shape[temporal_dim]``, yielding wrong chunks whenever
        # temporal_dim != 0.
        chunk_index = (slice(None),) * temporal_dim + (
            slice(begin, begin + chunk_size),
        )
        compressed.append(
            lilcom.compress(
                data[chunk_index],
                tick_power=tick_power,
                do_regression=do_regression,
            )
        )
    return compressed
@dataclass
class TorchaudioMfccConfig:
    """
    Configuration for :class:`TorchaudioMfcc`.

    The spectrogram-related fields are forwarded to torchaudio's Kaldi-compliant
    feature computation; the MFCC-related fields control the mel filterbank and
    cepstral stages.
    """

    # Spectrogram-related part
    dither: float = 0.0
    window_type: str = "povey"
    # Note that frame_length and frame_shift will be converted to milliseconds before torchaudio/Kaldi sees them
    frame_length: Seconds = 0.025
    frame_shift: Seconds = 0.01
    remove_dc_offset: bool = True
    round_to_power_of_two: bool = True
    energy_floor: float = EPSILON
    min_duration: float = 0.0
    preemphasis_coefficient: float = 0.97
    raw_energy: bool = True

    # MFCC-related part
    low_freq: float = 20.0
    # NOTE(review): high_freq and vtln_high are negative — in the Kaldi
    # convention a negative value presumably means an offset from the Nyquist
    # frequency; confirm against torchaudio.compliance.kaldi docs.
    high_freq: float = -400.0
    num_mel_bins: int = 23
    use_energy: bool = False
    vtln_low: float = 100.0
    vtln_high: float = -500.0
    vtln_warp: float = 1.0
    cepstral_lifter: float = 22.0
    num_ceps: int = 13

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this config into a plain dict of its dataclass fields."""
        return asdict(self)

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> "TorchaudioMfccConfig":
        """Re-create a config from a dict produced by :meth:`to_dict`."""
        return TorchaudioMfccConfig(**data)


@register_extractor
class TorchaudioMfcc(TorchaudioFeatureExtractor):
    """MFCC feature extractor based on ``torchaudio.compliance.kaldi.mfcc`` function."""

    name = "mfcc"
    config_type = TorchaudioMfccConfig

    def _feature_fn(self, *args, **kwargs):
        # Imported lazily so torchaudio is only required when features
        # are actually computed.
        from torchaudio.compliance.kaldi import mfcc

        return mfcc(*args, **kwargs)

    def feature_dim(self, sampling_rate: int) -> int:
        # The MFCC output dimension equals the number of cepstral coefficients,
        # independent of the sampling rate.
        return self.config.num_ceps
@pytest.fixture
def deterministic_rng(request):
    """
    Pytest fixture that ensures deterministic RNG behavior.
    After the test finishes, it restores the previous RNG state.

    Example usage::

        >>> def my_test(deterministic_rng):
        ...     x = torch.randn(10, 5)  # always has the same values

    You can also set random seed like this::

        >>> @pytest.mark.seed(1337)
        ... def my_test(deterministic_rng):
        ...     x = torch.randn(10, 5)

    .. note: Learn more about pytest fixtures setup/teardown here:
        https://docs.pytest.org/en/latest/how-to/fixtures.html#teardown-cleanup-aka-fixture-finalization
    """
    # Pytest markers let tests optionally set a seed other than the default 0.
    # See: https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#using-markers-to-pass-data-to-fixtures
    marker = request.node.get_closest_marker("seed")
    seed = 0 if marker is None else marker.args[0]

    # Snapshot every RNG we are about to reseed so we can restore it afterwards.
    saved_torch = torch.get_rng_state()
    saved_numpy = np.random.get_state()
    saved_python = random.getstate()

    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    yield seed

    # Teardown: put all three generators back exactly as we found them.
    random.setstate(saved_python)
    np.random.set_state(saved_numpy)
    torch.set_rng_state(saved_torch)
def install_sph2pipe(
    where: Optional[Pathlike] = None,
    download_from: str = SPH2PIPE_URL,
    force: bool = False,
) -> None:
    """
    Install the sph2pipe program to handle sphere (.sph) audio files with
    "shorten" codec compression (needed for older LDC data).

    It downloads an archive and then decompresses and compiles the contents.

    :param where: installation directory; defaults to lhotse's tools cache dir.
    :param download_from: URL of the sph2pipe source archive.
    :param force: when True, re-download and re-extract even if already present.
    """
    if where is None:
        where = default_tools_cachedir(force_mkdir=True)
    where = Path(where)
    # Download + extract sources (no-op if already present and not forced).
    download_and_untar_sph2pipe(where, url=download_from, force_download=force)
    # Compile. Pass argv as a list without shell=True: the previous
    # f-string + shell=True invocation would break (or worse, execute
    # unintended commands) if the install path contained spaces or shell
    # metacharacters, e.g. in some home directories.
    subprocess.run(
        ["make", "-C", str(where / "sph2pipe-2.5")],
        check=True,
    )
    logging.info("Finished installing sph2pipe.")


def download_and_untar_sph2pipe(
    target_dir: Pathlike,
    url: str,
    force_download: bool = False,
) -> Path:
    """
    Download the sph2pipe source tarball into ``target_dir`` and extract it.

    Skips both steps if the sources look already extracted, unless
    ``force_download`` is set.

    :return: path to the extracted ``sph2pipe-2.5`` directory.
    """
    target_dir = Path(target_dir)
    sph2pipe_dir = target_dir / "sph2pipe-2.5"
    # The presence of the Makefile marks a previously successful extraction.
    if (sph2pipe_dir / "Makefile").is_file() and not force_download:
        return sph2pipe_dir
    target_dir.mkdir(parents=True, exist_ok=True)
    tar_name = "sph2pipe-2.5.tar.gz"
    tar_path = target_dir / tar_name
    resumable_download(url, filename=tar_path, force_download=force_download)
    # safe_extract guards against path-traversal entries in the tarball.
    with tarfile.open(tar_path) as tar:
        safe_extract(tar, path=target_dir)
    return sph2pipe_dir
-------------------------------------------------------------------------------- 1 | from .base import Activity, ActivityDetector 2 | from .silero_vad import SileroVAD8k, SileroVAD16k 3 | -------------------------------------------------------------------------------- /lhotse/workflows/forced_alignment/__init__.py: -------------------------------------------------------------------------------- 1 | from .asr_aligner import * 2 | from .base import * 3 | from .mms_aligner import * 4 | from .workflow import * 5 | -------------------------------------------------------------------------------- /lhotse/workflows/meeting_simulation/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseMeetingSimulator 2 | from .conversational import ConversationalMeetingSimulator 3 | from .speaker_independent import SpeakerIndependentMeetingSimulator 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | skip = ["lhotse/__init__.py"] 4 | 5 | [tool.black] 6 | force-exclude = ''' 7 | /( 8 | \.git 9 | | \.github 10 | )/ 11 | ''' 12 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/__init__.py -------------------------------------------------------------------------------- /test/audio/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/audio/__init__.py -------------------------------------------------------------------------------- /test/audio/test_resample_randomized.py: 
class TestResample(RandomCutTestCase):
    @settings(deadline=None, print_blob=True)
    @given(
        st.one_of(
            st.just(8000),
            st.just(16000),
            st.just(22050),
            st.just(44100),
            st.just(48000),
        ),
        st.one_of(
            st.just(8000),
            st.just(16000),
            st.just(22050),
            st.just(44100),
            st.just(48000),
        ),
        st.data(),
    )
    def test_resample(self, source_sampling_rate, target_sampling_rate, data):
        """Resampling keeps the recording ID and (approximately) its duration."""
        # Draw a number of samples between 0.9 - 1.1 times the sampling rate.
        lo = round(source_sampling_rate * 0.9)
        hi = round(source_sampling_rate * 1.1)
        num_samples = data.draw(
            st.integers(lo, hi),
            label="Numbers of samples for Recordings",
        )
        # Generate a random recording and resample it.
        recording = self.with_recording(
            sampling_rate=source_sampling_rate, num_samples=num_samples
        )
        resampled = recording.resample(target_sampling_rate)
        assert resampled.id == recording.id
        # Tolerance of one sample in the resampled domain.
        assert isclose(
            resampled.duration, recording.duration, abs_tol=1 / target_sampling_rate
        )
        audio = resampled.load_audio()
        assert audio.shape[0] == resampled.num_channels
        assert audio.shape[1] == resampled.num_samples
        # Cleanup open file handles.
        self.cleanup()
@pytest.fixture
def dummy_cut_set():
    """Libri test cuts augmented with a synthetic ``audio_event`` attribute."""

    def _attach_audio_event(cut):
        cut.supervisions[0].audio_event = "Speech; Whisper"
        return cut

    return CutSet.from_json("test/fixtures/libri/cuts.json").map(_attach_audio_event)


def test_audio_tagging_dataset(dummy_cut_set):
    # The dataset must expose the audio_event field through its supervisions.
    dataset = AudioTaggingDataset()
    batch = dataset[dummy_cut_set]
    assert "audio_event" in batch["supervisions"]
    print("Pass the test")
class IdentityDataset(torch.utils.data.Dataset):
    """A no-op dataset that returns whatever the sampler yields, unchanged."""

    def __getitem__(self, item):
        return item


@pytest.mark.parametrize("persistent_workers", [False, True])
def test_iterable_dataset_wrapper(persistent_workers):
    cuts = DummyManifest(CutSet, begin_id=0, end_id=10)
    # max_cuts=10 means every epoch fits in a single mini-batch.
    sampler = SimpleCutSampler(cuts, max_cuts=10, shuffle=True)
    loader = torch.utils.data.DataLoader(
        IterableDatasetWrapper(
            IdentityDataset(), sampler, auto_increment_epoch=persistent_workers
        ),
        batch_size=None,
        num_workers=1,
        persistent_workers=persistent_workers,
    )

    epoch_cut_sets = []
    for epoch in (0, 1):
        loader.dataset.set_epoch(epoch)
        batches = list(loader)
        epoch_cut_sets.append(
            CutSet.from_cuts(cut for batch in batches for cut in batch)
        )

    # The shuffle order must differ between epochs.
    assert list(epoch_cut_sets[0].ids) != list(epoch_cut_sets[1].ids)
SpeechSynthesisDataset(feature_transforms=transform) 24 | example = dataset[cut_set] 25 | assert example["audio"].shape[1] > 0 26 | assert example["features"].shape[1] > 0 27 | assert len(example["text"]) > 0 28 | assert len(example["text"][0]) > 0 29 | 30 | assert example["audio"].ndim == 2 31 | assert example["features"].ndim == 3 32 | 33 | assert isinstance(example["audio_lens"], torch.IntTensor) 34 | assert isinstance(example["features_lens"], torch.IntTensor) 35 | 36 | assert example["audio_lens"].ndim == 1 37 | assert example["features_lens"].ndim == 1 38 | -------------------------------------------------------------------------------- /test/dataset/test_surt_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torch.utils.data import DataLoader 3 | 4 | from lhotse.cut import CutSet 5 | from lhotse.dataset.sampling import SimpleCutSampler 6 | from lhotse.dataset.surt import K2SurtDataset 7 | 8 | 9 | @pytest.fixture 10 | def cut_set(): 11 | return CutSet.from_shar(in_dir="test/fixtures/lsmix") 12 | 13 | 14 | @pytest.mark.parametrize("num_workers", [0, 1]) 15 | @pytest.mark.parametrize("return_sources", [True, False]) 16 | def test_surt_iterable_dataset(cut_set, num_workers, return_sources): 17 | dataset = K2SurtDataset(return_sources=return_sources, return_cuts=True) 18 | sampler = SimpleCutSampler(cut_set, shuffle=False, max_cuts=10000) 19 | # Note: "batch_size=None" disables the automatic batching mechanism, 20 | # which is required when Dataset takes care of the collation itself. 
21 | dloader = DataLoader( 22 | dataset, batch_size=None, sampler=sampler, num_workers=num_workers 23 | ) 24 | batch = next(iter(dloader)) 25 | assert batch["inputs"].shape == (2, 2238, 80) 26 | assert batch["input_lens"].tolist() == [2238, 985] 27 | 28 | assert len(batch["supervisions"][1]) == 2 29 | assert len(batch["text"][1]) == 2 30 | assert batch["text"][1] == [ 31 | "BY THIS MANOEUVRE WE DON'T LET ANYBODY IN THE CAR AND WE TRY AND KEEP THEM CLEAR OF THE CAR SHORT OF SHOOTING THEM THAT IS CARRIED NO OTHER MESSAGE", 32 | "THE AMERICAN INTERPOSED BRUSQUELY BETWEEN PAROXYSMS AND THEY CAUGHT HIM AT IT EH", 33 | ] 34 | if return_sources: 35 | assert len(batch["source_feats"]) == 2 36 | assert all( 37 | len(batch["source_feats"][i]) == len(batch["cuts"][i].supervisions) 38 | for i in range(2) 39 | ) 40 | -------------------------------------------------------------------------------- /test/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/features/__init__.py -------------------------------------------------------------------------------- /test/features/test_chunky_writer.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from lhotse import ChunkedLilcomHdf5Writer, LilcomChunkyWriter 7 | from lhotse.features.io import get_reader 8 | from lhotse.utils import is_module_available 9 | 10 | 11 | @pytest.mark.parametrize( 12 | ["writer_type", "ext"], 13 | [ 14 | (LilcomChunkyWriter, ".lca"), 15 | pytest.param( 16 | ChunkedLilcomHdf5Writer, 17 | ".h5", 18 | marks=pytest.mark.skipif( 19 | not is_module_available("h5py"), 20 | reason="Requires h5py to run HDF5 tests.", 21 | ), 22 | ), 23 | ], 24 | ) 25 | def test_chunky_writer_left_right_offsets_equal(writer_type, ext): 26 | # Generate small 
@pytest.mark.parametrize(
    "feats,expected_num_frames,abs_tol",
    [
        (np.zeros((5, 2)), 5, 0),
        (np.zeros((5, 2)), 4, 1),
        (np.zeros((5, 2)), 6, 1),
        (np.zeros((5, 2)), 3, 2),
        (np.zeros((5, 2)), 7, 2),
    ],
)
def test_pad_or_truncate_features_shape(feats, expected_num_frames, abs_tol):
    """Within tolerance, the output is padded/truncated to the requested length."""
    adjusted = pad_or_truncate_features(feats, expected_num_frames, abs_tol)
    num_features = feats.shape[-1]
    assert adjusted.shape == (expected_num_frames, num_features)
abs_tol) 38 | 39 | 40 | @pytest.mark.skipif( 41 | not is_module_available("librosa"), reason="Librosa is an optional dependency." 42 | ) 43 | @pytest.mark.parametrize("audio_len", [22050, 11025, 1024, 512, 24000, 16000]) 44 | def test_librosa_fbank_with_different_audio_lengths(audio_len): 45 | 46 | extractor = LibrosaFbank() 47 | 48 | kernel_size = extractor.config.fft_size 49 | stride = extractor.config.hop_size 50 | pad = stride 51 | expected_n_frames = ceil((audio_len - kernel_size + 2 * pad) / stride + 1) 52 | 53 | n_frames = len(extractor.extract(np.zeros(audio_len), 22050)) 54 | assert abs(n_frames - expected_n_frames) <= 1 55 | -------------------------------------------------------------------------------- /test/features/test_whisper_fbank.py: -------------------------------------------------------------------------------- 1 | from math import ceil 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from lhotse.features.whisper_fbank import WhisperFbank, WhisperFbankConfig 7 | from lhotse.utils import is_module_available 8 | 9 | 10 | @pytest.mark.skipif( 11 | not is_module_available("librosa"), reason="Librosa is an optional dependency." 
12 | ) 13 | @pytest.mark.parametrize("audio_len", [22050, 11025, 1024, 512, 24000, 16000]) 14 | def test_whisper_fbank_with_different_audio_lengths(audio_len): 15 | 16 | extractor = WhisperFbank(WhisperFbankConfig(device="cpu")) 17 | 18 | kernel_size = 400 19 | stride = extractor.hop_length 20 | pad = stride 21 | expected_n_frames = ceil((audio_len - kernel_size + 2 * pad) / stride + 1) 22 | 23 | n_frames = len(extractor.extract(np.zeros(audio_len, dtype=np.float32), 16000)) 24 | assert abs(n_frames - expected_n_frames) <= 1 25 | -------------------------------------------------------------------------------- /test/fixtures/ami/350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc -------------------------------------------------------------------------------- /test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav -------------------------------------------------------------------------------- /test/fixtures/ami/ES2011a_sups.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ami/ES2011a_sups.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/ami/cuts.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "channel": 0, 4 | "duration": 6.0, 5 | "features": { 6 | "channels": 0, 7 | "duration": 6.0, 8 | "frame_shift": 0.01, 9 | "num_features": 23, 10 | "num_frames": 600, 11 | 
"recording_id": "ES2011a.Headset-0-40s-46s.wav", 12 | "sampling_rate": 16000, 13 | "start": 0.0, 14 | "storage_path": "test/fixtures/ami", 15 | "storage_key": "350b3ee0-a6fd-47ab-b921-fd298b1d53c0.llc", 16 | "storage_type": "lilcom_files", 17 | "type": "fbank" 18 | }, 19 | "id": "a7889ee6-1703-4d0d-98b3-91f1d45a790d", 20 | "recording": { 21 | "duration": 6, 22 | "id": "ES2011a.Headset-0-40s-46s.wav", 23 | "num_samples": 96000, 24 | "sampling_rate": 16000, 25 | "sources": [ 26 | { 27 | "channels": [ 28 | 0 29 | ], 30 | "source": "test/fixtures/ami/ES2011a.Headset-0-40s-46s.wav", 31 | "type": "file" 32 | } 33 | ] 34 | }, 35 | "start": 0.0, 36 | "supervisions": [ 37 | { 38 | "channel": 0, 39 | "duration": 1.36, 40 | "id": "ES2011a.Headset-0-40s-46s-0-3", 41 | "language": "English", 42 | "recording_id": "ES2011a.Headset-0-40s-46s.wav", 43 | "speaker": "ES2011a.Headset-1", 44 | "start": 1.46, 45 | "text": "I'M ABIGAIL CLAFLIN" 46 | }, 47 | { 48 | "channel": 0, 49 | "duration": 1.0, 50 | "id": "ES2011a.Headset-0-40s-46s-0-4", 51 | "language": "English", 52 | "recording_id": "ES2011a.Headset-0-40s-46s.wav", 53 | "speaker": "ES2011a.Headset-2", 54 | "start": 3.36, 55 | "text": "YOU CAN CALL ME ABBIE" 56 | } 57 | ], 58 | "type": "MonoCut" 59 | } 60 | ] 61 | -------------------------------------------------------------------------------- /test/fixtures/audio.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "recording-1", 4 | "sampling_rate": 8000, 5 | "num_samples": 4000, 6 | "duration": 0.5, 7 | "sources": [ 8 | { 9 | "type": "file", 10 | "channels": [ 11 | 0 12 | ], 13 | "source": "test/fixtures/mono_c0.wav" 14 | }, 15 | { 16 | "type": "command", 17 | "channels": [ 18 | 1 19 | ], 20 | "source": "cat test/fixtures/mono_c1.wav | cat" 21 | } 22 | ] 23 | }, 24 | { 25 | "id": "recording-2", 26 | "sampling_rate": 8000, 27 | "num_samples": 8000, 28 | "duration": 1.0, 29 | "sources": [ 30 | { 31 | "type": "file", 32 | 
"channels": [ 33 | 0, 34 | 1 35 | ], 36 | "source": "test/fixtures/stereo.wav" 37 | } 38 | ] 39 | }, 40 | { 41 | "id": "recording-3", 42 | "sampling_rate": 8000, 43 | "num_samples": 8000, 44 | "duration": 1.0, 45 | "sources": [ 46 | { 47 | "type": "file", 48 | "channels": [ 49 | 0, 50 | 1 51 | ], 52 | "source": "test/fixtures/stereo.sph" 53 | } 54 | ] 55 | }, 56 | { 57 | "id": "recording-4", 58 | "sampling_rate": 8000, 59 | "num_samples": 4444, 60 | "duration": 0.56, 61 | "sources": [ 62 | { 63 | "type": "file", 64 | "channels": [ 65 | 0 66 | ], 67 | "source": "test/fixtures/mono_c0.wav" 68 | }, 69 | { 70 | "type": "command", 71 | "channels": [ 72 | 1 73 | ], 74 | "source": "sox test/fixtures/mono_c1.wav -t wav - speed 0.9 | cat" 75 | } 76 | ] 77 | } 78 | ] 79 | -------------------------------------------------------------------------------- /test/fixtures/big_buck_bunny_small.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/big_buck_bunny_small.mp4 -------------------------------------------------------------------------------- /test/fixtures/common_voice_en_651325.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/common_voice_en_651325.mp3 -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/feature_manifest.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "channels": 0, 4 | "duration": 0.5, 5 | "frame_shift": 0.01, 6 | "num_features": 23, 7 | "num_frames": 50, 8 | "recording_id": "recording-1", 9 | "sampling_rate": 16000, 10 | "start": 0.0, 11 | "storage_path": "test/fixtures/dummy_feats/storage", 12 | "storage_key": "89739de9-308c-4487-9fa5-1c690d44e718.llc", 13 | 
"storage_type": "lilcom_files", 14 | "type": "fbank" 15 | }, 16 | { 17 | "channels": 1, 18 | "duration": 0.5, 19 | "frame_shift": 0.01, 20 | "num_features": 23, 21 | "num_frames": 50, 22 | "recording_id": "recording-1", 23 | "sampling_rate": 16000, 24 | "start": 0.0, 25 | "storage_path": "test/fixtures/dummy_feats/storage", 26 | "storage_key": "25959652-8816-4810-a88a-0b022d6b9b6d.llc", 27 | "storage_type": "lilcom_files", 28 | "type": "fbank" 29 | }, 30 | { 31 | "channels": 0, 32 | "duration": 1.0, 33 | "frame_shift": 0.01, 34 | "num_features": 23, 35 | "num_frames": 100, 36 | "recording_id": "recording-2", 37 | "sampling_rate": 16000, 38 | "start": 0.0, 39 | "storage_path": "test/fixtures/dummy_feats/storage", 40 | "storage_key": "dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc", 41 | "storage_type": "lilcom_files", 42 | "type": "fbank" 43 | }, 44 | { 45 | "channels": 1, 46 | "duration": 1.0, 47 | "frame_shift": 0.01, 48 | "num_features": 23, 49 | "num_frames": 100, 50 | "recording_id": "recording-2", 51 | "sampling_rate": 16000, 52 | "start": 0.0, 53 | "storage_path": "test/fixtures/dummy_feats/storage", 54 | "storage_key": "d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc", 55 | "storage_type": "lilcom_files", 56 | "type": "fbank" 57 | } 58 | ] -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/25959652-8816-4810-a88a-0b022d6b9b6d.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/25959652-8816-4810-a88a-0b022d6b9b6d.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/89739de9-308c-4487-9fa5-1c690d44e718.llc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/89739de9-308c-4487-9fa5-1c690d44e718.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/d3466ce9-d604-48c3-8c1f-26480aaf07d1.llc -------------------------------------------------------------------------------- /test/fixtures/dummy_feats/storage/dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/dummy_feats/storage/dbf9a0ec-f79d-4eb8-ae83-143a6d5de64d.llc -------------------------------------------------------------------------------- /test/fixtures/feature_config.yml: -------------------------------------------------------------------------------- 1 | feature_extractor: 2 | fbank_config: 3 | use_log_fbank: true 4 | mfcc_config: 5 | cepstral_lifter: 22.0 6 | num_ceps: 13 7 | mfcc_fbank_common_config: 8 | high_freq: 0.0 9 | low_freq: 20.0 10 | num_mel_bins: 23 11 | use_energy: false 12 | vtln_high: -500.0 13 | vtln_low: 100.0 14 | vtln_warp: 1.0 15 | spectrogram_config: 16 | dither: 0.0 17 | energy_floor: 0.0 18 | frame_length: 25.0 19 | frame_shift: 10.0 20 | min_duration: 0.0 21 | preemphasis_coefficient: 0.97 22 | raw_energy: true 23 | remove_dc_offset: true 24 | round_to_power_of_two: true 25 | snip_edges: false 26 | window_type: povey 27 | type: mfcc 28 | -------------------------------------------------------------------------------- /test/fixtures/libri/audio.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "recording-1", 4 | "sampling_rate": 16000, 5 | "num_samples": 256640, 6 | "duration": 16.04, 7 | "sources": [ 8 | { 9 | "type": "file", 10 | "channels": [ 11 | 0 12 | ], 13 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 14 | } 15 | ] 16 | } 17 | ] -------------------------------------------------------------------------------- /test/fixtures/libri/cuts.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [ 8 | { 9 | "id": "sup-1", 10 | "recording_id": "recording-1", 11 | "start": 0, 12 | "duration": 10.0, 13 | "channel": 0, 14 | "text": "EXAMPLE OF TEXT", 15 | "speaker": "libri-spk1" 16 | } 17 | ], 18 | "features": { 19 | "type": "fbank", 20 | "num_frames": 1604, 21 | "num_features": 40, 22 | "frame_shift": 0.01, 23 | "sampling_rate": 16000, 24 | "start": 0, 25 | "duration": 16.04, 26 | "storage_type": "lilcom_files", 27 | "storage_path": "test/fixtures/libri/storage", 28 | "storage_key": "30c2440c-93cb-4e83-b382-f2a59b3859b4.llc", 29 | "recording_id": "recording-1", 30 | "channels": 0 31 | }, 32 | "recording": { 33 | "id": "recording-1", 34 | "sources": [ 35 | { 36 | "type": "file", 37 | "channels": [ 38 | 0 39 | ], 40 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 41 | } 42 | ], 43 | "sampling_rate": 16000, 44 | "num_samples": 256640, 45 | "duration": 16.04 46 | }, 47 | "type": "MonoCut" 48 | } 49 | ] 50 | -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_multi.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-7gcfe93d18de", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [ 8 | { 9 | "id": 
"sup-1", 10 | "recording_id": "recording-1", 11 | "start": 0, 12 | "duration": 10.0, 13 | "channel": 0, 14 | "text": "EXAMPLE OF TEXT" 15 | } 16 | ], 17 | "recording": { 18 | "id": "recording-1", 19 | "sources": [ 20 | { 21 | "type": "file", 22 | "channels": [ 23 | 0 24 | ], 25 | "source": "test/fixtures/libri/libri-1088-134315-0000_8ch.wav" 26 | } 27 | ], 28 | "sampling_rate": 16000, 29 | "num_samples": 256640, 30 | "duration": 16.04 31 | }, 32 | "type": "MultiCut" 33 | } 34 | ] 35 | -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_no_feats.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [], 8 | "recording": { 9 | "id": "recording-1", 10 | "sources": [ 11 | { 12 | "type": "file", 13 | "channels": [ 14 | 0 15 | ], 16 | "source": "test/fixtures/libri/libri-1088-134315-0000.wav" 17 | } 18 | ], 19 | "sampling_rate": 16000, 20 | "num_samples": 256640, 21 | "duration": 16.04 22 | }, 23 | "type": "Cut" 24 | } 25 | ] -------------------------------------------------------------------------------- /test/fixtures/libri/cuts_no_recording.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e3e70682-c209-4cac-629f-6fbed82c07cd", 4 | "start": 0.0, 5 | "duration": 10.0, 6 | "channel": 0, 7 | "supervisions": [], 8 | "features": { 9 | "type": "fbank", 10 | "num_frames": 1604, 11 | "num_features": 40, 12 | "frame_shift": 0.01, 13 | "sampling_rate": 16000, 14 | "start": 0, 15 | "duration": 16.04, 16 | "storage_type": "lilcom_files", 17 | "storage_path": "test/fixtures/libri/storage", 18 | "storage_key": "30c2440c-93cb-4e83-b382-f2a59b3859b4.llc", 19 | "recording_id": "recording-1", 20 | "channels": 0 21 | }, 22 | "type": "Cut" 23 | } 24 | ] 
-------------------------------------------------------------------------------- /test/fixtures/libri/feature_manifest.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/feature_manifest.json.gz -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000.wav -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000_8ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000_8ch.wav -------------------------------------------------------------------------------- /test/fixtures/libri/libri-1088-134315-0000_rvb.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/libri-1088-134315-0000_rvb.wav -------------------------------------------------------------------------------- /test/fixtures/libri/recreate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=test/fixtures/libri 4 | if [ ! -f $dir/audio.json ]; then 5 | echo "Expected to run this script in the main Lhotse repo directory." 6 | exit 1 7 | fi 8 | 9 | rm $dir/cuts* 10 | rm $dir/feature_manifest.json.gz 11 | rm -rf $dir/storage 12 | 13 | lhotse feat extract $dir/audio.json $dir 14 | # Create three variants of cut manifests. 
15 | # Seed 0 ensures the RNG always picks the same ID for the cuts. 16 | lhotse --seed 0 cut simple -r $dir/audio.json -f $dir/feature_manifest.json.gz $dir/cuts.json 17 | lhotse --seed 0 cut simple -r $dir/audio.json $dir/cuts_no_feats.json 18 | lhotse --seed 0 cut simple -f $dir/feature_manifest.json.gz $dir/cuts_no_recording.json 19 | 20 | for f in $dir/cuts*; do 21 | lhotse cut truncate -d 10.0 --preserve-id $f $f 22 | done 23 | -------------------------------------------------------------------------------- /test/fixtures/libri/storage/30c2440c-93cb-4e83-b382-f2a59b3859b4.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/libri/storage/30c2440c-93cb-4e83-b382-f2a59b3859b4.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/feats/5bb/5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/feats/5bb/5bb52a3d-aaf6-42ff-8891-2be7852a4858.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/feats/d39/d39cf273-a42d-433a-a63c-ba6357f1669e.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/feats/d39/d39cf273-a42d-433a-a63c-ba6357f1669e.llc -------------------------------------------------------------------------------- /test/fixtures/ljspeech/storage/LJ002-0020.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/storage/LJ002-0020.wav -------------------------------------------------------------------------------- /test/fixtures/ljspeech/storage/LJ002-0035.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/ljspeech/storage/LJ002-0035.wav -------------------------------------------------------------------------------- /test/fixtures/lsmix/cuts.000000.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/cuts.000000.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/lsmix/features.000000.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/features.000000.tar -------------------------------------------------------------------------------- /test/fixtures/lsmix/source_feats.000000.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/lsmix/source_feats.000000.tar -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false # only non-default option. 
2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse-b/recordings.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse-b/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse-b/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse-b/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse/recordings.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/lhotse/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech/lhotse/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/reco2dur: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 10.885 2 | lbi-1272-141231-0000 4.650 3 | lbi-1462-170142-0000 4.715 4 | lbi-1462-170145-0000 15.404999732971191 5 | lbi-174-168635-0000 4.53000020980835 6 | lbi-1988-147956-0000 14.949999809265137 7 | lbi-1988-24833-0000 3.319999933242798 8 | 
lbi-1993-147964-0000 8.420000076293945 9 | lbi-2035-147960-0000 9.020000457763672 10 | lbi-2035-147961-0000 15.069999694824219 11 | lbi-2035-152373-0000 7.875 12 | lbi-2412-153948-0000 11.65999984741211 13 | lbi-2428-83699-0000 13.305000305175781 14 | lbi-251-118436-0000 6.260000228881836 15 | lbi-251-136532-0000 9.8100004196167 16 | lbi-2803-154320-0000 11.479999542236328 17 | lbi-2803-161169-0000 11.699999809265137 18 | lbi-3000-15664-0000 3.130000114440918 19 | lbi-3536-23268-0000 19.889999389648438 20 | lbi-3576-138058-0000 14.140000343322754 21 | lbi-3752-4944-0000 3.3350000381469727 22 | lbi-5338-24640-0000 3.450000047683716 23 | lbi-5338-284437-0000 4.550000190734863 24 | lbi-5694-64038-0000 2.5950000286102295 25 | lbi-5895-34615-0000 3.3350000381469727 26 | lbi-5895-34622-0000 3.369999885559082 27 | lbi-5895-34629-0000 2.259999990463257 28 | lbi-6241-61943-0000 6.949999809265137 29 | lbi-6241-61946-0000 6.235000133514404 30 | lbi-6295-244435-0000 3.1050000190734863 31 | lbi-6319-57405-0000 7.295000076293945 32 | lbi-777-126732-0000 2.740000009536743 33 | lbi-7850-281318-0000 4.175000190734863 34 | lbi-7850-286674-0000 8.454999923706055 35 | lbi-7976-110523-0000 15.220000267028809 36 | lbi-8297-275156-0000 3.5799999237060547 37 | lbi-84-121550-0000 8.4350004196167 38 | lbi-8842-304647-0000 9.710000038146973 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/segments: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 lbi-1272-135031-0000 0 10.885 2 | lbi-1272-141231-0000 lbi-1272-141231-0000 0 4.65 3 | lbi-1462-170142-0000 lbi-1462-170142-0000 0 4.715 4 | lbi-1462-170145-0000 lbi-1462-170145-0000 0 15.405 5 | lbi-174-168635-0000 lbi-174-168635-0000 0 4.53 6 | lbi-1988-147956-0000 lbi-1988-147956-0000 0 14.95 7 | lbi-1988-24833-0000 lbi-1988-24833-0000 0 3.32 8 | lbi-1993-147964-0000 lbi-1993-147964-0000 0 8.42 9 | lbi-2035-147960-0000 
lbi-2035-147960-0000 0 9.02 10 | lbi-2035-147961-0000 lbi-2035-147961-0000 0 15.07 11 | lbi-2035-152373-0000 lbi-2035-152373-0000 0 7.875 12 | lbi-2412-153948-0000 lbi-2412-153948-0000 0 11.66 13 | lbi-2428-83699-0000 lbi-2428-83699-0000 0 13.305 14 | lbi-251-118436-0000 lbi-251-118436-0000 0 6.26 15 | lbi-251-136532-0000 lbi-251-136532-0000 0 9.81 16 | lbi-2803-154320-0000 lbi-2803-154320-0000 0 11.48 17 | lbi-2803-161169-0000 lbi-2803-161169-0000 0 11.7 18 | lbi-3000-15664-0000 lbi-3000-15664-0000 0 3.13 19 | lbi-3536-23268-0000 lbi-3536-23268-0000 0 19.89 20 | lbi-3576-138058-0000 lbi-3576-138058-0000 0 14.14 21 | lbi-3752-4944-0000 lbi-3752-4944-0000 0 3.335 22 | lbi-5338-24640-0000 lbi-5338-24640-0000 0 3.45 23 | lbi-5338-284437-0000 lbi-5338-284437-0000 0 4.55 24 | lbi-5694-64038-0000 lbi-5694-64038-0000 0 2.595 25 | lbi-5895-34615-0000 lbi-5895-34615-0000 0 3.335 26 | lbi-5895-34622-0000 lbi-5895-34622-0000 0 3.37 27 | lbi-5895-34629-0000 lbi-5895-34629-0000 0 2.26 28 | lbi-6241-61943-0000 lbi-6241-61943-0000 0 6.95 29 | lbi-6241-61946-0000 lbi-6241-61946-0000 0 6.235 30 | lbi-6295-244435-0000 lbi-6295-244435-0000 0 3.105 31 | lbi-6319-57405-0000 lbi-6319-57405-0000 0 7.295 32 | lbi-777-126732-0000 lbi-777-126732-0000 0 2.74 33 | lbi-7850-281318-0000 lbi-7850-281318-0000 0 4.175 34 | lbi-7850-286674-0000 lbi-7850-286674-0000 0 8.455 35 | lbi-7976-110523-0000 lbi-7976-110523-0000 0 15.22 36 | lbi-8297-275156-0000 lbi-8297-275156-0000 0 3.58 37 | lbi-84-121550-0000 lbi-84-121550-0000 0 8.435 38 | lbi-8842-304647-0000 lbi-8842-304647-0000 0 9.71 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/spk2gender: -------------------------------------------------------------------------------- 1 | lbi-1272-135031 m 2 | lbi-1272-141231 m 3 | lbi-1462-170142 f 4 | lbi-1462-170145 f 5 | lbi-174-168635 m 6 | lbi-1988-147956 f 7 | lbi-1988-24833 f 8 | lbi-1993-147964 f 9 | lbi-2035-147960 f 10 | 
lbi-2035-147961 f 11 | lbi-2035-152373 f 12 | lbi-2412-153948 f 13 | lbi-2428-83699 m 14 | lbi-251-118436 m 15 | lbi-251-136532 m 16 | lbi-2803-154320 m 17 | lbi-2803-161169 m 18 | lbi-3000-15664 m 19 | lbi-3536-23268 f 20 | lbi-3576-138058 f 21 | lbi-3752-4944 m 22 | lbi-5338-24640 f 23 | lbi-5338-284437 f 24 | lbi-5694-64038 m 25 | lbi-5895-34615 f 26 | lbi-5895-34622 f 27 | lbi-5895-34629 f 28 | lbi-6241-61943 m 29 | lbi-6241-61946 m 30 | lbi-6295-244435 m 31 | lbi-6319-57405 f 32 | lbi-777-126732 m 33 | lbi-7850-281318 f 34 | lbi-7850-286674 f 35 | lbi-7976-110523 m 36 | lbi-8297-275156 m 37 | lbi-84-121550 f 38 | lbi-8842-304647 f 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/spk2utt: -------------------------------------------------------------------------------- 1 | lbi-1272-135031 lbi-1272-135031-0000 2 | lbi-1272-141231 lbi-1272-141231-0000 3 | lbi-1462-170142 lbi-1462-170142-0000 4 | lbi-1462-170145 lbi-1462-170145-0000 5 | lbi-174-168635 lbi-174-168635-0000 6 | lbi-1988-147956 lbi-1988-147956-0000 7 | lbi-1988-24833 lbi-1988-24833-0000 8 | lbi-1993-147964 lbi-1993-147964-0000 9 | lbi-2035-147960 lbi-2035-147960-0000 10 | lbi-2035-147961 lbi-2035-147961-0000 11 | lbi-2035-152373 lbi-2035-152373-0000 12 | lbi-2412-153948 lbi-2412-153948-0000 13 | lbi-2428-83699 lbi-2428-83699-0000 14 | lbi-251-118436 lbi-251-118436-0000 15 | lbi-251-136532 lbi-251-136532-0000 16 | lbi-2803-154320 lbi-2803-154320-0000 17 | lbi-2803-161169 lbi-2803-161169-0000 18 | lbi-3000-15664 lbi-3000-15664-0000 19 | lbi-3536-23268 lbi-3536-23268-0000 20 | lbi-3576-138058 lbi-3576-138058-0000 21 | lbi-3752-4944 lbi-3752-4944-0000 22 | lbi-5338-24640 lbi-5338-24640-0000 23 | lbi-5338-284437 lbi-5338-284437-0000 24 | lbi-5694-64038 lbi-5694-64038-0000 25 | lbi-5895-34615 lbi-5895-34615-0000 26 | lbi-5895-34622 lbi-5895-34622-0000 27 | lbi-5895-34629 lbi-5895-34629-0000 28 | lbi-6241-61943 lbi-6241-61943-0000 29 | 
lbi-6241-61946 lbi-6241-61946-0000 30 | lbi-6295-244435 lbi-6295-244435-0000 31 | lbi-6319-57405 lbi-6319-57405-0000 32 | lbi-777-126732 lbi-777-126732-0000 33 | lbi-7850-281318 lbi-7850-281318-0000 34 | lbi-7850-286674 lbi-7850-286674-0000 35 | lbi-7976-110523 lbi-7976-110523-0000 36 | lbi-8297-275156 lbi-8297-275156-0000 37 | lbi-84-121550 lbi-84-121550-0000 38 | lbi-8842-304647 lbi-8842-304647-0000 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2dur: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 10.885 2 | lbi-1272-141231-0000 4.65 3 | lbi-1462-170142-0000 4.715 4 | lbi-1462-170145-0000 15.405 5 | lbi-174-168635-0000 4.53 6 | lbi-1988-147956-0000 14.95 7 | lbi-1988-24833-0000 3.32 8 | lbi-1993-147964-0000 8.42 9 | lbi-2035-147960-0000 9.02 10 | lbi-2035-147961-0000 15.07 11 | lbi-2035-152373-0000 7.875 12 | lbi-2412-153948-0000 11.66 13 | lbi-2428-83699-0000 13.305 14 | lbi-251-118436-0000 6.26 15 | lbi-251-136532-0000 9.81 16 | lbi-2803-154320-0000 11.48 17 | lbi-2803-161169-0000 11.7 18 | lbi-3000-15664-0000 3.13 19 | lbi-3536-23268-0000 19.89 20 | lbi-3576-138058-0000 14.14 21 | lbi-3752-4944-0000 3.335 22 | lbi-5338-24640-0000 3.45 23 | lbi-5338-284437-0000 4.55 24 | lbi-5694-64038-0000 2.595 25 | lbi-5895-34615-0000 3.335 26 | lbi-5895-34622-0000 3.37 27 | lbi-5895-34629-0000 2.26 28 | lbi-6241-61943-0000 6.95 29 | lbi-6241-61946-0000 6.235 30 | lbi-6295-244435-0000 3.105 31 | lbi-6319-57405-0000 7.295 32 | lbi-777-126732-0000 2.74 33 | lbi-7850-281318-0000 4.175 34 | lbi-7850-286674-0000 8.455 35 | lbi-7976-110523-0000 15.22 36 | lbi-8297-275156-0000 3.58 37 | lbi-84-121550-0000 8.435 38 | lbi-8842-304647-0000 9.71 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2num_frames: 
-------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 1087 2 | lbi-1272-141231-0000 463 3 | lbi-1462-170142-0000 470 4 | lbi-1462-170145-0000 1539 5 | lbi-174-168635-0000 451 6 | lbi-1988-147956-0000 1493 7 | lbi-1988-24833-0000 330 8 | lbi-1993-147964-0000 840 9 | lbi-2035-147960-0000 900 10 | lbi-2035-147961-0000 1505 11 | lbi-2035-152373-0000 786 12 | lbi-2412-153948-0000 1164 13 | lbi-2428-83699-0000 1329 14 | lbi-251-118436-0000 624 15 | lbi-251-136532-0000 979 16 | lbi-2803-154320-0000 1146 17 | lbi-2803-161169-0000 1168 18 | lbi-3000-15664-0000 311 19 | lbi-3536-23268-0000 1987 20 | lbi-3576-138058-0000 1412 21 | lbi-3752-4944-0000 332 22 | lbi-5338-24640-0000 343 23 | lbi-5338-284437-0000 453 24 | lbi-5694-64038-0000 258 25 | lbi-5895-34615-0000 332 26 | lbi-5895-34622-0000 335 27 | lbi-5895-34629-0000 224 28 | lbi-6241-61943-0000 693 29 | lbi-6241-61946-0000 622 30 | lbi-6295-244435-0000 309 31 | lbi-6319-57405-0000 728 32 | lbi-777-126732-0000 272 33 | lbi-7850-281318-0000 416 34 | lbi-7850-286674-0000 844 35 | lbi-7976-110523-0000 1520 36 | lbi-8297-275156-0000 356 37 | lbi-84-121550-0000 842 38 | lbi-8842-304647-0000 969 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech/utt2spk: -------------------------------------------------------------------------------- 1 | lbi-1272-135031-0000 lbi-1272-135031 2 | lbi-1272-141231-0000 lbi-1272-141231 3 | lbi-1462-170142-0000 lbi-1462-170142 4 | lbi-1462-170145-0000 lbi-1462-170145 5 | lbi-174-168635-0000 lbi-174-168635 6 | lbi-1988-147956-0000 lbi-1988-147956 7 | lbi-1988-24833-0000 lbi-1988-24833 8 | lbi-1993-147964-0000 lbi-1993-147964 9 | lbi-2035-147960-0000 lbi-2035-147960 10 | lbi-2035-147961-0000 lbi-2035-147961 11 | lbi-2035-152373-0000 lbi-2035-152373 12 | lbi-2412-153948-0000 lbi-2412-153948 13 | lbi-2428-83699-0000 lbi-2428-83699 14 | lbi-251-118436-0000 lbi-251-118436 15 | 
lbi-251-136532-0000 lbi-251-136532 16 | lbi-2803-154320-0000 lbi-2803-154320 17 | lbi-2803-161169-0000 lbi-2803-161169 18 | lbi-3000-15664-0000 lbi-3000-15664 19 | lbi-3536-23268-0000 lbi-3536-23268 20 | lbi-3576-138058-0000 lbi-3576-138058 21 | lbi-3752-4944-0000 lbi-3752-4944 22 | lbi-5338-24640-0000 lbi-5338-24640 23 | lbi-5338-284437-0000 lbi-5338-284437 24 | lbi-5694-64038-0000 lbi-5694-64038 25 | lbi-5895-34615-0000 lbi-5895-34615 26 | lbi-5895-34622-0000 lbi-5895-34622 27 | lbi-5895-34629-0000 lbi-5895-34629 28 | lbi-6241-61943-0000 lbi-6241-61943 29 | lbi-6241-61946-0000 lbi-6241-61946 30 | lbi-6295-244435-0000 lbi-6295-244435 31 | lbi-6319-57405-0000 lbi-6319-57405 32 | lbi-777-126732-0000 lbi-777-126732 33 | lbi-7850-281318-0000 lbi-7850-281318 34 | lbi-7850-286674-0000 lbi-7850-286674 35 | lbi-7976-110523-0000 lbi-7976-110523 36 | lbi-8297-275156-0000 lbi-8297-275156 37 | lbi-84-121550-0000 lbi-84-121550 38 | lbi-8842-304647-0000 lbi-8842-304647 39 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false # only non-default option. 
2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.ark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.ark -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/data/raw_mfcc_mini_librispeech2.1.scp: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 data/raw_mfcc_mini_librispeech2.1.ark:20 2 | lbi-6241-61943-0000 data/raw_mfcc_mini_librispeech2.1.ark:25996 3 | lbi-8842-304647-0000 data/raw_mfcc_mini_librispeech2.1.ark:35151 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/feats.scp: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 data/raw_mfcc_mini_librispeech2.1.ark:20 2 | lbi-6241-61943-0000 data/raw_mfcc_mini_librispeech2.1.ark:25996 3 | lbi-8842-304647-0000 data/raw_mfcc_mini_librispeech2.1.ark:35151 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/frame_shift: -------------------------------------------------------------------------------- 1 | 0.01 2 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/features.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/features.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/recordings.jsonl.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/recordings.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/lhotse/supervisions.jsonl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mini_librispeech2/lhotse/supervisions.jsonl.gz -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/reco2dur: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 19.889999389648438 2 | lbi-6241-61943-0000 6.949999809265137 3 | lbi-8842-304647-0000 9.710000038146973 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/segments: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 lbi-3536-23268-0000 1.0 20.89 2 | lbi-6241-61943-0000 lbi-6241-61943-0000 0 6.95 3 | lbi-8842-304647-0000 lbi-8842-304647-0000 0 9.71 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/spk2gender: -------------------------------------------------------------------------------- 1 | lbi-3536-23268 f 2 | lbi-6241-61943 m 3 | lbi-8842-304647 f 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/spk2utt: -------------------------------------------------------------------------------- 1 | lbi-3536-23268 lbi-3536-23268-0000 2 | lbi-6241-61943 lbi-6241-61943-0000 3 | lbi-8842-304647 lbi-8842-304647-0000 4 | 
-------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/text: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 SIR EDWARD NOT WHOLLY DISCOURAGED BY THE DENIAL WITH WHICH DORRIFORTH HAD WITH DELICACY ACQUAINTED HIM STILL HOPED FOR A KIND RECEPTION AND WAS SO OFTEN AT THE HOUSE OF MISSUS HORTON THAT LORD FREDERICK'S JEALOUSY WAS EXCITED AND THE TORTURES HE SUFFERED IN CONSEQUENCE CONVINCED HIM BEYOND A DOUBT OF THE SINCERITY OF HIS AFFECTION 2 | lbi-6241-61943-0000 ON THE SECOND OF THE MONTH AT TWO IN THE MORNING OUR PRECIOUS CARGO OF LUGGAGE WAS TAKEN ON BOARD THE GOOD SHIP VALKYRIE 3 | lbi-8842-304647-0000 HE LIVES THY LOSS HE DIES FROM EVERY LIMB MANGLED BY THEE LIGHTNINGS OF GODHEAD SHINE FROM WHICH THY DARKNESS HATH NOT WHERE TO HIDE 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2dur: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 19.89 2 | lbi-6241-61943-0000 6.95 3 | lbi-8842-304647-0000 9.71 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2num_frames: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 1987 2 | lbi-6241-61943-0000 693 3 | lbi-8842-304647-0000 969 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/utt2spk: -------------------------------------------------------------------------------- 1 | lbi-3536-23268-0000 lbi-3536-23268 2 | lbi-6241-61943-0000 lbi-6241-61943 3 | lbi-8842-304647-0000 lbi-8842-304647 4 | -------------------------------------------------------------------------------- /test/fixtures/mini_librispeech2/wav.scp: -------------------------------------------------------------------------------- 1 | 
lbi-3536-23268-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 318240s sine 300-3300 | 2 | lbi-6241-61943-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 111200s sine 300-3300 | 3 | lbi-8842-304647-0000 sox -r 16000 -b16 -c 1 --null -t wav - synth 155360s sine 300-3300 | 4 | -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/audio/storage/2412-153948-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/audio/storage/2412-153948-0000.flac -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/audio/storage/2412-153948-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/audio/storage/2412-153948-0001.flac -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/feats/storage/5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/feats/storage/5078e7eb-57a6-4000-b0f2-fa4bf9c52090.llc -------------------------------------------------------------------------------- /test/fixtures/mix_cut_test/feats/storage/9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mix_cut_test/feats/storage/9dc645db-cbe4-4529-85e4-b6ed4f59c340.llc -------------------------------------------------------------------------------- 
/test/fixtures/mono_c0.opus: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c0.opus -------------------------------------------------------------------------------- /test/fixtures/mono_c0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c0.wav -------------------------------------------------------------------------------- /test/fixtures/mono_c1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/mono_c1.wav -------------------------------------------------------------------------------- /test/fixtures/rir/real_8ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/rir/real_8ch.wav -------------------------------------------------------------------------------- /test/fixtures/rir/sim_1ch.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/rir/sim_1ch.wav -------------------------------------------------------------------------------- /test/fixtures/stereo.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.mp3 -------------------------------------------------------------------------------- /test/fixtures/stereo.opus: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.opus -------------------------------------------------------------------------------- /test/fixtures/stereo.sph: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.sph -------------------------------------------------------------------------------- /test/fixtures/stereo.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/fixtures/stereo.wav -------------------------------------------------------------------------------- /test/fixtures/supervision.ctm: -------------------------------------------------------------------------------- 1 | recording-1 0 0.10 0.08 transcript 2 | recording-1 0 0.18 0.02 of 3 | recording-1 0 0.20 0.03 the 4 | recording-1 0 0.23 0.07 first 5 | recording-1 0 0.30 0.10 segment 6 | -------------------------------------------------------------------------------- /test/fixtures/supervision.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "segment-1", 4 | "recording_id": "recording-1", 5 | "channel": 0, 6 | "start": 0.1, 7 | "duration": 0.3, 8 | "text": "transcript of the first segment", 9 | "language": "english", 10 | "speaker": "Norman Dyhrentfurth" 11 | }, 12 | { 13 | "id": "segment-2", 14 | "recording_id": "recording-1", 15 | "start": 0.5, 16 | "duration": 0.4 17 | }, 18 | { 19 | "id": "segment-3", 20 | "recording_id": "recording-2", 21 | "start": 0.1, 22 | "duration": 0.2 23 | }, 24 | { 25 | "id": "segment-4", 26 | "recording_id": "recording-2", 27 | "start": 0.3, 28 | "duration": 0.2 29 | } 30 | ] -------------------------------------------------------------------------------- 
/test/fixtures/supervision_with_scores.ctm: -------------------------------------------------------------------------------- 1 | recording-1 0 0.10 0.08 transcript 0.9 2 | recording-1 0 0.18 0.02 of 0.8 3 | recording-1 0 0.20 0.03 the 0.85 4 | recording-1 0 0.23 0.07 first 0.7 5 | recording-1 0 0.30 0.10 segment 0.98 6 | -------------------------------------------------------------------------------- /test/known_issues/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/known_issues/__init__.py -------------------------------------------------------------------------------- /test/known_issues/test_lazy_cuts_issues.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile, TemporaryDirectory 2 | 3 | from lhotse import CutSet, combine, load_manifest_lazy 4 | from lhotse.testing.dummies import DummyManifest 5 | 6 | 7 | def test_lazy_cuts_combine_split_issue(): 8 | # Test for lack of exception 9 | cuts = DummyManifest(CutSet, begin_id=0, end_id=1000) 10 | with TemporaryDirectory() as d, NamedTemporaryFile(suffix=".jsonl.gz") as f: 11 | cuts.to_file(f.name) 12 | f.flush() 13 | 14 | cuts_lazy = load_manifest_lazy(f.name) 15 | cuts_lazy = combine(cuts_lazy, cuts_lazy.perturb_speed(0.9)) 16 | cuts_lazy.split_lazy(d, chunk_size=100) 17 | -------------------------------------------------------------------------------- /test/known_issues/test_mixing_zero_energy_cuts.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from lhotse import CutSet 5 | from lhotse.dataset.collation import collate_audio 6 | from lhotse.testing.fixtures import RandomCutTestCase 7 | from lhotse.utils import NonPositiveEnergyError 8 | 9 | 10 | class TestMixZeroEnergyCuts(RandomCutTestCase): 11 | 
@pytest.mark.parametrize("snr", [None, 10]) 12 | def test_mix_zero_energy_cut_raises(self, snr): 13 | sr = 16000 14 | zero_cut = self.with_cut( 15 | sampling_rate=sr, num_samples=sr, features=False, use_zeroes=True 16 | ) 17 | rand_cut = self.with_cut(sampling_rate=sr, num_samples=sr, features=False) 18 | 19 | mixed = zero_cut.mix(rand_cut, snr=snr) 20 | 21 | mix_cut_samples = mixed.load_audio() 22 | np.testing.assert_equal(rand_cut.load_audio(), mix_cut_samples) 23 | -------------------------------------------------------------------------------- /test/recipes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/recipes/__init__.py -------------------------------------------------------------------------------- /test/recipes/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from lhotse.audio import RecordingSet 8 | from lhotse.recipes.utils import read_manifests_if_cached 9 | from lhotse.supervision import SupervisionSet 10 | 11 | 12 | @pytest.fixture 13 | def recording_set() -> RecordingSet: 14 | return RecordingSet.from_json("test/fixtures/audio.json") 15 | 16 | 17 | @pytest.fixture 18 | def supervision_set() -> SupervisionSet: 19 | return SupervisionSet.from_json( 20 | "test/fixtures/supervision.json" 21 | ).with_alignment_from_ctm("test/fixtures/supervision.ctm") 22 | 23 | 24 | def test_read_manifests_if_cached( 25 | recording_set: RecordingSet, supervision_set: SupervisionSet 26 | ): 27 | tmp_test_dir = Path(f"{tempfile.gettempdir()}/lhotse_test_read_manifests_if_cached") 28 | if not tmp_test_dir.exists(): 29 | tmp_test_dir.mkdir() 30 | data_part = "dev" 31 | suffix = "jsonl.gz" 32 | tmp_recording_set_file = tmp_test_dir / f"recordings_{data_part}.{suffix}" 33 | 
tmp_supervision_set_file = tmp_test_dir / f"supervisions_{data_part}.{suffix}" 34 | recording_set.to_jsonl(tmp_recording_set_file) 35 | supervision_set.to_jsonl(tmp_supervision_set_file) 36 | 37 | try: 38 | cached_manifests = read_manifests_if_cached( 39 | [data_part], output_dir=tmp_test_dir 40 | ) 41 | assert data_part in cached_manifests 42 | assert cached_manifests[data_part]["recordings"] == recording_set 43 | 44 | cached_manifests = read_manifests_if_cached(data_part, output_dir=tmp_test_dir) 45 | assert data_part in cached_manifests 46 | assert cached_manifests[data_part]["recordings"] == recording_set 47 | finally: 48 | os.remove(tmp_recording_set_file) 49 | os.remove(tmp_supervision_set_file) 50 | -------------------------------------------------------------------------------- /test/shar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/shar/__init__.py -------------------------------------------------------------------------------- /test/shar/test_missing_values.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lhotse import CutSet 4 | from lhotse.testing.dummies import DummyManifest 5 | 6 | 7 | @pytest.mark.parametrize("drop_everything", [True, False]) 8 | def test_cut_set_from_shar(tmp_path, drop_everything): 9 | # Prepare data -- it needs to have missing values for some cuts 10 | cuts = DummyManifest(CutSet, begin_id=0, end_id=20, with_data=True) 11 | cuts[0].recording = None 12 | cuts[0].features = None 13 | cuts[0].custom_indexes = None 14 | cuts[0].custom_recording = None 15 | cuts[0].custom_features = None 16 | if drop_everything: 17 | cuts[0].custom_embedding = None 18 | 19 | # Prepare system under test 20 | cuts.to_shar( 21 | tmp_path, 22 | fields={ 23 | "recording": "wav", 24 | "features": "lilcom", 25 | "custom_embedding": "numpy", 26 | 
"custom_features": "lilcom", 27 | "custom_indexes": "numpy", 28 | "custom_recording": "wav", 29 | }, 30 | shard_size=10, 31 | ) 32 | cuts_shar = CutSet.from_shar(in_dir=tmp_path).to_eager() 33 | 34 | assert not cuts_shar[0].has_recording 35 | assert not cuts_shar[0].has_features 36 | assert not cuts_shar[0].has_custom("custom_indexes") 37 | assert not cuts_shar[0].has_custom("custom_recording") 38 | assert not cuts_shar[0].has_custom("custom_features") 39 | assert cuts_shar[0].has_custom("custom_embedding") == (not drop_everything) 40 | for cut in cuts_shar.subset(last=19): 41 | assert cut.has_recording 42 | assert cut.has_features 43 | assert cut.has_custom("custom_indexes") 44 | assert cut.has_custom("custom_recording") 45 | assert cut.has_custom("custom_features") 46 | assert cut.has_custom("custom_embedding") 47 | -------------------------------------------------------------------------------- /test/test_parallel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from lhotse.parallel import parallel_map 4 | 5 | 6 | def pow2(x): 7 | return x**2 8 | 9 | 10 | def mul(x, y): 11 | return x * y 12 | 13 | 14 | @pytest.mark.parametrize("num_jobs", [1, 2]) 15 | def test_parallel_map_num_jobs(num_jobs): 16 | squares = list(map(pow2, range(100))) 17 | squares_parallel = list(parallel_map(pow2, range(100), num_jobs=num_jobs)) 18 | assert squares == squares_parallel 19 | 20 | 21 | def test_parallel_map_threads(): 22 | squares = list(map(pow2, range(100))) 23 | squares_parallel = list(parallel_map(pow2, range(100), num_jobs=2, threads=True)) 24 | assert squares == squares_parallel 25 | 26 | 27 | def test_parallel_map_two_iterables(): 28 | squares = list(map(mul, range(100), range(100))) 29 | squares_parallel = list(parallel_map(mul, range(100), range(100), num_jobs=2)) 30 | assert squares == squares_parallel 31 | -------------------------------------------------------------------------------- 
/test/video/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lhotse-speech/lhotse/1b5e31d56b55942bfff94fe3ce221e3968af17e8/test/video/__init__.py -------------------------------------------------------------------------------- /test/video/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from lhotse import Recording 6 | from lhotse.audio.backend import torchaudio_ffmpeg_backend_available 7 | 8 | # Disable video tests for PyTorch/Torchaudio < 2.0 9 | collect_ignore = [] 10 | if not torchaudio_ffmpeg_backend_available(): 11 | collect_ignore_glob = ["test_video_*.py"] 12 | 13 | 14 | @pytest.fixture(scope="session") 15 | def video_path() -> Path: 16 | return Path("test/fixtures/big_buck_bunny_small.mp4") 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def video_recording(video_path) -> Recording: 21 | return Recording.from_file(video_path) 22 | -------------------------------------------------------------------------------- /test/video/test_video_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torch.utils.data import DataLoader 3 | 4 | from lhotse import CutSet, MultiCut 5 | from lhotse.dataset import DynamicCutSampler 6 | from lhotse.dataset.collation import collate_video 7 | from lhotse.dataset.video import UnsupervisedAudioVideoDataset 8 | 9 | COLOR = 3 10 | HEIGHT = 240 11 | WIDTH = 320 12 | FPS = 25.0 13 | FRAMES = 132 14 | AUDIO_CHANNELS = 6 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def video_cut(video_recording) -> MultiCut: 19 | return video_recording.to_cut() 20 | 21 | 22 | @pytest.fixture(scope="session") 23 | def video_cut_set(video_cut) -> CutSet: 24 | return ( 25 | CutSet.from_cuts([video_cut]) 26 | .resample(16000) 27 | .cut_into_windows(duration=1.0, hop=0.48) 28 | .filter(lambda c: 
c.duration > 1 / FPS) 29 | .repeat(100) 30 | ) 31 | 32 | 33 | def test_collate_video(video_cut): 34 | cuts = CutSet.from_cuts([video_cut]).repeat(2) 35 | video, video_lens, audio, audio_lens = collate_video(cuts) 36 | assert video.shape == (2, FRAMES, COLOR, HEIGHT, WIDTH) 37 | assert video_lens.tolist() == [FRAMES, FRAMES] 38 | assert audio.shape == (2, AUDIO_CHANNELS, 253440) 39 | assert audio_lens.tolist() == [253440, 253440] 40 | 41 | 42 | def test_video_dataloading(video_cut_set): 43 | dataset = UnsupervisedAudioVideoDataset() 44 | sampler = DynamicCutSampler(video_cut_set, max_duration=2.0, shuffle=True) 45 | dloader = DataLoader(dataset, sampler=sampler, batch_size=None) 46 | 47 | for step, batch in enumerate(dloader): 48 | if step == 10: 49 | break 50 | 51 | for k in "cuts video audio video_lens audio_lens".split(): 52 | assert k in batch 53 | 54 | # Mostly just test that it runs without exceptions for a few steps. 55 | -------------------------------------------------------------------------------- /tools/make_release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eou pipefail # "strict" mode 4 | 5 | set -x # show executed commands 6 | 7 | # Clean up old builds. 8 | rm -rf dist/ build/ lhotse.egg_info/ 9 | 10 | export LHOTSE_PREPARING_RELEASE=1 11 | 12 | # Build wheels and package current source code 13 | python setup.py sdist bdist_wheel 14 | 15 | set +x # stop showing the executed commands 16 | 17 | echo 18 | echo "Lhotse is packaged SUCCESSFULLY!" 19 | echo 20 | echo "To upload a TEST RELEASE to testpypi (recommended):" 21 | echo " twine upload -r testpypi dist/*" 22 | echo 23 | echo "To upload a PUBLIC RELEASE to pypi:" 24 | echo " twine upload dist/*" 25 | --------------------------------------------------------------------------------