├── .gitignore ├── LICENSE ├── README.md ├── SAFEBOX └── README.md ├── env_vars.sh ├── misc ├── debug │ └── debug.list └── metadata_versions.txt ├── toolkits ├── athena │ ├── extract_meta.py │ └── prepare_data.py ├── kaldi │ ├── extract_meta.py │ ├── gigaspeech_data_prep.sh │ └── utt2spk_to_spk2utt.pl └── wenet │ ├── extract_meta.py │ └── gigaspeech_data_prep.sh └── utils ├── check_audio_md5.sh ├── check_metadata_md5.sh ├── download_gigaspeech.sh ├── extract_metadata_version.sh ├── extract_subset_segments.py ├── gigaspeech_scoring.py ├── install_jq.sh ├── internal ├── download_gigaspeech_from_oss.sh └── download_gigaspeech_with_pyspeechcolab.sh ├── ls_audios.sh ├── ls_md5.sh ├── opus_to_wav.py ├── parse_options.sh └── show_segment_info.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/README.md -------------------------------------------------------------------------------- /SAFEBOX/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/SAFEBOX/README.md -------------------------------------------------------------------------------- /env_vars.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/env_vars.sh -------------------------------------------------------------------------------- /misc/debug/debug.list: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/misc/debug/debug.list -------------------------------------------------------------------------------- /misc/metadata_versions.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/misc/metadata_versions.txt -------------------------------------------------------------------------------- /toolkits/athena/extract_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/athena/extract_meta.py -------------------------------------------------------------------------------- /toolkits/athena/prepare_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/athena/prepare_data.py -------------------------------------------------------------------------------- /toolkits/kaldi/extract_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/kaldi/extract_meta.py -------------------------------------------------------------------------------- /toolkits/kaldi/gigaspeech_data_prep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/kaldi/gigaspeech_data_prep.sh -------------------------------------------------------------------------------- /toolkits/kaldi/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/kaldi/utt2spk_to_spk2utt.pl -------------------------------------------------------------------------------- /toolkits/wenet/extract_meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/wenet/extract_meta.py -------------------------------------------------------------------------------- /toolkits/wenet/gigaspeech_data_prep.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/toolkits/wenet/gigaspeech_data_prep.sh -------------------------------------------------------------------------------- /utils/check_audio_md5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/check_audio_md5.sh -------------------------------------------------------------------------------- /utils/check_metadata_md5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/check_metadata_md5.sh -------------------------------------------------------------------------------- /utils/download_gigaspeech.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/download_gigaspeech.sh -------------------------------------------------------------------------------- /utils/extract_metadata_version.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/extract_metadata_version.sh -------------------------------------------------------------------------------- /utils/extract_subset_segments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/extract_subset_segments.py -------------------------------------------------------------------------------- /utils/gigaspeech_scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/gigaspeech_scoring.py -------------------------------------------------------------------------------- /utils/install_jq.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/install_jq.sh -------------------------------------------------------------------------------- /utils/internal/download_gigaspeech_from_oss.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/internal/download_gigaspeech_from_oss.sh -------------------------------------------------------------------------------- /utils/internal/download_gigaspeech_with_pyspeechcolab.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/internal/download_gigaspeech_with_pyspeechcolab.sh -------------------------------------------------------------------------------- /utils/ls_audios.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/ls_audios.sh -------------------------------------------------------------------------------- /utils/ls_md5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/ls_md5.sh -------------------------------------------------------------------------------- /utils/opus_to_wav.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/opus_to_wav.py -------------------------------------------------------------------------------- /utils/parse_options.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/parse_options.sh -------------------------------------------------------------------------------- /utils/show_segment_info.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpeechColab/GigaSpeech/HEAD/utils/show_segment_info.sh --------------------------------------------------------------------------------