├── LICENSE ├── README.md ├── bin └── asr_inference.py ├── conf ├── decode_asr.yaml ├── fast_decode_asr.yaml └── fast_decode_asr_ksponspeech.yaml ├── evalset ├── ksponspeech │ ├── data │ │ ├── text │ │ └── wav.scp │ └── wavs │ │ ├── KsponSpeech_E00001.wav │ │ ├── KsponSpeech_E00002.wav │ │ ├── KsponSpeech_E00003.wav │ │ ├── KsponSpeech_E00004.wav │ │ ├── KsponSpeech_E00005.wav │ │ ├── KsponSpeech_E00006.wav │ │ ├── KsponSpeech_E00007.wav │ │ ├── KsponSpeech_E00008.wav │ │ ├── KsponSpeech_E00009.wav │ │ └── KsponSpeech_E00010.wav ├── librispeech │ ├── data │ │ ├── text │ │ └── wav.scp │ └── wavs │ │ ├── 1089-134686-0000.flac │ │ ├── 1089-134686-0001.flac │ │ ├── 1089-134686-0002.flac │ │ ├── 1089-134686-0003.flac │ │ ├── 1089-134686-0004.flac │ │ ├── 1089-134686-0005.flac │ │ ├── 1089-134686-0006.flac │ │ ├── 1089-134686-0007.flac │ │ ├── 1089-134686-0008.flac │ │ └── 1089-134686-0009.flac └── zeroth_korean │ ├── data │ ├── text │ └── wav.scp │ └── wavs │ ├── 104_003_0019.flac │ ├── 104_003_0193.flac │ ├── 104_003_0253.flac │ ├── 104_003_0280.flac │ ├── 104_003_0294.flac │ ├── 104_003_0352.flac │ ├── 104_003_0360.flac │ ├── 104_003_0374.flac │ ├── 104_003_0437.flac │ ├── 104_003_0451.flac │ ├── 104_003_0452.flac │ ├── 104_003_0472.flac │ ├── 104_003_0526.flac │ ├── 104_003_0555.flac │ └── 104_003_0577.flac ├── recipes └── README.md ├── tools ├── download_mdl.sh └── recog_youtube.sh └── utils └── parse_options.sh /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/README.md -------------------------------------------------------------------------------- /bin/asr_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/bin/asr_inference.py -------------------------------------------------------------------------------- /conf/decode_asr.yaml: -------------------------------------------------------------------------------- 1 | ctc_weight: 0.3 2 | beam_size: 20 3 | -------------------------------------------------------------------------------- /conf/fast_decode_asr.yaml: -------------------------------------------------------------------------------- 1 | ctc_weight: 0.0 2 | beam_size: 3 3 | maxlenratio: 0.3 4 | -------------------------------------------------------------------------------- /conf/fast_decode_asr_ksponspeech.yaml: -------------------------------------------------------------------------------- 1 | ctc_weight: 0.0 2 | beam_size: 3 3 | maxlenratio: 0.6 4 | -------------------------------------------------------------------------------- /evalset/ksponspeech/data/text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/data/text -------------------------------------------------------------------------------- /evalset/ksponspeech/data/wav.scp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/data/wav.scp -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00001.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00002.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00002.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00003.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00003.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00004.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00004.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00005.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00005.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00006.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00007.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00007.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00008.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00008.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00009.wav -------------------------------------------------------------------------------- /evalset/ksponspeech/wavs/KsponSpeech_E00010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/ksponspeech/wavs/KsponSpeech_E00010.wav -------------------------------------------------------------------------------- /evalset/librispeech/data/text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/data/text -------------------------------------------------------------------------------- /evalset/librispeech/data/wav.scp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/data/wav.scp -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0000.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0000.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0001.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0001.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0002.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0002.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0003.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0003.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0004.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0004.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0005.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0005.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0006.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0006.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0007.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0007.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0008.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0008.flac -------------------------------------------------------------------------------- /evalset/librispeech/wavs/1089-134686-0009.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/librispeech/wavs/1089-134686-0009.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/data/text: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/data/text -------------------------------------------------------------------------------- /evalset/zeroth_korean/data/wav.scp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/data/wav.scp -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0019.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0019.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0193.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0193.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0253.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0253.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0280.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0280.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0294.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0294.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0352.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0352.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0360.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0360.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0374.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0374.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0437.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0437.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0451.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0451.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0452.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0452.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0472.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0472.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0526.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0526.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0555.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0555.flac -------------------------------------------------------------------------------- /evalset/zeroth_korean/wavs/104_003_0577.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/evalset/zeroth_korean/wavs/104_003_0577.flac -------------------------------------------------------------------------------- /recipes/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/recipes/README.md -------------------------------------------------------------------------------- /tools/download_mdl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/tools/download_mdl.sh -------------------------------------------------------------------------------- /tools/recog_youtube.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/tools/recog_youtube.sh -------------------------------------------------------------------------------- /utils/parse_options.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hchung12/espnet-asr/HEAD/utils/parse_options.sh --------------------------------------------------------------------------------