├── .clang-format ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── android.yml │ ├── doc.yml │ ├── docker_image.yml │ ├── lint.yml │ ├── runtime.yml │ ├── stale-issues.yml │ ├── unit_test.yml │ ├── wheels.yml │ └── windows.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CPPLINT.cfg ├── LICENSE ├── README.md ├── ROADMAP.md ├── docs ├── .gitignore ├── Makefile ├── UIO.md ├── conf.py ├── context.md ├── images │ ├── UIO_dataflow.png │ ├── UIO_system.png │ ├── UIO_wenetspeech_cer.png │ ├── check_detail.png │ ├── checks.png │ ├── context_graph.png │ ├── lm_system.png │ ├── runtime_android.gif │ ├── runtime_server.gif │ ├── runtime_web.png │ ├── subsampling_overalp.gif │ └── u2.gif ├── index.rst ├── jit_in_wenet.md ├── lm.md ├── make.bat ├── papers.md ├── pretrained_models.md ├── production.rst ├── python_package.md ├── reference.rst ├── requirements.txt ├── runtime.md ├── train.rst ├── tutorial_aishell.md └── tutorial_librispeech.md ├── examples ├── aishell │ ├── NST │ │ ├── README.md │ │ ├── conf │ │ │ └── train_conformer.yaml │ │ ├── local │ │ │ ├── NST_plot.png │ │ │ ├── generate_data_list.py │ │ │ ├── generate_filtered_pseudo_label.py │ │ │ ├── get_wav_labels.py │ │ │ └── split_data_list.py │ │ ├── path.sh │ │ ├── run.sh │ │ └── run_nst.sh │ ├── paraformer │ │ ├── README.md │ │ ├── conf │ │ │ ├── train_paraformer.yaml │ │ │ └── train_paraformer_dynamic.yaml │ │ ├── local │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_npu.sh │ │ ├── tools │ │ └── wenet │ ├── rnnt │ │ ├── README.md │ │ ├── conf │ │ │ ├── conformer_rnnt.yaml │ │ │ ├── conformer_u2pp_rnnt.yaml │ │ │ └── example_embedding_predictor.yaml │ │ ├── local │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_npu.sh │ │ ├── tools │ │ └── wenet │ ├── s0 │ │ ├── README.md │ │ ├── UIO_RESULT.md │ │ ├── conf │ │ │ ├── train_conformer.yaml │ │ │ ├── train_conformer_no_pos.yaml │ │ │ ├── train_ebranchformer.yaml │ │ │ ├── 
train_transformer.yaml │ │ │ ├── train_u2++_branchformer.yaml │ │ │ ├── train_u2++_conformer.yaml │ │ │ ├── train_u2++_efficonformer_v1.yaml │ │ │ ├── train_u2++_efficonformer_v1_stream.yaml │ │ │ ├── train_u2++_efficonformer_v2.yaml │ │ │ ├── train_u2++_lite_conformer.yaml │ │ │ ├── train_u2++_transformer.yaml │ │ │ ├── train_unified_conformer.yaml │ │ │ ├── train_unified_conformer_ctl.yaml │ │ │ └── train_unified_transformer.yaml │ │ ├── local │ │ │ ├── aishell_data_prep.sh │ │ │ ├── aishell_train_lms.sh │ │ │ └── download_and_untar.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_npu.sh │ │ ├── tools │ │ └── wenet │ └── whisper │ │ ├── README.md │ │ ├── conf │ │ ├── ds_stage1.json │ │ ├── ds_stage2.json │ │ ├── ds_stage3.json │ │ ├── finetune_whisper_largev3.yaml │ │ ├── finetune_whisper_largev3_conv2d4.yaml │ │ ├── finetune_whisper_largev3_conv2d4_onlyattn.yaml │ │ └── finetune_whisper_largev3_onlyattn.yaml │ │ ├── local │ │ └── modify_ckpt.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── run_npu.sh │ │ ├── tools │ │ └── wenet ├── aishell2 │ ├── rnnt │ │ ├── README.md │ │ ├── conf │ │ │ ├── conformer_rnnt.yaml │ │ │ └── conformer_u2pp_rnnt.yaml │ │ ├── local │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_u2++_conformer.yaml │ │ ├── train_u2++_transformer.yaml │ │ ├── train_unified_conformer.yaml │ │ └── train_unified_transformer.yaml │ │ ├── local │ │ ├── prepare_data.sh │ │ ├── train_lms.sh │ │ └── word_segmentation.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── aishell4 │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_conformer.yaml │ │ └── train_u2++_conformer.yaml │ │ ├── local │ │ ├── aishell4_process_textgrid.py │ │ ├── apply_map.pl │ │ ├── copy_data_dir.sh │ │ ├── download_and_untar.sh │ │ ├── filter_scp.pl │ │ ├── prepare_data.sh │ │ ├── spk2utt_to_utt2spk.pl │ │ ├── text_format.pl │ │ ├── text_normalize.pl │ │ ├── utt2spk_to_spk2utt.pl │ │ ├── validate_data_dir.sh │ │ └── 
validate_text.pl │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── chime4 │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ └── train_conformer.yaml │ │ ├── local │ │ ├── chime4_format_dir.sh │ │ ├── chime4_gen_wav.sh │ │ ├── clean_wsj0_data_prep.sh │ │ ├── clean_wsj1_data_prep.sh │ │ ├── cstr_ndx2flist.pl │ │ ├── find_noisy_transcripts.pl │ │ ├── find_transcripts.pl │ │ ├── flist2scp.pl │ │ ├── ndx2flist.pl │ │ ├── normalize_transcript.pl │ │ ├── real_enhan_chime4_data_prep.sh │ │ ├── real_noisy_chime4_data_prep.sh │ │ ├── simu_enhan_chime4_data_prep.sh │ │ └── simu_noisy_chime4_data_prep.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── commonvoice │ └── fr │ │ ├── README.md │ │ ├── conf │ │ └── train_conformer.yaml │ │ ├── local │ │ ├── create_scp_text.py │ │ ├── download_data.sh │ │ └── prepare_data.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── csj │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ └── train_conformer.yaml │ │ ├── csj_tools │ │ ├── wn.0.parse.py │ │ ├── wn.1.split_wav.py │ │ ├── wn.2.prep.text.py │ │ ├── wn.3.mincut.py │ │ └── wn.4.make_raw_list.py │ │ ├── list_files │ │ ├── 2ch.id.list │ │ ├── test.set.1.list │ │ ├── test.set.123.list │ │ ├── test.set.2.list │ │ └── test.set.3.list │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── gigaspeech │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_conformer.yaml │ │ ├── train_conformer_bidecoder.yaml │ │ └── train_u2++_conformer.yaml │ │ ├── local │ │ ├── extract_meta.py │ │ ├── gigaspeech_data_prep.sh │ │ └── gigaspeech_scoring.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── hkust │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_960_unigram5000.model │ │ └── train_conformer.yaml │ │ ├── local │ │ ├── hkust_data_prep.sh │ │ └── hkust_normalize.pl │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── librispeech │ ├── rnnt │ │ ├── README.md │ │ ├── conf │ │ │ └── conformer_rnnt.yaml │ │ ├── local │ │ ├── path.sh │ │ 
├── run.sh │ │ ├── tools │ │ └── wenet │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_conformer.yaml │ │ ├── train_conformer_bidecoder_large.yaml │ │ ├── train_squeezeformer.yaml │ │ ├── train_squeezeformer_bidecoder_large.yaml │ │ ├── train_u2++_branchformer.yaml │ │ ├── train_u2++_conformer.yaml │ │ ├── train_u2++_efficonformer_v1.yaml │ │ ├── train_u2++_efficonformer_v2.yaml │ │ ├── train_u2++_squeezeformer.yaml │ │ └── train_unified_conformer.yaml │ │ ├── local │ │ ├── data_prep_torchaudio.sh │ │ └── download_and_untar.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── multi_cn │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_960_unigram5000.model │ │ ├── train_conformer.yaml │ │ ├── train_unified_conformer.yaml │ │ └── train_unified_transformer.yaml │ │ ├── local │ │ ├── aidatatang_data_prep.sh │ │ ├── aidatatang_download_and_untar.sh │ │ ├── aishell2_data_prep.sh │ │ ├── aishell_data_prep.sh │ │ ├── aishell_download_and_untar.sh │ │ ├── magicdata_badlist │ │ ├── magicdata_data_prep.sh │ │ ├── magicdata_download_and_untar.sh │ │ ├── primewords_data_prep.sh │ │ ├── primewords_download_and_untar.sh │ │ ├── primewords_parse_transcript.py │ │ ├── stcmds_data_prep.sh │ │ ├── stcmds_download_and_untar.sh │ │ ├── tal_data_prep.sh │ │ ├── tal_mix_data_prep.sh │ │ ├── thchs-30_data_prep.sh │ │ └── thchs_download_and_untar.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── openasr2021 │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── lang.conf │ │ └── train_conformer_large_10h.yaml │ │ ├── local │ │ ├── dump_wav.sh │ │ ├── make_absolute.sh │ │ ├── make_corpus_subset.sh │ │ ├── prepare_acoustic_training_data.pl │ │ ├── prepare_data.sh │ │ └── setup_languages.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── swbd │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ └── train_conformer.yaml │ │ ├── local │ │ ├── MSU_single_letter.txt │ │ ├── dict.patch │ │ ├── eval2000_data_prep.sh │ │ ├── extend_segments.pl │ │ ├── 
format_acronyms_dict.py │ │ ├── map_acronyms_transcripts.py │ │ ├── swbd1_data_download.sh │ │ ├── swbd1_data_prep.sh │ │ ├── swbd1_fix_speakerid.pl │ │ ├── swbd1_map_words.pl │ │ └── swbd1_prepare_dict.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── tedlium3 │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ └── train_conformer.yaml │ │ ├── local │ │ ├── download_data.sh │ │ ├── join_suffix.py │ │ └── prepare_data.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── timit │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── train_conformer.yaml │ │ └── train_transformer.yaml │ │ ├── local │ │ ├── dev_spk.list │ │ ├── phones.60-48-39.map │ │ ├── sph2pipe_process.py │ │ ├── test_spk.list │ │ ├── timit_data_prep.sh │ │ ├── timit_format_data.sh │ │ ├── timit_norm_trans.pl │ │ ├── utt2spk_to_spk2utt.pl │ │ └── validate_data_dir.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── vkw2021 │ └── s0 │ │ ├── README.md │ │ ├── conf │ │ ├── combine_finetune_5h_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml │ │ └── train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml │ │ ├── local │ │ ├── run_finetune_5h.sh │ │ ├── vkw_data_prep.sh │ │ └── vkw_kws_results.py │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet ├── wenetspeech │ ├── paraformer │ │ ├── README.md │ │ ├── conf │ │ │ └── fintune_paraformer_dynamic.yaml │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet │ ├── s0 │ │ ├── README.md │ │ ├── conf │ │ │ ├── train_conformer.yaml │ │ │ ├── train_conformer_bidecoder.yaml │ │ │ ├── train_u2++_conformer.yaml │ │ │ └── train_u2++_conformer_wenetspeech_aishell4.yaml │ │ ├── local │ │ │ ├── extract_meta.py │ │ │ ├── process_opus.py │ │ │ └── wenetspeech_data_prep.sh │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet │ └── whisper │ │ ├── README.md │ │ ├── conf │ │ ├── ds_stage1.json │ │ └── finetune_whisper_largev3.yaml │ │ ├── local │ │ ├── path.sh │ │ ├── run.sh │ │ ├── tools │ │ └── wenet 
└── wsj │ └── s0 │ ├── README.md │ ├── conf │ └── train_conformer.yaml │ ├── local │ ├── find_transcripts.pl │ ├── flist2scp.pl │ ├── ndx2flist.pl │ ├── normalize_transcript.pl │ ├── wsj_data_prep.sh │ ├── wsj_format_data.sh │ └── wsj_gen_wav.sh │ ├── path.sh │ ├── run.sh │ ├── tools │ └── wenet ├── requirements.txt ├── runtime ├── README.md ├── android │ ├── .gitignore │ ├── README.md │ ├── app │ │ ├── .gitignore │ │ ├── build.gradle │ │ ├── proguard-rules.pro │ │ ├── src │ │ │ ├── androidTest │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── mobvoi │ │ │ │ │ └── wenet │ │ │ │ │ └── ExampleInstrumentedTest.java │ │ │ ├── main │ │ │ │ ├── AndroidManifest.xml │ │ │ │ ├── assets │ │ │ │ │ └── README.md │ │ │ │ ├── cpp │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── bin │ │ │ │ │ ├── cmake │ │ │ │ │ ├── decoder │ │ │ │ │ ├── frontend │ │ │ │ │ ├── kaldi │ │ │ │ │ ├── patch │ │ │ │ │ ├── post_processor │ │ │ │ │ ├── utils │ │ │ │ │ └── wenet.cc │ │ │ │ ├── java │ │ │ │ │ └── com │ │ │ │ │ │ └── mobvoi │ │ │ │ │ │ └── wenet │ │ │ │ │ │ ├── MainActivity.java │ │ │ │ │ │ ├── Recognize.java │ │ │ │ │ │ └── VoiceRectView.java │ │ │ │ └── res │ │ │ │ │ ├── drawable-v24 │ │ │ │ │ └── ic_launcher_foreground.xml │ │ │ │ │ ├── drawable │ │ │ │ │ └── ic_launcher_background.xml │ │ │ │ │ ├── layout │ │ │ │ │ └── activity_main.xml │ │ │ │ │ ├── mipmap-anydpi-v26 │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ ├── mipmap-hdpi │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-mdpi │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xhdpi │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xxhdpi │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xxxhdpi │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── values-night │ │ │ │ │ └── themes.xml │ │ │ │ │ └── values │ │ │ │ │ ├── attrs.xml │ │ │ │ 
│ ├── colors.xml │ │ │ │ │ ├── strings.xml │ │ │ │ │ └── themes.xml │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── mobvoi │ │ │ │ └── wenet │ │ │ │ └── ExampleUnitTest.java │ │ └── wenet.keystore │ ├── build.gradle │ ├── gradle.properties │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ └── settings.gradle ├── core │ ├── api │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── wenet_api.cc │ │ └── wenet_api.h │ ├── bin │ │ ├── CMakeLists.txt │ │ ├── api_main.cc │ │ ├── decoder_main.cc │ │ ├── grpc_client_main.cc │ │ ├── grpc_server_main.cc │ │ ├── http_client_main.cc │ │ ├── http_server_main.cc │ │ ├── label_checker_main.cc │ │ ├── websocket_client_main.cc │ │ └── websocket_server_main.cc │ ├── cmake │ │ ├── boost.cmake │ │ ├── bpu.cmake │ │ ├── gflags.cmake │ │ ├── glog.cmake │ │ ├── grpc.cmake │ │ ├── gtest.cmake │ │ ├── ipex.cmake │ │ ├── libtorch.cmake │ │ ├── onnx.cmake │ │ ├── openfst.cmake │ │ ├── openvino.cmake │ │ ├── pybind11.cmake │ │ ├── wetextprocessing.cmake │ │ └── xpu.cmake │ ├── decoder │ │ ├── CMakeLists.txt │ │ ├── asr_decoder.cc │ │ ├── asr_decoder.h │ │ ├── asr_model.cc │ │ ├── asr_model.h │ │ ├── context_graph.cc │ │ ├── context_graph.h │ │ ├── ctc_endpoint.cc │ │ ├── ctc_endpoint.h │ │ ├── ctc_prefix_beam_search.cc │ │ ├── ctc_prefix_beam_search.h │ │ ├── ctc_wfst_beam_search.cc │ │ ├── ctc_wfst_beam_search.h │ │ ├── onnx_asr_model.cc │ │ ├── onnx_asr_model.h │ │ ├── params.h │ │ ├── search_interface.h │ │ ├── torch_asr_model.cc │ │ └── torch_asr_model.h │ ├── frontend │ │ ├── CMakeLists.txt │ │ ├── fbank.h │ │ ├── feature_pipeline.cc │ │ ├── feature_pipeline.h │ │ ├── fft.cc │ │ ├── fft.h │ │ └── wav.h │ ├── grpc │ │ ├── CMakeLists.txt │ │ ├── grpc_client.cc │ │ ├── grpc_client.h │ │ ├── grpc_server.cc │ │ ├── grpc_server.h │ │ └── wenet.proto │ ├── http │ │ ├── CMakeLists.txt │ │ ├── http_client.cc │ │ ├── http_client.h │ │ ├── http_server.cc │ │ └── 
http_server.h │ ├── kaldi │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ ├── base │ │ │ ├── io-funcs-inl.h │ │ │ ├── io-funcs.cc │ │ │ ├── io-funcs.h │ │ │ ├── kaldi-common.h │ │ │ ├── kaldi-error.cc │ │ │ ├── kaldi-error.h │ │ │ ├── kaldi-math.cc │ │ │ ├── kaldi-math.h │ │ │ ├── kaldi-types.h │ │ │ └── kaldi-utils.h │ │ ├── decoder │ │ │ ├── lattice-faster-decoder.cc │ │ │ ├── lattice-faster-decoder.h │ │ │ ├── lattice-faster-online-decoder.cc │ │ │ └── lattice-faster-online-decoder.h │ │ ├── fstbin │ │ │ ├── fstaddselfloops.cc │ │ │ ├── fstdeterminizestar.cc │ │ │ ├── fstisstochastic.cc │ │ │ ├── fstminimizeencoded.cc │ │ │ └── fsttablecompose.cc │ │ ├── fstext │ │ │ ├── determinize-lattice-inl.h │ │ │ ├── determinize-lattice.h │ │ │ ├── determinize-star-inl.h │ │ │ ├── determinize-star.h │ │ │ ├── fstext-lib.h │ │ │ ├── fstext-utils-inl.h │ │ │ ├── fstext-utils.h │ │ │ ├── kaldi-fst-io-inl.h │ │ │ ├── kaldi-fst-io.cc │ │ │ ├── kaldi-fst-io.h │ │ │ ├── lattice-utils-inl.h │ │ │ ├── lattice-utils.h │ │ │ ├── lattice-weight.h │ │ │ ├── pre-determinize-inl.h │ │ │ ├── pre-determinize.h │ │ │ ├── remove-eps-local-inl.h │ │ │ ├── remove-eps-local.h │ │ │ └── table-matcher.h │ │ ├── itf │ │ │ ├── decodable-itf.h │ │ │ └── options-itf.h │ │ ├── lat │ │ │ ├── CPPLINT.cfg │ │ │ ├── determinize-lattice-pruned.cc │ │ │ ├── determinize-lattice-pruned.h │ │ │ ├── kaldi-lattice.cc │ │ │ ├── kaldi-lattice.h │ │ │ ├── lattice-functions.cc │ │ │ └── lattice-functions.h │ │ ├── lm │ │ │ ├── arpa-file-parser.cc │ │ │ ├── arpa-file-parser.h │ │ │ ├── arpa-lm-compiler.cc │ │ │ └── arpa-lm-compiler.h │ │ ├── lmbin │ │ │ └── arpa2fst.cc │ │ └── util │ │ │ ├── basic-filebuf.h │ │ │ ├── const-integer-set-inl.h │ │ │ ├── const-integer-set.h │ │ │ ├── hash-list-inl.h │ │ │ ├── hash-list.h │ │ │ ├── kaldi-io-inl.h │ │ │ ├── kaldi-io.cc │ │ │ ├── kaldi-io.h │ │ │ ├── kaldi-pipebuf.h │ │ │ ├── parse-options.cc │ │ │ ├── parse-options.h │ │ │ ├── simple-io-funcs.cc │ │ │ ├── simple-io-funcs.h │ 
│ │ ├── stl-utils.h │ │ │ ├── text-utils.cc │ │ │ └── text-utils.h │ ├── patch │ │ ├── CPPLINT.cfg │ │ └── openfst │ │ │ └── src │ │ │ ├── CMakeLists.txt │ │ │ ├── extensions │ │ │ └── special │ │ │ │ └── CMakeLists.txt │ │ │ ├── include │ │ │ └── fst │ │ │ │ ├── flags.h │ │ │ │ └── log.h │ │ │ ├── lib │ │ │ └── flags.cc │ │ │ └── test │ │ │ └── CMakeLists.txt │ ├── post_processor │ │ ├── CMakeLists.txt │ │ ├── post_processor.cc │ │ └── post_processor.h │ ├── test │ │ ├── CMakeLists.txt │ │ ├── ctc_prefix_beam_search_test.cc │ │ ├── feature_pipeline_test.cc │ │ ├── post_processor_test.cc │ │ └── utils_test.cc │ ├── toolchains │ │ ├── aarch64-linux-gnu.toolchain.cmake │ │ └── ios.toolchain.cmake │ ├── utils │ │ ├── CMakeLists.txt │ │ ├── blocking_queue.h │ │ ├── file.h │ │ ├── flags.h │ │ ├── json.h │ │ ├── log.h │ │ ├── string.cc │ │ ├── string.h │ │ ├── thread_pool.h │ │ ├── timer.h │ │ ├── utils.cc │ │ └── utils.h │ └── websocket │ │ ├── CMakeLists.txt │ │ ├── websocket_client.cc │ │ ├── websocket_client.h │ │ ├── websocket_server.cc │ │ └── websocket_server.h ├── gpu │ ├── .gitmodules │ ├── Dockerfile │ │ ├── Dockerfile.client │ │ └── Dockerfile.server │ ├── Overview.JPG │ ├── README.md │ ├── client │ │ ├── client.py │ │ ├── decode_manifest_triton.py │ │ ├── decode_manifest_triton.sh │ │ ├── generate_perf_input.py │ │ ├── speech_client.py │ │ ├── stats_summary.py │ │ ├── test_wavs │ │ │ ├── long.wav │ │ │ └── mid.wav │ │ └── utils.py │ ├── cuda_decoders │ │ ├── README.md │ │ ├── build_tlg.sh │ │ ├── model_repo_cuda_decoder │ │ │ ├── attention_rescoring │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt.template │ │ │ ├── decoder │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt.template │ │ │ ├── encoder │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt.template │ │ │ ├── feature_extractor │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt.template │ │ │ └── scoring │ │ │ │ ├── 1 │ │ │ │ ├── decoder.py │ │ │ │ ├── 
frame_reducer.py │ │ │ │ ├── lang │ │ │ │ │ └── .gitkeep │ │ │ │ ├── model.py │ │ │ │ └── wfst_decoding_config.yaml │ │ │ │ └── config.pbtxt.template │ │ ├── model_repo_stateful_cuda_decoder │ │ │ ├── encoder │ │ │ │ ├── 1 │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── .gitkeep │ │ │ │ └── config_template.pbtxt │ │ │ ├── feature_extractor │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config_template.pbtxt │ │ │ ├── scoring │ │ │ │ ├── 1 │ │ │ │ │ ├── decoder.py │ │ │ │ │ ├── frame_reducer.py │ │ │ │ │ ├── lang │ │ │ │ │ │ └── .gitkeep │ │ │ │ │ ├── model.py │ │ │ │ │ └── wfst_decoding_config.yaml │ │ │ │ └── config_template.pbtxt │ │ │ └── streaming_wenet │ │ │ │ ├── 1 │ │ │ │ ├── .gitignore │ │ │ │ └── .gitkeep │ │ │ │ └── config_template.pbtxt │ │ ├── requirements.txt │ │ ├── run.sh │ │ └── run_streaming.sh │ ├── model_repo │ │ ├── attention_rescoring │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ └── config_template.pbtxt │ │ ├── decoder │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ ├── config_template.pbtxt │ │ │ └── config_template2.pbtxt │ │ ├── encoder │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ └── config_template.pbtxt │ │ ├── feature_extractor │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config_template.pbtxt │ │ └── scoring │ │ │ ├── 1 │ │ │ └── model.py │ │ │ ├── config_template.pbtxt │ │ │ └── hotwords.yaml │ ├── model_repo_stateful │ │ ├── decoder │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ ├── config_template.pbtxt │ │ │ └── config_template2.pbtxt │ │ ├── encoder │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ ├── config_template.pbtxt │ │ │ └── config_template2.pbtxt │ │ ├── feature_extractor │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ └── config_template.pbtxt │ │ ├── streaming_wenet │ │ │ ├── 1 │ │ │ │ └── .gitignore │ │ │ └── config_template.pbtxt │ │ └── wenet │ │ │ ├── 1 │ │ │ ├── model.py │ │ │ └── wenet_onnx_model.py │ │ │ ├── config_template.pbtxt │ │ │ └── hotwords.yaml │ ├── scripts │ │ ├── benchmark_onnx_throughput.py │ │ ├── compute_hotwords_f1.py │ │ ├── convert.py │ │ 
├── convert_start_server.sh │ │ └── run_qa.sh │ ├── tensorrt │ │ ├── LayerNormPlugin │ │ │ ├── LayerNormPlugin.cu │ │ │ ├── LayerNormPlugin.h │ │ │ ├── Makefile │ │ │ └── testLayerNormPlugin.py │ │ ├── README.md │ │ ├── export_streaming_conformer_trt.py │ │ ├── model_repo_stateful_trt │ │ │ ├── decoder │ │ │ │ ├── 1 │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── .gitkeep │ │ │ │ ├── config_template.pbtxt │ │ │ │ └── config_template2.pbtxt │ │ │ ├── encoder │ │ │ │ ├── 1 │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── .gitkeep │ │ │ │ └── config_template.pbtxt │ │ │ ├── feature_extractor │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config_template.pbtxt │ │ │ ├── streaming_wenet │ │ │ │ ├── 1 │ │ │ │ │ ├── .gitignore │ │ │ │ │ └── .gitkeep │ │ │ │ └── config_template.pbtxt │ │ │ └── wenet │ │ │ │ ├── 1 │ │ │ │ ├── model.py │ │ │ │ └── wenet_onnx_model.py │ │ │ │ └── config_template.pbtxt │ │ ├── replace_layernorm.py │ │ ├── requirements.txt │ │ └── run_streaming_small_model.sh │ ├── tensorrt_fastertransformer │ │ ├── README.md │ │ ├── decoder_plugin.JPG │ │ ├── encoder_plugin.JPG │ │ ├── extract_weights.py │ │ ├── model_repo_ft │ │ │ ├── attention_rescoring │ │ │ │ └── config.pbtxt.template │ │ │ ├── decoder │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt.template │ │ │ ├── encoder │ │ │ │ ├── 1 │ │ │ │ │ └── .gitkeep │ │ │ │ └── config.pbtxt.template │ │ │ ├── feature_extractor │ │ │ │ ├── 1 │ │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt.template │ │ │ └── scoring │ │ │ │ ├── 1 │ │ │ │ └── model.py │ │ │ │ └── config.pbtxt.template │ │ ├── replace_plugin.py │ │ ├── requirements.txt │ │ ├── run.sh │ │ ├── run_large.sh │ │ └── utils.py │ └── test.gif ├── horizonbpu │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── api │ ├── bin │ ├── bpu │ │ ├── CMakeLists.txt │ │ ├── bpu_asr_model.cc │ │ └── bpu_asr_model.h │ ├── cmake │ ├── decoder │ ├── frontend │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── toolchains │ ├── utils │ └── websocket ├── ios 
│ ├── CMakeLists.txt │ ├── README.md │ ├── WenetDemo │ │ ├── WenetDemo.xcodeproj │ │ │ ├── project.pbxproj │ │ │ └── project.xcworkspace │ │ │ │ ├── contents.xcworkspacedata │ │ │ │ └── xcshareddata │ │ │ │ └── IDEWorkspaceChecks.plist │ │ └── WenetDemo │ │ │ ├── AppDelegate.swift │ │ │ ├── Assets.xcassets │ │ │ ├── AccentColor.colorset │ │ │ │ └── Contents.json │ │ │ ├── AppIcon.appiconset │ │ │ │ └── Contents.json │ │ │ └── Contents.json │ │ │ ├── Base.lproj │ │ │ ├── LaunchScreen.storyboard │ │ │ └── Main.storyboard │ │ │ ├── Info.plist │ │ │ ├── SceneDelegate.swift │ │ │ ├── ViewController.swift │ │ │ ├── model │ │ │ └── .gitkeep │ │ │ └── wenet │ │ │ ├── WenetDemo-Bridging-Header.h │ │ │ ├── wenet.h │ │ │ └── wenet.mm │ ├── build │ │ └── Podfile │ ├── cmake │ ├── decoder │ ├── frontend │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── toolchains │ └── utils ├── ipex │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── api │ ├── bin │ ├── cmake │ ├── decoder │ ├── docker │ │ └── Dockerfile │ ├── env_checking.sh │ ├── frontend │ ├── grpc │ ├── http │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── utils │ ├── web │ └── websocket ├── kunlun │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── README_EN.md │ ├── api │ ├── bin │ ├── cmake │ ├── compile.sh │ ├── decoder │ ├── frontend │ ├── grpc │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── utils │ ├── websocket │ └── xpu │ │ ├── CMakeLists.txt │ │ ├── conformer_test.cpp │ │ ├── xpu_asr_model.cc │ │ ├── xpu_asr_model.h │ │ ├── xpu_conformer.cpp │ │ ├── xpu_conformer.h │ │ ├── xpu_util.cpp │ │ └── xpu_util.h ├── libtorch │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── README_CN.md │ ├── api │ ├── bin │ ├── cmake │ ├── decoder │ ├── docker │ │ └── Dockerfile │ ├── frontend │ ├── grpc │ ├── http │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── utils │ ├── web │ │ ├── app.py │ │ ├── static │ │ │ ├── css │ │ │ │ ├── font-awesome.min.css │ │ │ 
│ └── style.css │ │ │ ├── favicon.ico │ │ │ ├── fonts │ │ │ │ ├── FontAwesome.otf │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ ├── image │ │ │ │ ├── qrcode-enterprise.png │ │ │ │ ├── qrcode-official-account.png │ │ │ │ ├── voice-dictation.svg │ │ │ │ └── voice-pic.png │ │ │ └── js │ │ │ │ ├── SoundRecognizer.js │ │ │ │ ├── jquery-3.2.1.min.js │ │ │ │ └── recorder │ │ │ │ ├── engine │ │ │ │ ├── mp3.js │ │ │ │ ├── pcm.js │ │ │ │ └── wav.js │ │ │ │ ├── extensions │ │ │ │ ├── frequency.histogram.view.js │ │ │ │ └── lib.fft.js │ │ │ │ └── recorder-core.js │ │ └── templates │ │ │ └── index.html │ └── websocket ├── onnxruntime │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── api │ ├── bin │ ├── cmake │ ├── decoder │ ├── frontend │ ├── grpc │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── utils │ └── websocket ├── openvino │ ├── CMakeLists.txt │ ├── README.md │ ├── api │ ├── bin │ ├── cmake │ ├── decoder │ ├── frontend │ ├── kaldi │ ├── ov │ │ ├── CMakeLists.txt │ │ ├── ov_asr_model.cc │ │ └── ov_asr_model.h │ ├── patch │ ├── post_processor │ ├── test │ ├── utils │ └── websocket ├── raspberrypi │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── api │ ├── bin │ ├── cmake │ ├── decoder │ ├── frontend │ ├── kaldi │ ├── patch │ ├── post_processor │ ├── test │ ├── toolchains │ └── utils └── web │ ├── README.md │ ├── app.py │ └── requirements.txt ├── setup.cfg ├── setup.py ├── test ├── resources │ ├── aishell-BAC009S0724W0121.wav │ ├── aishell2.words.txt │ ├── dataset │ │ ├── aishell-BAC009S0724W0121.wav │ │ ├── data.list │ │ ├── data.shards.list │ │ ├── librispeech-1995-1837-0001.wav │ │ ├── shards │ │ │ └── shards_000000000.tar │ │ ├── text │ │ └── wav.scp │ ├── global_cmvn │ ├── librispeech-1995-1837-0001.wav │ ├── librispeech.train_960_unigram5000.bpemodel │ ├── librispeech.words.txt │ ├── 
non-linguistic-symbols.invalid │ ├── non-linguistic-symbols.valid │ ├── paraformer.seg_dict.txt │ └── paraformer.words.txt ├── test_file_utils.py ├── tools │ └── test_make_shard.py └── wenet │ ├── dataset │ ├── test_datapipes.py │ ├── test_dataset.py │ └── test_processor.py │ ├── models │ ├── paraformer │ │ └── test_paraformer.py │ ├── transformer │ │ ├── test_attention.py │ │ └── test_grad_ckpt.py │ └── whisper │ │ └── test_whisper.py │ ├── text │ ├── test_bpe_tokenizer.py │ ├── test_char_tokenizer.py │ ├── test_hugging_face_tokenizer.py │ ├── test_paraformer_tokenizer.py │ ├── test_parallel.py │ └── test_whisper_tokenizer.py │ └── utils │ ├── test_init_model.py │ └── test_init_tokenizer.py ├── tools ├── alignment.sh ├── analyze_dataset.py ├── cmvn_kaldi2json.py ├── combine_data.sh ├── compute-cer.py ├── compute-wer.py ├── compute_cmvn_stats.py ├── compute_fbank_feats.py ├── compute_shard_cmvn_stats.py ├── copy_data_dir.sh ├── data │ ├── remove_dup_utts.sh │ └── split_scp.pl ├── decode.sh ├── extract_shard_data.py ├── feat_to_shape.sh ├── filter_scp.pl ├── fix_data_dir.sh ├── flake8_hook.py ├── format_data.sh ├── fst │ ├── add_lex_disambig.pl │ ├── compile_lexicon_token_fst.sh │ ├── ctc_token_fst.py │ ├── ctc_token_fst_compact.py │ ├── ctc_token_fst_corrected.py │ ├── eps2disambig.pl │ ├── make_lexicon_fst.pl │ ├── make_tlg.sh │ ├── prepare_dict.py │ ├── remove_oovs.pl │ ├── rnnt_token_fst.py │ └── s2eps.pl ├── git-pre-commit ├── install_srilm.sh ├── k2 │ ├── make_hlg.sh │ ├── prepare_char.py │ └── prepare_mmi.sh ├── latency_metrics.py ├── make_raw_list.py ├── make_shard_list.py ├── merge_scp2txt.py ├── onnx2horizonbin.py ├── parse_options.sh ├── perturb_data_dir_speed.sh ├── reduce_data_dir.sh ├── remove_longshortdata.py ├── segment.py ├── setup_anaconda.sh ├── sph2wav.sh ├── spk2utt_to_utt2spk.pl ├── spm_decode ├── spm_encode ├── spm_train ├── ssh_launcher.py ├── subset_data_dir.sh ├── subset_scp.pl ├── sym2int.pl ├── text2token.py ├── utt2spk_to_spk2utt.pl ├── 
validate_data_dir.sh ├── validate_dict_dir.pl ├── validate_text.pl ├── wav2dur.py ├── wav_to_duration.sh └── websocket │ └── performance-ws.py └── wenet ├── README.md ├── __init__.py ├── bin ├── __init__.py ├── alignment.py ├── average_model.py ├── export_ipex.py ├── export_jit.py ├── export_onnx_bpu.py ├── export_onnx_cpu.py ├── export_onnx_gpu.py ├── recognize.py ├── recognize_onnx_gpu.py └── train.py ├── cli ├── __init__.py ├── hub.py ├── model.py ├── punc_model.py └── transcribe.py ├── dataset ├── __init__.py ├── datapipes.py ├── dataset.py ├── deprecated │ ├── __init__.py │ ├── dataset.py │ └── processor.py ├── kaldi_io.py ├── processor.py └── wav_distortion.py ├── models ├── __init__.py ├── branchformer │ ├── __init__.py │ ├── cgmlp.py │ ├── encoder.py │ └── encoder_layer.py ├── ctl_model │ ├── __init__.py │ ├── asr_model_ctl.py │ └── encoder.py ├── e_branchformer │ ├── __init__.py │ ├── encoder.py │ └── encoder_layer.py ├── efficient_conformer │ ├── __init__.py │ ├── attention.py │ ├── convolution.py │ ├── encoder.py │ ├── encoder_layer.py │ └── subsampling.py ├── finetune │ ├── __init__.py │ └── lora │ │ ├── __init__.py │ │ ├── config.yaml │ │ ├── layers.py │ │ └── utils.py ├── firered │ ├── __init__.py │ ├── attention.py │ ├── convert_FireRed_AED_L_to_wenet_config_and_ckpt.py │ ├── encoder.py │ ├── encoder_layer.py │ ├── model.py │ └── subsampling.py ├── k2 │ ├── __init__.py │ └── model.py ├── paraformer │ ├── __init__.py │ ├── attention.py │ ├── cif.py │ ├── convert_paraformer_to_wenet_config_and_ckpt.py │ ├── embedding.py │ ├── layers.py │ ├── paraformer.py │ ├── search.py │ └── subsampling.py ├── squeezeformer │ ├── __init__.py │ ├── attention.py │ ├── conv2d.py │ ├── convolution.py │ ├── encoder.py │ ├── encoder_layer.py │ ├── positionwise_feed_forward.py │ └── subsampling.py ├── ssl │ ├── __init__.py │ ├── bestrq │ │ ├── __init__.py │ │ ├── bestrq_model.py │ │ └── mask.py │ ├── init_dataset.py │ ├── init_model.py │ ├── w2vbert │ │ ├── __init__.py │ │ 
├── convert_w2vbert_to_wenet_config_and_ckpt.py │ │ └── w2vbert_model.py │ └── wav2vec2 │ │ ├── __init__.py │ │ ├── quantizer.py │ │ └── wav2vec2_model.py ├── transducer │ ├── __init__.py │ ├── joint.py │ ├── predictor.py │ ├── search │ │ ├── __init__.py │ │ ├── greedy_search.py │ │ └── prefix_beam_search.py │ └── transducer.py ├── transformer │ ├── __init__.py │ ├── asr_model.py │ ├── attention.py │ ├── cmvn.py │ ├── convolution.py │ ├── ctc.py │ ├── decoder.py │ ├── decoder_layer.py │ ├── embedding.py │ ├── encoder.py │ ├── encoder_layer.py │ ├── label_smoothing_loss.py │ ├── norm.py │ ├── positionwise_feed_forward.py │ ├── search.py │ ├── subsampling.py │ └── swish.py └── whisper │ ├── __init__.py │ ├── convert_whisper_to_wenet_config_and_ckpt.py │ └── whisper.py ├── text ├── __init__.py ├── base_tokenizer.py ├── bpe_tokenizer.py ├── char_tokenizer.py ├── hugging_face_tokenizer.py ├── paraformer_tokenizer.py ├── tokenize_utils.py └── whisper_tokenizer.py └── utils ├── __init__.py ├── checkpoint.py ├── class_utils.py ├── cmvn.py ├── common.py ├── config.py ├── context_graph.py ├── ctc_utils.py ├── executor.py ├── file_utils.py ├── fsdp_utils.py ├── init_dataset.py ├── init_model.py ├── init_tokenizer.py ├── mask.py ├── rope_utils.py ├── scheduler.py └── train_utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | select = B,C,E,F,P,T4,W,B9 3 | max-line-length = 80 4 | # C408 ignored because we like the dict keyword argument syntax 5 | # E501 is not flexible enough, we're using B950 instead 6 | ignore = 7 | E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303, 8 | # shebang has extra meaning in fbcode lints, so I think it's not worth trying 9 | # to line this up with executable bit 10 | EXE001, EXE002, 11 | # these ignores are from flake8-bugbear; please fix! 12 | B007,B008,B905 13 | # these ignores are from flake8-comprehensions; please fix! 
14 | C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415 15 | exclude = compute-wer.py,kaldi_io.py,__torch__,docs/conf.py 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/runtime.yml: -------------------------------------------------------------------------------- 1 | name: Build Runtime 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | paths: 7 | - 'runtime/**' 8 | 9 | env: 10 | RUNTIME_DIR: runtime/libtorch 11 | FC_BASE_DIR: runtime/libtorch/fc_base 12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [macos-latest, ubuntu-latest] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Cache FC Base 23 | uses: actions/cache@v3 24 | with: 25 | path: ${{ env.FC_BASE_DIR }} 26 | key: ${{ runner.os }}-fc-base 27 | 28 | - uses: hendrikmuhs/ccache-action@v1.2 29 | with: 30 | key: ${{ runner.os }}-build 31 | 32 | - name: Build 33 | run: | 34 | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" 35 | cd ${{ env.RUNTIME_DIR }} 36 | cmake -B build -DCMAKE_BUILD_TYPE=Release 37 | cmake --build build -j$(nproc) 38 | -------------------------------------------------------------------------------- /.github/workflows/stale-issues.yml: -------------------------------------------------------------------------------- 1 | name: Close Stale Issues 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | 6 | jobs: 7 | close-stale-issues: 8 | if: github.repository == 'wenet-e2e/wenet' 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | steps: 13 | - name: Check for Stale Issues 14 | uses: actions/stale@v5 15 | 16 | - name: Close Stale Issues 17 | uses: actions/stale@v5 18 | with: 19 | stale-issue-message: 'This issue has been automatically closed due to inactivity.' 
20 | close-issue-message: "This issue was closed because it has been inactive for 7 days 21 | since being marked as stale. Please reopen if you'd like to work on this further." 22 | days-before-stale: 60 23 | days-before-close: 7 24 | stale-issue-label: stale 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Build Windows Runtime 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | env: 7 | RUNTIME_DIR: runtime/libtorch 8 | 9 | jobs: 10 | build: 11 | runs-on: windows-latest 12 | defaults: 13 | run: 14 | shell: bash 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Build 18 | run: | 19 | cd ${{ env.RUNTIME_DIR }} 20 | cmake -B build -DGRAPH_TOOLS=ON -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF 21 | cmake --build build --config Release -j$(nproc) 22 | 23 | - name: Prepare Release Binary 24 | run: | 25 | cd ${{ env.RUNTIME_DIR }} 26 | mkdir -p wenet/kaldi 27 | cp build/*.dll wenet 28 | cp build/api/Release/*.dll wenet 29 | cp build/bin/Release/*.exe wenet 30 | cp build/kaldi/Release/*.exe wenet/kaldi 31 | 32 | - name: Upload WeNet Binary 33 | uses: actions/upload-artifact@v3 34 | with: 35 | name: release-wenet-binary 36 | path: ${{ env.RUNTIME_DIR }}/wenet 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Visual Studio Code files 7 | .vscode 8 | .vs 9 | 10 | # PyCharm files 11 | .idea 12 | 13 | # Eclipse Project settings 14 | *.*project 15 | .settings 16 | 17 | # Sublime Text settings 18 | *.sublime-workspace 19 | *.sublime-project 20 | 21 | # Editor temporaries 22 | *.swn 23 | *.swo 24 | *.swp 25 | *.swm 26 | *~ 27 | 28 | # IPython notebook 
checkpoints 29 | .ipynb_checkpoints 30 | 31 | # macOS dir files 32 | .DS_Store 33 | 34 | exp 35 | data 36 | raw_wav 37 | tensorboard 38 | **/*build* 39 | 40 | # protoc output files 41 | runtime/core/grpc/wenet.grpc.pb.cc 42 | runtime/core/grpc/wenet.grpc.pb.h 43 | runtime/core/grpc/wenet.pb.cc 44 | runtime/core/grpc/wenet.pb.h 45 | 46 | # Clangd files 47 | .cache 48 | compile_commands.json 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: trailing-whitespace 6 | exclude: 'test/resources/.*' 7 | - repo: https://github.com/pre-commit/mirrors-yapf 8 | rev: 'v0.32.0' 9 | hooks: 10 | - id: yapf 11 | - repo: https://github.com/pycqa/flake8 12 | rev: '3.8.2' 13 | hooks: 14 | - id: flake8 15 | - repo: https://github.com/pre-commit/mirrors-clang-format 16 | rev: 'v17.0.6' 17 | hooks: 18 | - id: clang-format 19 | args: ['--style=file'] 20 | exclude: 'runtime/ios/WenetDemo/WenetDemo/wenet/.*\.h$|.*\.(json|java|js|m|mm|proto)' 21 | - repo: https://github.com/cpplint/cpplint 22 | rev: '1.6.1' 23 | hooks: 24 | - id: cpplint 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing guidelines 2 | 3 | ## Pre-commit tidy/linting hook 4 | 5 | You'll need to install flake8 first. 6 | 7 | `pip install flake8==3.8.2` 8 | 9 | We use flake8 to perform additional formatting and semantic checking of code. 10 | We provide a pre-commit git hook for performing these checks, before a commit 11 | is created: 12 | 13 | ```bash 14 | ln -s ../../tools/git-pre-commit .git/hooks/pre-commit 15 | ``` 16 | 17 | You have to execute above command in wenet project root directory. 
18 | After that, each commit will be checked by flake8. 19 | 20 | If you do not set pre-commit, just run `flake8` in wenet project root directory 21 | and fix all the problems. 22 | 23 | ## Github checks 24 | 25 | After a pull request is submitted, some checks will run to check your code style. 26 | 27 | Below is an example where some checks fail. 28 | 29 | ![github checks](docs/images/checks.png) 30 | 31 | You need to click the details to see the detailed info like the example below. 32 | 33 | ![github checks](docs/images/check_detail.png) 34 | 35 | You have to fix all style problems according to the detailed info. 36 | 37 | -------------------------------------------------------------------------------- /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | root=runtime/core 2 | filter=-build/c++11 3 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _gen/ 2 | _build/ 3 | build/ 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SPHINXPROJ = wenet 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 
13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | api: 19 | sphinx-apidoc -f --separate --module-first -d 2 -o ./python_api ../wenet 20 | sed -i 's:^wenet:Python API Reference:g' ./python_api/modules.rst 21 | sed -i 's:^=====:====================:g' ./python_api/modules.rst 22 | 23 | # Catch-all target: route all unknown targets to Sphinx using the new 24 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 25 | %: Makefile 26 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 27 | -------------------------------------------------------------------------------- /docs/images/UIO_dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_dataflow.png -------------------------------------------------------------------------------- /docs/images/UIO_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_system.png -------------------------------------------------------------------------------- /docs/images/UIO_wenetspeech_cer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_wenetspeech_cer.png -------------------------------------------------------------------------------- /docs/images/check_detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/check_detail.png -------------------------------------------------------------------------------- /docs/images/checks.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/checks.png -------------------------------------------------------------------------------- /docs/images/context_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/context_graph.png -------------------------------------------------------------------------------- /docs/images/lm_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/lm_system.png -------------------------------------------------------------------------------- /docs/images/runtime_android.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_android.gif -------------------------------------------------------------------------------- /docs/images/runtime_server.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_server.gif -------------------------------------------------------------------------------- /docs/images/runtime_web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_web.png -------------------------------------------------------------------------------- /docs/images/subsampling_overalp.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/subsampling_overalp.gif -------------------------------------------------------------------------------- /docs/images/u2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/u2.gif -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. wenet documentation master file, created by 2 | sphinx-quickstart on Thu Dec 3 11:43:53 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to wenet's documentation! 7 | ================================= 8 | 9 | 10 | wenet is a transformer-based end-to-end ASR toolkit. 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: Contents: 15 | 16 | ./python_package.md 17 | ./train.rst 18 | ./production.rst 19 | ./reference.rst 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable.
Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/papers.md: -------------------------------------------------------------------------------- 1 | ## Papers 2 | 3 | * [WeNet: Production Oriented Streaming and Non-streaming End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2102.01547.pdf), accepted by InterSpeech 2021. 4 | * [WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2203.15455.pdf), accepted by InterSpeech 2022. 5 | 6 | -------------------------------------------------------------------------------- /docs/production.rst: -------------------------------------------------------------------------------- 1 | Production Runtime 2 | ================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | ./lm.md 9 | ./context.md 10 | ./runtime.md 11 | ./jit_in_wenet.md 12 | -------------------------------------------------------------------------------- /docs/python_package.md: -------------------------------------------------------------------------------- 1 | # Python Package 2 | 3 | 4 | ## Install 5 | 6 | ``` sh 7 | pip install git+https://github.com/wenet-e2e/wenet.git 8 | ``` 9 | 10 | ## Development Install 11 | 12 | ``` sh 13 | git clone https://github.com/wenet-e2e/wenet.git 14 | cd wenet 15 | pip install -e . 16 | ``` 17 | 18 | 19 | ## Command line Usage 20 | 21 | ``` sh 22 | wenet --language chinese audio.wav 23 | ``` 24 | 25 | You can specify the following parameters. 26 | 27 | * `-l` or `--language`: chinese/english are supported now. 
28 | * `-m` or `--model_dir`: your own model dir 29 | * `-g` or `--gpu`: the device id of the GPU; the default value -1 means CPU. 30 | * `-t` or `--show_tokens_info`: show the token level information such as timestamp, confidence, etc. 31 | * `--align`: force align the input audio and transcript 32 | * `--label`: the input label to align 33 | * `--paraformer`: use the best Chinese model 34 | * `--device`: specify the backend accelerator (cuda/npu/cpu) 35 | 36 | ## Python Programming Usage 37 | 38 | ``` python 39 | import wenet 40 | 41 | model = wenet.load_model('chinese') 42 | # or model = wenet.load_model(model_dir='xxx') 43 | result = model.transcribe('audio.wav') 44 | print(result['text']) 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | ./papers.md 9 | ./python_api/modules.rst 10 | 11 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | nbsphinx 3 | sphinx 4 | recommonmark 5 | sphinx-markdown-tables 6 | sphinx-rtd-theme 7 | torch 8 | torchaudio 9 | typeguard 10 | -------------------------------------------------------------------------------- /docs/train.rst: -------------------------------------------------------------------------------- 1 | How to train models? 2 | ==================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | ./tutorial_librispeech.md 9 | ./tutorial_aishell.md 10 | ./pretrained_models.md 11 | ./UIO.md 12 | -------------------------------------------------------------------------------- /examples/aishell/NST/local/NST_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/aishell/NST/local/NST_plot.png -------------------------------------------------------------------------------- /examples/aishell/NST/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/server/x86/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell/paraformer/local: -------------------------------------------------------------------------------- 1 | ../whisper/local -------------------------------------------------------------------------------- /examples/aishell/paraformer/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell/paraformer/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/aishell/paraformer/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/aishell/rnnt/local: -------------------------------------------------------------------------------- 1 | ../s0/local -------------------------------------------------------------------------------- /examples/aishell/rnnt/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell/rnnt/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/aishell/rnnt/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/aishell/s0/UIO_RESULT.md: -------------------------------------------------------------------------------- 1 | # Benchmark on Conformer 2 | 3 | | IO | CER | 4 | |--------------|-------| 5 | | Old | 4.61 | 6 | | UIO(Raw) | 4.63 | 7 | | UIO(Shards) | 4.67 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /examples/aishell/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/aishell/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/aishell/whisper/conf/ds_stage1.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 1, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 100, 5 | "gradient_clipping": 5, 6 | "fp16": { 7 | "enabled": false, 8 | "auto_cast": false, 9 | "loss_scale": 0, 10 | "initial_scale_power": 16, 11 | "loss_scale_window": 1000, 12 | "hysteresis": 2, 13 | "consecutive_hysteresis": false, 14 | "min_loss_scale": 1 15 | }, 16 | "bf16": { 17 | "enabled": true 18 | }, 19 | "zero_force_ds_cpu_optimizer": false, 20 | "zero_optimization": { 21 | "stage": 1, 22 | "offload_optimizer": { 23 | "device": "none", 24 | "pin_memory": true 25 | }, 26 | "allgather_partitions": true, 27 | "allgather_bucket_size": 5e8, 28 | "overlap_comm": true, 29 | "reduce_scatter": true, 30 | "reduce_bucket_size": 5e8, 31 | "contiguous_gradients" : true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/aishell/whisper/conf/ds_stage2.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 1, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 100, 5 | "gradient_clipping": 5, 6 | "fp16": { 7 | "enabled": false, 8 | "auto_cast": false, 9 | "loss_scale": 0, 10 | "initial_scale_power": 16, 11 | "loss_scale_window": 1000, 12 | "hysteresis": 2, 13 | "consecutive_hysteresis": false, 14 | "min_loss_scale": 1 15 | }, 16 | "bf16": { 17 | "enabled": true 18 | }, 19 | "zero_force_ds_cpu_optimizer": false, 20 | "zero_optimization": { 21 | "stage": 2, 22 | "offload_optimizer": { 23 | "device": "none", 24 | "pin_memory": true 25 | }, 26 | "allgather_partitions": true, 27 | "allgather_bucket_size": 5e8, 28 | "overlap_comm": false, 29 | "reduce_scatter": true, 30 | "reduce_bucket_size": 5e8, 31 | "contiguous_gradients" : true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/aishell/whisper/conf/ds_stage3.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 1, 3 | "gradient_accumulation_steps": 1, 4 | "steps_per_print": 100, 5 | "gradient_clipping": 5, 6 | "fp16": { 7 | "enabled": false, 8 | "auto_cast": false, 9 | "loss_scale": 0, 10 | "initial_scale_power": 16, 11 | "loss_scale_window": 1000, 12 | "hysteresis": 2, 13 | "consecutive_hysteresis": false, 14 | "min_loss_scale": 1 15 | }, 16 | "bf16": { 17 | "enabled": true 18 | }, 19 | "zero_force_ds_cpu_optimizer": false, 20 | "zero_optimization": { 21 | "stage": 3, 22 | "offload_optimizer": { 23 | "device": "none", 24 | "pin_memory": true 25 | }, 26 | "offload_param": { 27 | "device": "none", 28 | "pin_memory": true 29 | }, 30 | "allgather_partitions": true, 31 | "allgather_bucket_size": 5e8, 32 | "overlap_comm": true, 33 | "reduce_scatter": true, 34 | "reduce_bucket_size": 5e8, 35 | "contiguous_gradients" : true, 36 | "stage3_max_live_parameters": 
1e9, 37 | "stage3_max_reuse_distance": 1e9, 38 | "stage3_prefetch_bucket_size": 5e8, 39 | "stage3_param_persistence_threshold": 1e5 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/aishell/whisper/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell/whisper/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/aishell/whisper/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/aishell2/rnnt/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## U2++ Conformer Result 4 | 5 | * Feature info: using fbank feature, dither, cmvn, online speed perturb 6 | * Training info: lr 0.001, dynamic batch with max_frames_in_batch 15000, 4 gpu, acc_grad 1, 130 epochs 7 | * Training weight info: transducer_weight 0.75, ctc_weight 0.1, reverse_weight 0.30, average_num 30 8 | * Predictor type: lstm 9 | 10 | | decoding mode/chunk size | full | 16 | 11 | |---------------------------|-------|-------| 12 | | rnnt greedy search | 6.44 | 7.09 | 13 | 14 | --------------------------------------------------------------------------------
/examples/aishell2/rnnt/local: -------------------------------------------------------------------------------- 1 | ../s0/local -------------------------------------------------------------------------------- /examples/aishell2/rnnt/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell2/rnnt/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/aishell2/rnnt/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/aishell2/s0/local/word_segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding=utf-8 3 | # Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG) 4 | # 2018 Beijing Shell Shell Tech. Co. Ltd. 
(Author: Hui BU) 5 | # Apache 2.0 6 | 7 | from __future__ import print_function 8 | import sys 9 | import jieba 10 | 11 | if len(sys.argv) < 3: 12 | sys.stderr.write( 13 | "word_segmentation.py \n") 14 | exit(1) 15 | 16 | vocab_file = sys.argv[1] 17 | trans_file = sys.argv[2] 18 | 19 | jieba.set_dictionary(vocab_file) 20 | for line in open(trans_file, 'r', encoding='utf8'): 21 | key, trans = line.strip().split(' ', 1) 22 | words = jieba.cut(trans, 23 | HMM=False) # turn off new word discovery (HMM-based) 24 | new_line = key + '\t' + " ".join(words) 25 | print(new_line) 26 | -------------------------------------------------------------------------------- /examples/aishell2/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell2/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/aishell2/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/aishell4/s0/local/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file 
except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /examples/aishell4/s0/local/text_format.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright Chao Weng 4 | 5 | # normalizations for hkust transcript 6 | # see the docs/trans-guidelines.pdf for details 7 | 8 | while () { 9 | @A = split(" ", $_); 10 | if (@A == 1) { 11 | next; 12 | } 13 | print $_ 14 | } 15 | -------------------------------------------------------------------------------- /examples/aishell4/s0/local/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /examples/aishell4/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/aishell4/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/aishell4/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/chime4/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result 4 | 5 | * Feature info: dither + specaug + speed perturb 6 | * Training info: lr 0.0005, batch size 8, 1 gpu, acc_grad 4, 80 epochs 7 | * Decoding info: average_num 10 8 | 9 | | decoding mode | dt05_real_1ch | dt05_simu_1ch | et05_real_1ch | et05_simu_1ch | 10 | |:----------------------:|:-------------:|:-------------:|:-------------:|:-------------:| 11 | | ctc_prefix_beam_search | 19.06% | 21.17% | 28.39% | 29.16% | 12 | | attention_rescoring | 17.92% | 20.22% | 27.40% | 28.25% | 13 | -------------------------------------------------------------------------------- /examples/chime4/s0/local/chime4_format_dir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # wujian@2020 4 | 5 | set -eu 6 | 7 | echo "$0: Formating chime4 data dir..." 
8 | 9 | track=isolated_1ch_track 10 | data_dir=data/chime4 11 | 12 | mkdir -p $data_dir/{train,dev} 13 | 14 | cat $data_dir/tr05_{simu,real}_noisy/wav.scp $data_dir/tr05_orig_clean/wav.scp \ 15 | $data_dir/train_si200_wsj1_clean/wav.scp | sort -k1 > $data_dir/train/wav.scp 16 | cat $data_dir/tr05_{simu,real}_noisy/text $data_dir/tr05_orig_clean/text \ 17 | $data_dir/train_si200_wsj1_clean/text | sort -k1 > $data_dir/train/text 18 | 19 | cat $data_dir/dt05_{real,simu}_${track}/wav.scp | sort -k1 > $data_dir/dev/wav.scp 20 | cat $data_dir/dt05_{real,simu}_${track}/text | sort -k1 > $data_dir/dev/text 21 | 22 | echo "$0: Format $data_dir done" 23 | -------------------------------------------------------------------------------- /examples/chime4/s0/local/chime4_gen_wav.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # wujian@2020 4 | 5 | set -eu 6 | 7 | [ $# -ne 2 ] && echo "Script format error: $0 " && exit 0 8 | 9 | data_dir=$1 10 | dump_dir=$2 11 | 12 | mkdir -p $dump_dir 13 | 14 | num_utts=$(cat $data_dir/wav.scp | wc -l) 15 | echo "Orginal utterances (.wav + .wv1): $num_utts" 16 | 17 | # cat $data_dir/wav.scp | grep "sph2pipe" | \ 18 | # awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash 19 | 20 | cat $data_dir/wav.scp | grep -v "sph2pipe" > $data_dir/raw_wav.scp 21 | find $dump_dir -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \ 22 | sed 's:\.wav::' > $data_dir/sph_wav.scp 23 | 24 | cat $data_dir/{raw_wav,sph_wav}.scp | sort -k1 > $data_dir/wav.scp 25 | num_utts=$(cat $data_dir/wav.scp | wc -l) 26 | echo "Wave utterances (.wav): $num_utts" 27 | 28 | echo "$0: Generate wav => $dump_dir done" 29 | -------------------------------------------------------------------------------- /examples/chime4/s0/local/flist2scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 
Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # takes in a file list with lines like 19 | # /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 20 | # and outputs an scp in kaldi format with lines like 21 | # 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1 22 | # (the first thing is the utterance-id, which is the same as the basename of the file. 23 | 24 | 25 | while(<>){ 26 | m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_"; 27 | $id = $1; 28 | $id =~ tr/A-Z/a-z/; # Necessary because of weirdness on disk 13-16.1 (uppercase filenames) 29 | print "$id $_"; 30 | } 31 | -------------------------------------------------------------------------------- /examples/chime4/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/chime4/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/chime4/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/commonvoice/fr/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | # Requires ffmpeg and pandas to be installed 3 | ## Conformer Result 4 | 5 | * Feature info: dither + specaug + speed perturb 6 | * Training info: lr 0.0005, warmup_steps 20000, batch size 8, 3 gpu, 30 epochs 7 | * Decoding info: average_num 20 8 | 9 | 10 | 11 | | decoding mode | test (wer) | 12 | | :--------------------: | :---------: | 13 | | ctc_greedy_search | 16.12% | 14 | | ctc_prefix_beam_search | 16.07% | 15 | | attention | 13.56% | 16 | | attention_rescoring | 14.01% | -------------------------------------------------------------------------------- /examples/commonvoice/fr/local/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# -le 1 ]; then 3 | echo "Args_Error:Two parameters are required." 
4 | exit 1; 5 | fi 6 | download_path=$1 7 | data_France=$2 8 | wget -O ${download_path}/tmp.zip https://mozilla-common-voice-datasets.s3.dualstack.us-west-2.amazonaws.com/cv-corpus-8.0-2022-01-19/cv-corpus-8.0-2022-01-19-fr.tar.gz 9 | tar -xvf ${download_path}/tmp.zip -C ${data_France} 10 | rm -rf ${download_path}/tmp.zip -------------------------------------------------------------------------------- /examples/commonvoice/fr/local/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ $# -le 0 ]; then 3 | echo "Argument should be France src directory, see ../run.sh for example." 4 | exit 1; 5 | fi 6 | dir=`pwd`/data 7 | local=`pwd`/local 8 | src_path=$1 9 | if [ ! -d ${dir} ]; then 10 | mkdir ${dir} 11 | else 12 | rm -rf ${dir} 13 | mkdir ${dir} 14 | fi 15 | 16 | for x in train dev test; do 17 | if [ ! ${dir}/${x} ]; then 18 | mkdir ${dir}/${x} 19 | else 20 | rm -rf ${dir}/${x} 21 | mkdir ${dir}/${x} 22 | fi 23 | done 24 | 25 | if [ ! -d ${src_path}/wavs ]; then 26 | mkdir ${src_path}/wavs 27 | fi 28 | for x in train dev test; do 29 | python3 ${local}/create_scp_text.py ${src_path} ${x} ${dir}/${x} 30 | done 31 | -------------------------------------------------------------------------------- /examples/commonvoice/fr/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/commonvoice/fr/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/commonvoice/fr/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/csj/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result Bidecoder (large) 4 | 5 | 6 | ## Conformer Result 7 | 8 | * Feature info: using fbank feature, cmvn, dither, online speed perturb 9 | * Training info: train_conformer.yaml, kernel size 15, lr 0.004, batch size 12, 8 gpu, acc_grad 1, 50 epochs, dither 0.0 10 | * Decoding info: ctc_weight 0.5, average_num 10 11 | 12 | 13 | | decoding mode | test1 | test2 | test3 | 14 | |----------------------------------|------------|------------|------------| 15 | | ctc greedy search | 7.94 | 5.29 | 6.10 | 16 | | ctc prefix beam search | 7.83+ | 5.28 | 6.08 | 17 | | attention decoder | 7.83 | 5.63 | 6.37 | 18 | | attention rescoring | 7.28+ | 4.81 | 5.44 | 19 | 20 | note that "+" means we removed two <0.1s wav files in test1 before decoding. 
21 | 22 | 23 | 24 | 25 | ## Conformer U2++ Result 26 | 27 | 28 | ## Conformer U2 Result 29 | 30 | -------------------------------------------------------------------------------- /examples/csj/s0/csj_tools/wn.3.mincut.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | # import os 3 | import sys 4 | 5 | 6 | def mincut(wavscpfn, minsec): 7 | outfn = wavscpfn + "_" + str(minsec) 8 | 9 | with open(outfn, 'w') as bw: 10 | with open(wavscpfn) as br: 11 | for aline in br.readlines(): 12 | aline = aline.strip() 13 | afn = aline.split('\t')[1] 14 | # print(afn) 15 | dur = librosa.get_duration(filename=afn) 16 | if dur >= minsec: 17 | bw.write(aline + '\n') 18 | 19 | 20 | # wn.3.mincut.py 21 | if __name__ == '__main__': 22 | if len(sys.argv) < 3: 23 | print('{} '.format(sys.argv[0])) 24 | exit() 25 | 26 | wavscpfn = sys.argv[1] 27 | minsec = float(sys.argv[2]) 28 | 29 | mincut(wavscpfn, minsec) 30 | -------------------------------------------------------------------------------- /examples/csj/s0/list_files/2ch.id.list: -------------------------------------------------------------------------------- 1 | D01F0002 2 | D01F0003 3 | D01F0023 4 | D01F0030 5 | D01F0046 6 | D01F0049 7 | D01F0055 8 | D01F0057 9 | D01M0005 10 | D01M0009 11 | D01M0012 12 | D01M0019 13 | D01M0020 14 | D01M0042 15 | D01M0043 16 | D01M0047 17 | D02F0015 18 | D02F0018 19 | D02F0025 20 | D02F0027 21 | D02F0031 22 | D02F0032 23 | D02F0033 24 | D02F0054 25 | D02M0014 26 | D02M0016 27 | D02M0024 28 | D02M0026 29 | D02M0028 30 | D02M0035 31 | D02M0039 32 | D02M0051 33 | D03F0001 34 | D03F0006 35 | D03F0008 36 | D03F0034 37 | D03F0036 38 | D03F0040 39 | D03F0045 40 | D03F0058 41 | D03M0004 42 | D03M0007 43 | D03M0013 44 | D03M0017 45 | D03M0037 46 | D03M0038 47 | D03M0048 48 | D03M0053 49 | D04F0011 50 | D04F0022 51 | D04F0029 52 | D04F0044 53 | D04F0050 54 | D04M0010 55 | D04M0021 56 | D04M0041 57 | D04M0052 58 | D04M0056 59 | 
-------------------------------------------------------------------------------- /examples/csj/s0/list_files/test.set.1.list: -------------------------------------------------------------------------------- 1 | A01M0097 2 | A04M0051 3 | A04M0121 4 | A03M0156 5 | A03M0112 6 | A01M0110 7 | A05M0011 8 | A03M0106 9 | A01M0137 10 | A04M0123 11 | 12 | -------------------------------------------------------------------------------- /examples/csj/s0/list_files/test.set.123.list: -------------------------------------------------------------------------------- 1 | A01M0097 2 | A04M0051 3 | A04M0121 4 | A03M0156 5 | A03M0112 6 | A01M0110 7 | A05M0011 8 | A03M0106 9 | A01M0137 10 | A04M0123 11 | 12 | A01F0063 13 | A01M0056 14 | A06F0135 15 | A02M0012 16 | A06M0064 17 | A01M0141 18 | A01F0034 19 | A03M0016 20 | A03F0072 21 | A01F0001 22 | 23 | S00F0066 24 | S00M0213 25 | S00M0070 26 | S00M0008 27 | S01F0105 28 | S00F0148 29 | S00F0019 30 | S00M0112 31 | S00F0152 32 | S00M0079 33 | 34 | -------------------------------------------------------------------------------- /examples/csj/s0/list_files/test.set.2.list: -------------------------------------------------------------------------------- 1 | A01F0063 2 | A01M0056 3 | A06F0135 4 | A02M0012 5 | A06M0064 6 | A01M0141 7 | A01F0034 8 | A03M0016 9 | A03F0072 10 | A01F0001 11 | 12 | -------------------------------------------------------------------------------- /examples/csj/s0/list_files/test.set.3.list: -------------------------------------------------------------------------------- 1 | S00F0066 2 | S00M0213 3 | S00M0070 4 | S00M0008 5 | S01F0105 6 | S00F0148 7 | S00F0019 8 | S00M0112 9 | S00F0152 10 | S00M0079 11 | 12 | -------------------------------------------------------------------------------- /examples/csj/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/csj/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/csj/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/gigaspeech/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/gigaspeech/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/gigaspeech/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/hkust/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result (Old IO) 4 | 5 | * Feature info: using fbank feature, with cmvn, with speed perturb. 6 | * Training info: lr 0.002, batch size 16, 1 machine, 1*4 = 4 gpu, acc_grad 4, 240 epochs, dither 0.1 7 | * Decoding info: ctc_weight 0.5, average_num 30 8 | 9 | | decoding mode | | 10 | |--------------------------|-------| 11 | | attention decoder | 21.9 | 12 | | ctc greedy search | 21.15 | 13 | | ctc prefix beam search | 21.13 | 14 | | attention rescoring | 20.47 | 15 | 16 | ## Conformer Result (New IO) 17 | 18 | * Feature info: using fbank feature, with cmvn, with speed perturb. 
19 | * Training info: lr 0.002, batch size 16, 1 machine, 1*4 = 4 gpu, acc_grad 4, 133 epochs, dither 0.1 20 | * Decoding info: ctc_weight 0.5, average_num 30 21 | 22 | | decoding mode | | 23 | |--------------------------|-------| 24 | | attention decoder | 21.42 | 25 | | ctc greedy search | 21.16 | 26 | | ctc prefix beam search | 21.18 | 27 | | attention rescoring | 20.42 | 28 | -------------------------------------------------------------------------------- /examples/hkust/s0/conf/train_960_unigram5000.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/hkust/s0/conf/train_960_unigram5000.model -------------------------------------------------------------------------------- /examples/hkust/s0/local/hkust_normalize.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright Chao Weng 4 | 5 | # normalizations for hkust transcript 6 | # see the docs/trans-guidelines.pdf for details 7 | 8 | while () { 9 | @A = split(" ", $_); 10 | print "$A[0] "; 11 | for ($n = 1; $n < @A; $n++) { 12 | $a = $A[$n]; 13 | if (($a eq "{breath}")||($a eq "{cough}")||($a eq "{sneeze}") 14 | || ($a eq "{lipsmack}")) {next;} 15 | if (($a eq "{laugh}")) {next;} 16 | if (($a eq "")) {next;} 17 | $tmp = $a; 18 | if ($tmp =~ /[^.,?+-]{0,}[.,?+-]+/) { $tmp =~ s:([^.,?+-]{0,})[.,?+-]+:$1:g; } 19 | if ($tmp =~ /\~[A-Z]/) { $tmp =~ s:\~([A-Z]):$1:; } 20 | if ($tmp =~ /%\S/) { $tmp =~ s:%(\S):$1:; } 21 | if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);} 22 | print "$tmp "; 23 | } 24 | print "\n"; 25 | } 26 | -------------------------------------------------------------------------------- /examples/hkust/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/hkust/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/hkust/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/librispeech/rnnt/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Bidecoder Transducer Result 4 | 5 | * Feature info: using fbank feature, dither, cmvn, online speed perturb 6 | * Training info: lr 0.001, dynamic batch with max_frames_in_batch 4000, 8 gpu, acc_grad 1, 60 epochs 7 | * Training weight info: transducer_weight 0.75, ctc_weight 0.1, reverse_weight 0.30, average_num 10 8 | * Predictor type: lstm 9 | 10 | | decoding mode | dev_clean | dev_other | test_clean | test_other | 11 | |-----------------------|------------|-----------|------------|------------| 12 | | rnnt_greedy_search | 3.42% | 8.99% | 3.56% | 9.15% | 13 | | rnnt_beam_search | 3.35% | 8.77% | 3.45% | 8.78% | 14 | | rnnt_beam_att_rescore | 3.25% | 8.66% | 3.41% | 8.68% | 15 | 16 | Pretrained model: https://huggingface.co/yuekai/wenet-asr-librispeech-conformer-transducer-mtl/blob/main/exp/conformer_transducer/avg_10.pt 17 | 18 | -------------------------------------------------------------------------------- 
/examples/librispeech/rnnt/local: -------------------------------------------------------------------------------- 1 | ../s0/local/ -------------------------------------------------------------------------------- /examples/librispeech/rnnt/path.sh: -------------------------------------------------------------------------------- 1 | ../s0/path.sh -------------------------------------------------------------------------------- /examples/librispeech/rnnt/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/librispeech/rnnt/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/librispeech/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/librispeech/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/librispeech/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/multi_cn/s0/conf/train_960_unigram5000.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/multi_cn/s0/conf/train_960_unigram5000.model -------------------------------------------------------------------------------- /examples/multi_cn/s0/local/magicdata_badlist: -------------------------------------------------------------------------------- 1 | 16_4013_20170819121429.wav 2 | 18_1565_20170712000170.wav 3 | -------------------------------------------------------------------------------- /examples/multi_cn/s0/local/primewords_data_prep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Xingyu Na 4 | # Apache 2.0 5 | 6 | . ./path.sh || exit 1; 7 | 8 | if [ $# != 2 ]; then 9 | echo "Usage: $0 " 10 | echo " $0 /export/a05/xna/data/primewords data/primewords" 11 | exit 1; 12 | fi 13 | 14 | corpus=$1/primewords_md_2018_set1 15 | data=$2 16 | 17 | if [ ! -d $corpus/audio_files ] || [ ! 
-f $corpus/set1_transcript.json ]; then 18 | echo "Error: $0 requires complete corpus" 19 | exit 1; 20 | fi 21 | 22 | echo "**** Creating primewords data folder ****" 23 | 24 | mkdir -p $data/train 25 | 26 | # find wav audio file for train 27 | 28 | find $corpus -iname "*.wav" > $data/wav.flist 29 | n=`cat $data/wav.flist | wc -l` 30 | [ $n -ne 50384 ] && \ 31 | echo Warning: expected 50384 data files, found $n 32 | 33 | echo "Filtering data using found wav list and provided transcript" 34 | local/primewords_parse_transcript.py $data/wav.flist $corpus/set1_transcript.json $data/train 35 | cat $data/train/transcripts.txt |\ 36 | awk '{if (NF > 1) print $0;}' > $data/train/text 37 | 38 | for file in wav.scp utt2spk text; do 39 | sort $data/train/$file -o $data/train/$file 40 | done 41 | tools/utt2spk_to_spk2utt.pl $data/train/utt2spk > $data/train/spk2utt 42 | 43 | # rm -r $data/wav.flist 44 | 45 | tools/validate_data_dir.sh --no-feats $data/train || exit 1; 46 | -------------------------------------------------------------------------------- /examples/multi_cn/s0/local/primewords_parse_transcript.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import json 5 | 6 | 7 | def main(argv): 8 | fp = open(argv[1], encoding="utf-8") 9 | js = json.load(fp) 10 | fp.close() 11 | metas = {} 12 | for ele in js: 13 | fname = ele['file'] 14 | metas[fname] = ele 15 | 16 | fWavScp = open(os.path.join(argv[2], 'wav.scp'), 'w') 17 | fText = open(os.path.join(argv[2], 'transcripts.txt'), 18 | 'w', 19 | encoding="utf-8") 20 | fUtt2Spk = open(os.path.join(argv[2], 'utt2spk'), 'w') 21 | for line in open(argv[0]): 22 | fpath = line.strip('\r\n') 23 | wname = os.path.basename(fpath) 24 | meta = metas[wname] 25 | spkid = 'P' + meta['user_id'] 26 | uttid = spkid + '-' + meta['id'] 27 | fWavScp.write(uttid + ' ' + fpath + '\n') 28 | fText.write(uttid + ' ' + meta['text'] + '\n') 29 | 
fUtt2Spk.write(uttid + ' ' + spkid + '\n') 30 | fWavScp.close() 31 | fText.close() 32 | fUtt2Spk.close() 33 | 34 | 35 | if __name__ == "__main__": 36 | main(sys.argv[1:]) 37 | -------------------------------------------------------------------------------- /examples/multi_cn/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/multi_cn/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/multi_cn/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/openasr2021/s0/local/make_absolute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script replaces the command readlink -f (which is not portable). 4 | # It turns a pathname into an absolute pathname, including following soft links. 
5 | target_file=$1 6 | 7 | cd $(dirname $target_file) 8 | target_file=$(basename $target_file) 9 | 10 | # Iterate down a (possible) chain of symlinks 11 | while [ -L "$target_file" ]; do 12 | target_file=$(readlink $target_file) 13 | cd $(dirname $target_file) 14 | target_file=$(basename $target_file) 15 | done 16 | 17 | # Compute the canonicalized name by finding the physical path 18 | # for the directory we're in and appending the target file. 19 | phys_dir=$(pwd -P) 20 | result=$phys_dir/$target_file 21 | echo $result 22 | -------------------------------------------------------------------------------- /examples/openasr2021/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/openasr2021/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/openasr2021/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/swbd/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result 4 | 5 | * Feature info: dither + specaug + speed perturb 6 | * Training info: lr 0.001, warmup_steps 25000, batch size 16, 1 gpu, acc_grad 4, 240 
epochs 7 | * Decoding info: average_num 10 8 | 9 | | decoding mode | eval2000 (wer) | 10 | |:----------------------:|:----------------:| 11 | | ctc_greedy_search | 32.39% | 12 | | ctc_prefix_beam_search | 32.39% | 13 | | attention | 31.28% | 14 | | attention_rescoring | 31.36% | -------------------------------------------------------------------------------- /examples/swbd/s0/local/MSU_single_letter.txt: -------------------------------------------------------------------------------- 1 | A ey 2 | B b iy 3 | C s iy 4 | D d iy 5 | E iy 6 | F eh f 7 | G jh iy 8 | H ey ch 9 | I ay 10 | J jh ey 11 | K k ey 12 | L eh l 13 | M eh m 14 | N eh n 15 | O ow 16 | P p iy 17 | Q k y uw 18 | R aa r 19 | S eh s 20 | T t iy 21 | U y uw 22 | V v iy 23 | W d ah b ax l y uw 24 | X eh k s 25 | Y w ay 26 | Z z iy 27 | -------------------------------------------------------------------------------- /examples/swbd/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/swbd/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/swbd/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/tedlium3/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result 4 | 5 | * Feature info: using fbank feature, dither, cmvn, without speed perturb (not supported segments yet) 6 | * Training info: lr 0.001, batch size 20, 8 gpu, acc_grad 1, 240 epochs, dither 0.1 7 | * Decoding info: ctc_weight 0.5, average_num 10 8 | 9 | 10 | | decoding mode | Dev WER | Test WER | 11 | |---------------------|---------|----------| 12 | | attention rescoring | 9.54% | 8.66% | -------------------------------------------------------------------------------- /examples/tedlium3/s0/local/join_suffix.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright 2014 Nickolay V. Shmyrev 4 | # 2016 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0 6 | 7 | import sys 8 | 9 | # This script joins together pairs of split-up words like "you 're" -> "you're". 
# The TEDLIUM transcripts are normalized in a way that's not traditional for
# speech recognition.


def join_suffixes(items):
    """Merge each apostrophe-initial token into the token that precedes it.

    Args:
        items: Whitespace-split tokens of one transcript line.

    Returns:
        A new list in which every pair ``(word, "'suffix")`` has been
        concatenated into ``"word'suffix"``. A suffix is consumed by at most
        one preceding word; an apostrophe token with nothing before it (or
        directly after an already-joined pair) is kept as-is.
    """
    joined = []
    i = 0
    while i < len(items):
        if i + 1 < len(items) and items[i + 1].startswith("'"):
            joined.append(items[i] + items[i + 1])
            i += 2  # skip the suffix that was just merged
        else:
            joined.append(items[i])
            i += 1
    return joined


def main():
    """Filter stdin to stdout, dropping consecutive duplicate lines."""
    prev_line = ""
    for line in sys.stdin:
        # Only *adjacent* identical lines are suppressed.
        if line == prev_line:
            continue
        print(" ".join(join_suffixes(line.split())))
        prev_line = line


if __name__ == "__main__":
    main()
info: average_num 20 8 | * trans_type: phn 9 | 10 | 11 | | decoding mode | test (wer) | 12 | | :--------------------: | :---------: | 13 | | ctc_greedy_search | 16.70% | 14 | | ctc_prefix_beam_search | 16.60% | 15 | | attention | 22.37% | 16 | | attention_rescoring | 16.60% | 17 | 18 | ## transformer Result 19 | 20 | * Feature info: dither + specaug + speed perturb 21 | * Training info: lr 0.002, warmup_steps 5000 batch size 16, 1 gpu, acc_grad 4, 120 epochs 22 | * Decoding info: average_num 20 23 | * trans_type: phn 24 | 25 | 26 | | decoding mode | test (wer) | 27 | | :--------------------: | :---------: | 28 | | ctc_greedy_search | 17.78% | 29 | | ctc_prefix_beam_search | 17.46% | 30 | | attention | 21.77% | 31 | | attention_rescoring | 17.06% | -------------------------------------------------------------------------------- /examples/timit/s0/local/dev_spk.list: -------------------------------------------------------------------------------- 1 | faks0 2 | fdac1 3 | fjem0 4 | mgwt0 5 | mjar0 6 | mmdb1 7 | mmdm2 8 | mpdf0 9 | fcmh0 10 | fkms0 11 | mbdg0 12 | mbwm0 13 | mcsh0 14 | fadg0 15 | fdms0 16 | fedw0 17 | mgjf0 18 | mglb0 19 | mrtk0 20 | mtaa0 21 | mtdt0 22 | mthc0 23 | mwjg0 24 | fnmr0 25 | frew0 26 | fsem0 27 | mbns0 28 | mmjr0 29 | mdls0 30 | mdlf0 31 | mdvc0 32 | mers0 33 | fmah0 34 | fdrw0 35 | mrcs0 36 | mrjm4 37 | fcal1 38 | mmwh0 39 | fjsj0 40 | majc0 41 | mjsw0 42 | mreb0 43 | fgjd0 44 | fjmg0 45 | mroa0 46 | mteb0 47 | mjfc0 48 | mrjr0 49 | fmml0 50 | mrws1 51 | -------------------------------------------------------------------------------- /examples/timit/s0/local/phones.60-48-39.map: -------------------------------------------------------------------------------- 1 | aa aa aa 2 | ae ae ae 3 | ah ah ah 4 | ao ao aa 5 | aw aw aw 6 | ax ax ah 7 | ax-h ax ah 8 | axr er er 9 | ay ay ay 10 | b b b 11 | bcl vcl sil 12 | ch ch ch 13 | d d d 14 | dcl vcl sil 15 | dh dh dh 16 | dx dx dx 17 | eh eh eh 18 | el el l 19 | em m m 20 | en en n 21 | eng ng ng 
def sph2pipe_wav(in_wav, tmp_out_wav, out_wav):
    """Derive a command list and a plain wav.scp from a piped wav.scp.

    Every non-blank line of ``in_wav`` is split on single spaces. Field 0 is
    used as the key, fields 1-4 are copied verbatim as a command prefix, and
    field 4 is treated as a '/'-separated source path. The output path is the
    source path with ``'_pipe'`` appended to its fourth-from-last component;
    its parent directory is created if needed.

    Args:
        in_wav: Path of the input scp file (UTF-8).
        tmp_out_wav: Path to write ``<fields 1-4> <output path>`` lines to.
        out_wav: Path to write ``<field 0> <output path>`` lines to.

    Raises:
        IndexError: If a non-blank line has fewer than five fields or the
            path in field 4 has fewer than four components.
    """
    with open(in_wav, 'r', encoding='utf-8') as in_f, \
            open(tmp_out_wav, 'w', encoding='utf-8') as tmp_out_f, \
            open(out_wav, 'w', encoding='utf-8') as out_f:
        for line in in_f:
            line = line.strip()
            if not line:
                # Blank lines carry no entry; skip instead of crashing.
                continue
            fields = line.split(' ')
            parts = fields[4].split('/')
            parts[-4] = parts[-4] + '_pipe'
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs('/'.join(parts[:-1]), exist_ok=True)
            wav_out_path = '/'.join(parts)
            tmp_out_f.write(' '.join(fields[1:5]) + ' ' + wav_out_path +
                            '\n')
            out_f.write(fields[0] + ' ' + wav_out_path + '\n')
#!/usr/bin/env bash

# Copyright 2013  (Author: Daniel Povey)
# Apache 2.0

# This script takes data prepared in a corpus-dependent way
# in data/local/, and converts it into the "canonical" form,
# in various subdirectories of data/, e.g. data/lang, data/train, etc.

. ./path.sh || exit 1;

echo "Preparing train, dev and test data"
srcdir=data/local/data


for x in train dev test; do
  mkdir -p data/$x
  # cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
  # Produce data/$x/wav.scp plus a temporary list of conversion commands.
  local/sph2pipe_process.py $srcdir/${x}_wav.scp data/${x}/tmp_wav.scp data/${x}/wav.scp || exit 1;
  # Each line of tmp_wav.scp is one complete command: echo it, then run it.
  # -r keeps backslashes in paths intact; $line is deliberately unquoted so
  # that it is word-split into a command and its arguments.
  while read -r line
  do
    echo $line
    $line
  done < data/${x}/tmp_wav.scp
  rm data/${x}/tmp_wav.scp

  cp $srcdir/$x.text data/$x/text || exit 1;
  cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
  cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
  tools/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
  # stm/glm are optional. Use `if` rather than `[ ... ] && cp` so that a
  # missing optional file does not become the exit status of the loop (and
  # therefore of the whole script).
  if [ -e $srcdir/${x}.stm ]; then
    cp $srcdir/${x}.stm data/$x/stm
  fi
  if [ -e $srcdir/${x}.glm ]; then
    cp $srcdir/${x}.glm data/$x/glm
  fi
  # tools/validate_data_dir.sh --no-feats data/$x || exit 1
done
# Converts an utt2spk file into a spk2utt file.
# Input comes from stdin or from a single file argument; the result is
# written to standard output, one speaker per line, speakers in order of
# first appearance and utterances in input order.

die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt" if @ARGV > 1;

my @speaker_order;      # speakers in the order they were first seen
my %utts_of_speaker;    # speaker -> array ref of utterance ids

while (my $entry = <>) {
  my @fields = split(" ", $entry);
  die "Invalid line in utt2spk file: $entry" unless @fields == 2;
  my ($utt, $spk) = @fields;
  # First sighting of this speaker: remember its position in the output.
  push @speaker_order, $spk unless exists $utts_of_speaker{$spk};
  push @{ $utts_of_speaker{$spk} }, "$utt";
}

for my $spk (@speaker_order) {
  print "$spk " . join(' ', @{ $utts_of_speaker{$spk} }) . "\n";
}
#!/bin/bash
# Copyright 2021 Tencent Inc. (Author: Yougen Yuan).
# Apache 2.0

# Checks that the VKW challenge data is present under data/ and unpacks it.
# Stage 0: require data/vkw_v1.1.zip and unzip it unless data/vkw exists.
# Stage 1: require data/train/text.

current_dir=$(pwd)
stage=0
stop_stage=0
. ./path.sh || exit 1;

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  cd "$current_dir/data/" || exit 1
  # The original used `[ ! -z vkw_v1.1.zip ]`, which tests a literal,
  # non-empty string and is therefore always true: the script always printed
  # the hint and exited before unzipping. Test for the file itself.
  # NOTE(review): exit status 0 is kept as in the original so callers see no
  # change; consider a non-zero status for missing data.
  if [ ! -f vkw_v1.1.zip ]; then
    echo "wget vkw challenge data to this directory"
    exit 0
  fi
  # Only unpack when the extracted directory is not there yet.
  if [ ! -d vkw ]; then
    unzip vkw_v1.1.zip
  fi
  cd "$current_dir" || exit 1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  x=train
  if [ ! -f data/${x}/text ]; then
    echo "vkw trainset is missing, wget to this directory"
    exit 0
  fi
fi

echo "$0: vkw data preparation succeeded"
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/vkw2021/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/vkw2021/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/wenetspeech/paraformer/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/wenetspeech/paraformer/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/wenetspeech/paraformer/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/wenetspeech/s0/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/wenetspeech/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/wenetspeech/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /examples/wenetspeech/whisper/conf/ds_stage1.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": 1, 3 | "gradient_accumulation_steps": 8, 4 | "steps_per_print": 100, 5 | "gradient_clipping": 5, 6 | "fp16": { 7 | "enabled": false, 8 | "auto_cast": false, 9 | "loss_scale": 0, 10 | "initial_scale_power": 16, 11 | "loss_scale_window": 1000, 12 | "hysteresis": 2, 13 | "consecutive_hysteresis": false, 14 | "min_loss_scale": 1 15 | }, 16 | "bf16": { 17 | "enabled": true 18 | }, 19 | "zero_force_ds_cpu_optimizer": false, 20 | "zero_optimization": { 21 | "stage": 1, 22 | "offload_optimizer": { 23 | "device": "none", 24 | "pin_memory": true 25 | }, 26 | "allgather_partitions": true, 27 | "allgather_bucket_size": 5e8, 28 | "overlap_comm": true, 29 | "reduce_scatter": true, 30 | "reduce_bucket_size": 5e8, 31 | "contiguous_gradients" : true 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/wenetspeech/whisper/local: 
-------------------------------------------------------------------------------- 1 | ../../aishell/whisper/local -------------------------------------------------------------------------------- /examples/wenetspeech/whisper/path.sh: -------------------------------------------------------------------------------- 1 | export WENET_DIR=$PWD/../../.. 2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/wenetspeech/whisper/tools: -------------------------------------------------------------------------------- 1 | ../../../tools -------------------------------------------------------------------------------- /examples/wenetspeech/whisper/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet -------------------------------------------------------------------------------- /examples/wsj/s0/README.md: -------------------------------------------------------------------------------- 1 | # Performance Record 2 | 3 | ## Conformer Result 4 | 5 | * Feature info: dither + specaug + speed perturb 6 | * Training info: lr 0.002, warmup_steps 20000 batch size 16, 1 gpu, acc_grad 4, 120 epochs 7 | * Decoding info: average_num 20 8 | 9 | | decoding mode | dev93 (cer) | dev93 (wer) | 10 | |:----------------------:|:-------------:|:-------------:| 11 | | ctc_greedy_search | 5.25% | 13.16% | 12 | | ctc_prefix_beam_search | 5.17% | 13.10% | 13 | | attention_rescoring | 5.11% | 12.17% | -------------------------------------------------------------------------------- 
# Takes a file list with lines like
#  /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# and prints a Kaldi-format scp with lines like
#  4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# i.e. the utterance id (the file's basename without extension, lowercased)
# followed by the original path.

while (my $path = <>) {
    $path =~ m:^\S+/(\w+)\.[wW][vV]1$: or die "Bad line $path";
    my $utt_id = $1;
    # Lowercase the id: disk 13-16.1 has uppercase filenames.
    $utt_id =~ tr/A-Z/a-z/;
    print "$utt_id $path";
}
#!/usr/bin/env bash

# Rebuilds <data-dir>/wav.scp so that it points at the .wav files found under
# <dump-dir>. The previous wav.scp is summarised into <data-dir>/raw_wav.scp
# (columns 1 and 5 of each line) before being overwritten.

set -eu

# A wrong argument count is an error: report usage on stderr and fail, so a
# calling pipeline does not continue as if the wav files had been generated.
if [ $# -ne 2 ]; then
  echo "Script format error: $0 <data-dir> <dump-dir>" >&2
  exit 1
fi

data_dir=$1
dump_dir=$2

mkdir -p "$dump_dir"

num_utts=$(wc -l < "$data_dir/wav.scp")
echo "Orginal utterances (.wav + .wv1): $num_utts"

# cat $data_dir/wav.scp | grep "sph2pipe" | \
#     awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash

# Keep the utterance id and the 5th field of the piped entries.
awk '{print $1,$5}' "$data_dir/wav.scp" > "$data_dir/raw_wav.scp"
# "<basename>.wav <path>" per file; sed then strips the first ".wav", which is
# the one in the id column.
find "$dump_dir" -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \
  sed 's:\.wav::' > "$data_dir/wav.scp"

num_utts=$(wc -l < "$data_dir/wav.scp")
echo "Wave utterances (.wav): $num_utts"

echo "$0: Generate wav => $dump_dir done"
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build 3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix 4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH 5 | 6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C 7 | export PYTHONIOENCODING=UTF-8 8 | export PYTHONPATH=../../../:$PYTHONPATH 9 | -------------------------------------------------------------------------------- /examples/wsj/s0/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /examples/wsj/s0/wenet: -------------------------------------------------------------------------------- 1 | ../../../wenet/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow 2 | pyyaml>=5.1 3 | sentencepiece 4 | tensorboard 5 | tensorboardX 6 | textgrid 7 | pytest 8 | flake8==3.8.2 9 | flake8-bugbear 10 | flake8-comprehensions 11 | flake8-executable 12 | flake8-pyi==20.5.0 13 | mccabe 14 | pycodestyle==2.6.0 15 | pyflakes==2.2.0 16 | clang-format==17.0.6 17 | cpplint==1.6.1 18 | torch>=2.1.2 19 | torchaudio>=2.1.2 20 | tqdm 21 | deepspeed>=0.14.0 22 | librosa 23 | openai-whisper==20231117 24 | pre-commit==3.5.0 25 | langid 26 | -------------------------------------------------------------------------------- /runtime/android/.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .gradle 3 | /local.properties 4 | /.idea/caches 5 | /.idea/libraries 6 | /.idea/modules.xml 7 | /.idea/workspace.xml 8 | /.idea/navEditor.xml 9 | /.idea/assetWizardSettings.xml 10 | .DS_Store 11 | /build 12 | /captures 13 | .externalNativeBuild 14 | .cxx 15 | local.properties 16 | 
-------------------------------------------------------------------------------- /runtime/android/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /release 3 | -------------------------------------------------------------------------------- /runtime/android/app/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile -------------------------------------------------------------------------------- /runtime/android/app/src/androidTest/java/com/mobvoi/wenet/ExampleInstrumentedTest.java: -------------------------------------------------------------------------------- 1 | package com.mobvoi.wenet; 2 | 3 | import android.content.Context; 4 | 5 | import androidx.test.platform.app.InstrumentationRegistry; 6 | import androidx.test.ext.junit.runners.AndroidJUnit4; 7 | 8 | import org.junit.Test; 9 | import org.junit.runner.RunWith; 10 | 11 | import static org.junit.Assert.*; 12 | 13 | /** 14 | * Instrumented test, which will execute on an Android device. 
15 | * 16 | * @see Testing documentation 17 | */ 18 | @RunWith(AndroidJUnit4.class) 19 | public class ExampleInstrumentedTest { 20 | @Test 21 | public void useAppContext() { 22 | // Context of the app under test. 23 | Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); 24 | assertEquals("com.mobvoi.wenet", appContext.getPackageName()); 25 | } 26 | } -------------------------------------------------------------------------------- /runtime/android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/assets/README.md: -------------------------------------------------------------------------------- 1 | put final.zip and units.txt here. 2 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4.1) 2 | set(TARGET wenet) 3 | project(${TARGET} CXX) 4 | set(CMAKE_CXX_STANDARD 14) 5 | include(ExternalProject) 6 | 7 | option(TORCH "whether to build with Torch" ON) 8 | option(ONNX "whether to build with ONNX" OFF) 9 | option(ITN "whether to use WeTextProcessing" ON) 10 | set(CMAKE_VERBOSE_MAKEFILE on) 11 | set(build_DIR ${CMAKE_SOURCE_DIR}/../../../build) 12 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 13 | string(REPLACE "-Wl,--exclude-libs,libgcc_real.a" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") 14 | 15 | include(libtorch) 16 | include(openfst) 17 | include(wetextprocessing) 18 | 19 | include_directories( 20 | ${CMAKE_SOURCE_DIR} 21 | ${CMAKE_SOURCE_DIR}/kaldi 22 | ) 23 | 24 | add_subdirectory(utils) 25 | add_subdirectory(frontend) 26 | add_subdirectory(post_processor) 27 | 
add_subdirectory(kaldi) # kaldi: wfst based decoder 28 | add_subdirectory(decoder) 29 | add_dependencies(post_processor wetextprocessing) 30 | 31 | link_libraries(frontend decoder android) 32 | add_library(${TARGET} SHARED wenet.cc) 33 | 34 | add_executable(decoder_main bin/decoder_main.cc) 35 | target_link_libraries(decoder_main PUBLIC libc++_shared.so) 36 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/bin: -------------------------------------------------------------------------------- 1 | ../../../../../core/bin -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/cmake: -------------------------------------------------------------------------------- 1 | ../../../../../core/cmake -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/decoder: -------------------------------------------------------------------------------- 1 | ../../../../../core/decoder -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/frontend: -------------------------------------------------------------------------------- 1 | ../../../../../core/frontend -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/kaldi: -------------------------------------------------------------------------------- 1 | ../../../../../core/kaldi -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/patch: -------------------------------------------------------------------------------- 1 | ../../../../../core/patch -------------------------------------------------------------------------------- /runtime/android/app/src/main/cpp/post_processor: -------------------------------------------------------------------------------- 1 | 
/**
 * Static JNI facade: every method is declared {@code native} and is resolved
 * from the shared library named {@code wenet}, loaded once when this class is
 * initialised.
 *
 * NOTE(review): the native implementations are not visible from this file;
 * the per-method descriptions below are inferred from the method names and
 * should be confirmed against the native source.
 */
public class Recognize {

  // Load the native library before any native method can be called.
  static {
    System.loadLibrary("wenet");
  }

  // Presumably initialises the recognizer from the files in modelDir — confirm.
  public static native void init(String modelDir);
  // Presumably clears decoding state for a new utterance — confirm.
  public static native void reset();
  // Passes a buffer of 16-bit samples to the native side.
  // NOTE(review): sample rate / channel layout are not specified here.
  public static native void acceptWaveform(short[] waveform);
  // Presumably signals that no more audio will be supplied — confirm.
  public static native void setInputFinished();
  // Returns a boolean completion flag from the native side.
  public static native boolean getFinished();
  // Presumably starts decoding of the accepted audio — confirm.
  public static native void startDecode();
  // Returns the current result as a String.
  public static native String getResult();
}
/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/values-night/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/values/attrs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /runtime/android/app/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #FFBB86FC 4 | #FF6200EE 5 | #FF3700B3 6 | #FF03DAC5 7 | #FF018786 8 | #FF000000 9 | #FFFFFFFF 10 | 11 | #f16d7a 12 | #b7d28d 13 | #b8f1ed 14 | #b7d28d 15 | #b8f1ed 16 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | wenet 3 | -------------------------------------------------------------------------------- /runtime/android/app/src/main/res/values/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | -------------------------------------------------------------------------------- /runtime/android/app/src/test/java/com/mobvoi/wenet/ExampleUnitTest.java: -------------------------------------------------------------------------------- 1 | package com.mobvoi.wenet; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.*; 6 | 7 | /** 8 | * Example local unit test, which will execute on the development machine (host). 
9 | * 10 | * @see Testing documentation 11 | */ 12 | public class ExampleUnitTest { 13 | @Test 14 | public void addition_isCorrect() { 15 | assertEquals(4, 2 + 2); 16 | } 17 | } -------------------------------------------------------------------------------- /runtime/android/app/wenet.keystore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/wenet.keystore -------------------------------------------------------------------------------- /runtime/android/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | google() 4 | jcenter() 5 | } 6 | dependencies { 7 | classpath 'com.android.tools.build:gradle:7.4.2' 8 | } 9 | } 10 | 11 | allprojects { 12 | repositories { 13 | google() 14 | jcenter() 15 | maven { url 'https://jitpack.io' } 16 | } 17 | } 18 | 19 | task clean(type: Delete) { 20 | delete rootProject.buildDir 21 | } -------------------------------------------------------------------------------- /runtime/android/gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 2 | # IDE (e.g. Android Studio) users: 3 | # Gradle settings configured through the IDE *will override* 4 | # any settings specified in this file. 5 | # For more details on how to configure your build environment visit 6 | # http://www.gradle.org/docs/current/userguide/build_environment.html 7 | # Specifies the JVM arguments used for the daemon process. 8 | # The setting is particularly useful for tweaking memory settings. 9 | org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 10 | # When configured, Gradle will run in incubating parallel mode. 11 | # This option should only be used with decoupled projects. 
More details, visit 12 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 13 | # org.gradle.parallel=true 14 | # AndroidX package structure to make it clearer which packages are bundled with the 15 | # Android operating system, and which are packaged with your app's APK 16 | # https://developer.android.com/topic/libraries/support-library/androidx-rn 17 | android.useAndroidX=true 18 | # Automatically convert third-party libraries to use AndroidX 19 | android.enableJetifier=true -------------------------------------------------------------------------------- /runtime/android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /runtime/android/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue Jan 12 17:33:20 CST 2021 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip 7 | -------------------------------------------------------------------------------- /runtime/android/settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app' 2 | rootProject.name = "wenet" -------------------------------------------------------------------------------- /runtime/core/api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(TORCH OR ONNX) 2 | add_library(wenet_api SHARED wenet_api.cc) 3 | target_link_libraries(wenet_api PUBLIC decoder) 4 | endif() 5 | 
-------------------------------------------------------------------------------- /runtime/core/api/README.md: -------------------------------------------------------------------------------- 1 | # WeNet API 2 | 3 | We refer to [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h) 4 | for the interface design. 5 | 6 | 7 | We are going to implement the following interfaces: 8 | 9 | - [x] non-streaming recognition 10 | - [ ] streaming recognition 11 | - [ ] nbest 12 | - [ ] contextual biasing word 13 | - [ ] alignment 14 | - [ ] language support (post processor) 15 | - [ ] label check 16 | -------------------------------------------------------------------------------- /runtime/core/bin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(decoder_main decoder_main.cc) 2 | target_link_libraries(decoder_main PUBLIC decoder) 3 | if(IPEX) 4 | target_link_libraries(decoder_main PUBLIC "${TORCH_IPEX_LIBRARIES}") 5 | endif() 6 | 7 | add_executable(label_checker_main label_checker_main.cc) 8 | target_link_libraries(label_checker_main PUBLIC decoder) 9 | 10 | if(TORCH) 11 | add_executable(api_main api_main.cc) 12 | target_link_libraries(api_main PUBLIC wenet_api) 13 | endif() 14 | 15 | if(WEBSOCKET) 16 | add_executable(websocket_client_main websocket_client_main.cc) 17 | target_link_libraries(websocket_client_main PUBLIC websocket) 18 | add_executable(websocket_server_main websocket_server_main.cc) 19 | target_link_libraries(websocket_server_main PUBLIC websocket) 20 | endif() 21 | 22 | if(GRPC) 23 | add_executable(grpc_server_main grpc_server_main.cc) 24 | target_link_libraries(grpc_server_main PUBLIC wenet_grpc) 25 | add_executable(grpc_client_main grpc_client_main.cc) 26 | target_link_libraries(grpc_client_main PUBLIC wenet_grpc) 27 | endif() 28 | 29 | if(HTTP) 30 | add_executable(http_client_main http_client_main.cc) 31 | target_link_libraries(http_client_main PUBLIC http) 32 | 
add_executable(http_server_main http_server_main.cc) 33 | target_link_libraries(http_server_main PUBLIC http) 34 | endif() 35 | -------------------------------------------------------------------------------- /runtime/core/bin/http_server_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "decoder/params.h" 16 | #include "http/http_server.h" 17 | #include "utils/log.h" 18 | 19 | DEFINE_int32(port, 10086, "http listening port"); 20 | 21 | int main(int argc, char* argv[]) { 22 | gflags::ParseCommandLineFlags(&argc, &argv, false); 23 | google::InitGoogleLogging(argv[0]); 24 | 25 | auto decode_config = wenet::InitDecodeOptionsFromFlags(); 26 | auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); 27 | auto decode_resource = wenet::InitDecodeResourceFromFlags(); 28 | 29 | wenet::HttpServer server(FLAGS_port, feature_config, decode_config, 30 | decode_resource); 31 | LOG(INFO) << "Listening at port " << FLAGS_port; 32 | server.Start(); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /runtime/core/bin/websocket_server_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) 2 | // 3 | // Licensed under the 
Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "decoder/params.h" 16 | #include "utils/log.h" 17 | #include "websocket/websocket_server.h" 18 | 19 | DEFINE_int32(port, 10086, "websocket listening port"); 20 | 21 | int main(int argc, char* argv[]) { 22 | gflags::ParseCommandLineFlags(&argc, &argv, false); 23 | google::InitGoogleLogging(argv[0]); 24 | 25 | auto decode_config = wenet::InitDecodeOptionsFromFlags(); 26 | auto feature_config = wenet::InitFeaturePipelineConfigFromFlags(); 27 | auto decode_resource = wenet::InitDecodeResourceFromFlags(); 28 | 29 | wenet::WebSocketServer server(FLAGS_port, feature_config, decode_config, 30 | decode_resource); 31 | LOG(INFO) << "Listening at port " << FLAGS_port; 32 | server.Start(); 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /runtime/core/cmake/boost.cmake: -------------------------------------------------------------------------------- 1 | FetchContent_Declare(boost 2 | URL https://archives.boost.io/release/1.75.0/source/boost_1_75_0.tar.gz 3 | URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a 4 | ) 5 | FetchContent_MakeAvailable(boost) 6 | include_directories(${boost_SOURCE_DIR}) 7 | 8 | if(MSVC) 9 | add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB) 10 | endif() 11 | -------------------------------------------------------------------------------- /runtime/core/cmake/gflags.cmake: 
-------------------------------------------------------------------------------- 1 | FetchContent_Declare(gflags 2 | URL https://github.com/gflags/gflags/archive/v2.2.2.zip 3 | URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5 4 | ) 5 | FetchContent_MakeAvailable(gflags) 6 | include_directories(${gflags_BINARY_DIR}/include) -------------------------------------------------------------------------------- /runtime/core/cmake/glog.cmake: -------------------------------------------------------------------------------- 1 | FetchContent_Declare(glog 2 | URL https://github.com/google/glog/archive/v0.4.0.zip 3 | URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc 4 | ) 5 | FetchContent_MakeAvailable(glog) 6 | include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR}) -------------------------------------------------------------------------------- /runtime/core/cmake/grpc.cmake: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc) 2 | # third_party: grpc 3 | # On how to build grpc, you may refer to https://github.com/grpc/grpc 4 | # We recommend manually recursive clone the repo to avoid internet connection problem 5 | FetchContent_Declare(gRPC 6 | GIT_REPOSITORY https://github.com/grpc/grpc 7 | GIT_TAG v1.37.1 8 | ) 9 | FetchContent_MakeAvailable(gRPC) -------------------------------------------------------------------------------- /runtime/core/cmake/gtest.cmake: -------------------------------------------------------------------------------- 1 | FetchContent_Declare(googletest 2 | URL https://github.com/google/googletest/archive/release-1.11.0.zip 3 | URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a 4 | ) 5 | if(MSVC) 6 | set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE) 7 | endif() 8 | FetchContent_MakeAvailable(googletest) 
-------------------------------------------------------------------------------- /runtime/core/cmake/pybind11.cmake: -------------------------------------------------------------------------------- 1 | FetchContent_Declare(pybind11 2 | URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip 3 | URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8 4 | ) 5 | FetchContent_MakeAvailable(pybind11) 6 | -------------------------------------------------------------------------------- /runtime/core/decoder/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(decoder_srcs 2 | asr_decoder.cc 3 | asr_model.cc 4 | context_graph.cc 5 | ctc_prefix_beam_search.cc 6 | ctc_wfst_beam_search.cc 7 | ctc_endpoint.cc 8 | ) 9 | 10 | if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU AND NOT OPENVINO) 11 | message(FATAL_ERROR "Please build with TORCH or ONNX or OPENVINO or XPU or IOS or BPU!!!") 12 | endif() 13 | if(TORCH OR IOS) 14 | list(APPEND decoder_srcs torch_asr_model.cc) 15 | endif() 16 | if(ONNX) 17 | list(APPEND decoder_srcs onnx_asr_model.cc) 18 | endif() 19 | 20 | add_library(decoder STATIC ${decoder_srcs}) 21 | target_link_libraries(decoder PUBLIC kaldi-decoder frontend 22 | post_processor utils) 23 | 24 | if(ANDROID) 25 | target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY}) 26 | else() 27 | if(TORCH) 28 | target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES}) 29 | endif() 30 | if(ONNX) 31 | target_link_libraries(decoder PUBLIC onnxruntime) 32 | endif() 33 | if(BPU) 34 | target_link_libraries(decoder PUBLIC bpu_asr_model) 35 | endif() 36 | if(XPU) 37 | target_link_libraries(decoder PUBLIC xpu_conformer) 38 | endif() 39 | if(OPENVINO) 40 | target_link_libraries(decoder PUBLIC ov_asr_model) 41 | endif() 42 | endif() 43 | -------------------------------------------------------------------------------- /runtime/core/frontend/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_library(frontend STATIC 2 | feature_pipeline.cc 3 | fft.cc 4 | ) 5 | target_link_libraries(frontend PUBLIC utils) -------------------------------------------------------------------------------- /runtime/core/frontend/fft.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Network 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef FRONTEND_FFT_H_ 16 | #define FRONTEND_FFT_H_ 17 | 18 | #ifndef M_PI 19 | #define M_PI 3.1415926535897932384626433832795 20 | #endif 21 | #ifndef M_2PI 22 | #define M_2PI 6.283185307179586476925286766559005 23 | #endif 24 | 25 | namespace wenet { 26 | 27 | // Fast Fourier Transform 28 | 29 | void make_sintbl(int n, float* sintbl); 30 | 31 | void make_bitrev(int n, int* bitrev); 32 | 33 | int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n); 34 | 35 | } // namespace wenet 36 | 37 | #endif // FRONTEND_FFT_H_ 38 | -------------------------------------------------------------------------------- /runtime/core/grpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # compile wenet.proto 2 | set(PROTO_DIR "${CMAKE_CURRENT_SOURCE_DIR}") 3 | add_custom_command( 4 | OUTPUT ${PROTO_DIR}/wenet.pb.cc 5 | ${PROTO_DIR}/wenet.pb.h 6 | ${PROTO_DIR}/wenet.grpc.pb.cc 7 | ${PROTO_DIR}/wenet.grpc.pb.h 8 | COMMAND ${protobuf_BINARY_DIR}/protoc 9 | ARGS --grpc_out "${PROTO_DIR}" 10 | --cpp_out "${PROTO_DIR}" 11 | -I "${PROTO_DIR}" 12 | --plugin=protoc-gen-grpc=${grpc_BINARY_DIR}/grpc_cpp_plugin 13 | wenet.proto) 14 | 15 | # grpc_server/client 16 | link_directories(${protobuf_BINARY_DIR}/lib) 17 | add_library(wenet_grpc STATIC 18 | grpc_client.cc 19 | grpc_server.cc 20 | wenet.pb.cc 21 | wenet.grpc.pb.cc 22 | ) 23 | target_link_libraries(wenet_grpc PUBLIC grpc++ grpc++_reflection decoder) 24 | -------------------------------------------------------------------------------- /runtime/core/http/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(http STATIC 2 | http_client.cc 3 | http_server.cc 4 | ) 5 | target_link_libraries(http PUBLIC decoder) 6 | -------------------------------------------------------------------------------- /runtime/core/kaldi/README.md: -------------------------------------------------------------------------------- 1 | 
We use the Kaldi decoder to implement TLG-based language model integration, 2 | so we copied related files to this directory. 3 | The main changes are: 4 | 5 | 1. To minimize the change, we use the same directory tree as Kaldi. 6 | 7 | 2. We replace the Kaldi logging system with glog in the following way. 8 | 9 | ``` c++ 10 | #define KALDI_WARN \ 11 | google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream() 12 | #define KALDI_ERR \ 13 | google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream() 14 | #define KALDI_INFO \ 15 | google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream() 16 | #define KALDI_VLOG(v) VLOG(v) 17 | 18 | #define KALDI_ASSERT(condition) CHECK(condition) 19 | ``` 20 | 21 | 3. We lint all the files to satisfy the lint rules in WeNet. 22 | -------------------------------------------------------------------------------- /runtime/core/kaldi/fstext/fstext-lib.h: -------------------------------------------------------------------------------- 1 | // fstext/fstext-lib.h 2 | 3 | // Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (author: 4 | // Daniel Povey) 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | #ifndef KALDI_FSTEXT_FSTEXT_LIB_H_ 22 | #define KALDI_FSTEXT_FSTEXT_LIB_H_ 23 | 24 | #include "fst/fstlib.h" 25 | #include "fstext/determinize-lattice.h" 26 | #include "fstext/determinize-star.h" 27 | #include "fstext/fstext-utils.h" 28 | #include "fstext/kaldi-fst-io.h" 29 | #include "fstext/lattice-utils.h" 30 | #include "fstext/lattice-weight.h" 31 | #include "fstext/pre-determinize.h" 32 | #include "fstext/table-matcher.h" 33 | 34 | #endif // KALDI_FSTEXT_FSTEXT_LIB_H_ 35 | -------------------------------------------------------------------------------- /runtime/core/kaldi/lat/CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | # So many lint errors now, we just ignore it now. 2 | # We will try to fix it in the future. 3 | exclude_files=.* 4 | -------------------------------------------------------------------------------- /runtime/core/kaldi/util/kaldi-io-inl.h: -------------------------------------------------------------------------------- 1 | // util/kaldi-io-inl.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | #ifndef KALDI_UTIL_KALDI_IO_INL_H_ 20 | #define KALDI_UTIL_KALDI_IO_INL_H_ 21 | 22 | #include 23 | 24 | namespace kaldi { 25 | 26 | bool Input::Open(const std::string& rxfilename, bool* binary) { 27 | return OpenInternal(rxfilename, true, binary); 28 | } 29 | 30 | bool Input::OpenTextMode(const std::string& rxfilename) { 31 | return OpenInternal(rxfilename, false, NULL); 32 | } 33 | 34 | bool Input::IsOpen() { return impl_ != NULL; } 35 | 36 | bool Output::IsOpen() { return impl_ != NULL; } 37 | 38 | } // end namespace kaldi. 39 | 40 | #endif // KALDI_UTIL_KALDI_IO_INL_H_ 41 | -------------------------------------------------------------------------------- /runtime/core/patch/CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | exclude_files=.* 2 | -------------------------------------------------------------------------------- /runtime/core/patch/openfst/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | #-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc -fno-common -DPIC -o .libs/symbol-table.o 3 | 4 | include_directories(./include/) 5 | install(DIRECTORY include/ DESTINATION include/ 6 | FILES_MATCHING PATTERN "*.h") 7 | 8 | add_subdirectory(lib) 9 | 10 | if(HAVE_SCRIPT) 11 | add_subdirectory(script) 12 | endif(HAVE_SCRIPT) 13 | 14 | if(HAVE_BIN) 15 | add_subdirectory(bin) 16 | endif(HAVE_BIN) 17 | 18 | add_subdirectory(extensions) 19 | 20 | if(BUILD_TESTING) 21 | enable_testing() 22 | add_subdirectory(test) 23 | endif(BUILD_TESTING) 24 | -------------------------------------------------------------------------------- /runtime/core/post_processor/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(post_processor STATIC 2 | post_processor.cc 3 | ) 4 | target_link_libraries(post_processor PUBLIC utils 
wetext_processor wetext_utils) 5 | 6 | -------------------------------------------------------------------------------- /runtime/core/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | link_libraries(gtest_main gmock) 2 | 3 | add_executable(utils_test utils_test.cc) 4 | target_link_libraries(utils_test PUBLIC utils) 5 | add_test(UTILS_TEST utils_test) 6 | 7 | add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc) 8 | target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder) 9 | add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test) 10 | 11 | add_executable(post_processor_test post_processor_test.cc) 12 | target_link_libraries(post_processor_test PUBLIC post_processor) 13 | add_test(POST_PROCESSOR_TEST post_processor_test) 14 | 15 | 16 | add_executable(feature_pipeline_test feature_pipeline_test.cc) 17 | target_link_libraries(feature_pipeline_test PUBLIC frontend) 18 | add_test(FEATURE_PIPELINE_TEST feature_pipeline_test) -------------------------------------------------------------------------------- /runtime/core/test/utils_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include "utils/utils.h" 16 | 17 | #include 18 | 19 | #include "gmock/gmock.h" 20 | #include "gtest/gtest.h" 21 | 22 | TEST(UtilsTest, TopKTest) { 23 | using ::testing::ElementsAre; 24 | using ::testing::FloatNear; 25 | using ::testing::Pointwise; 26 | std::vector data = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10}; 27 | std::vector values; 28 | std::vector indices; 29 | wenet::TopK(data, 3, &values, &indices); 30 | EXPECT_THAT(values, Pointwise(FloatNear(1e-8), {10, 9, 8})); 31 | ASSERT_THAT(indices, ElementsAre(9, 4, 8)); 32 | } 33 | -------------------------------------------------------------------------------- /runtime/core/toolchains/aarch64-linux-gnu.toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_SYSTEM_NAME Linux) 2 | SET (CMAKE_SYSTEM_PROCESSOR aarch64) 3 | 4 | set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) 5 | set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) 6 | -------------------------------------------------------------------------------- /runtime/core/utils/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(utils STATIC 2 | string.cc 3 | utils.cc 4 | ) 5 | 6 | if(NOT ANDROID) 7 | if(MSVC) 8 | target_link_libraries(utils PUBLIC fst) 9 | else() 10 | target_link_libraries(utils PUBLIC fst dl) 11 | endif() 12 | endif() -------------------------------------------------------------------------------- /runtime/core/utils/file.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Binbin Zhang (binbzha@qq.com) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef UTILS_FILE_H_ 16 | #define UTILS_FILE_H_ 17 | 18 | #include 19 | #include 20 | 21 | namespace wenet { 22 | 23 | inline bool FileExists(const std::string& path) { 24 | std::ifstream f(path.c_str()); 25 | return f.good(); 26 | } 27 | 28 | } // namespace wenet 29 | 30 | #endif // UTILS_FILE_H_ 31 | -------------------------------------------------------------------------------- /runtime/core/utils/flags.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef UTILS_FLAGS_H_ 16 | #define UTILS_FLAGS_H_ 17 | 18 | // Because openfst is a dynamic library compiled with gflags/glog, we must use 19 | // the gflags/glog from openfst to avoid them linked both statically and 20 | // dynamically into the executable. 
21 | #include "fst/flags.h" 22 | 23 | #endif // UTILS_FLAGS_H_ 24 | -------------------------------------------------------------------------------- /runtime/core/utils/log.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef UTILS_LOG_H_ 16 | #define UTILS_LOG_H_ 17 | 18 | // Because openfst is a dynamic library compiled with gflags/glog, we must use 19 | // the gflags/glog from openfst to avoid them being linked both statically and 20 | // dynamically into the executable. 21 | #include "fst/log.h" 22 | 23 | #endif // UTILS_LOG_H_ 24 | -------------------------------------------------------------------------------- /runtime/core/utils/timer.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef UTILS_TIMER_H_ 16 | #define UTILS_TIMER_H_ 17 | 18 | #include 19 | 20 | namespace wenet { 21 | 22 | class Timer { 23 | public: 24 | Timer() : time_start_(std::chrono::steady_clock::now()) {} 25 | void Reset() { time_start_ = std::chrono::steady_clock::now(); } 26 | // Return milliseconds elapsed since construction or the last Reset() 27 | int Elapsed() const { 28 | auto time_now = std::chrono::steady_clock::now(); 29 | return std::chrono::duration_cast(time_now - 30 | time_start_) 31 | .count(); 32 | } 33 | 34 | private: 35 | std::chrono::time_point time_start_; 36 | }; 37 | } // namespace wenet 38 | 39 | #endif // UTILS_TIMER_H_ 40 | -------------------------------------------------------------------------------- /runtime/core/utils/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | 15 | #ifndef UTILS_UTILS_H_ 16 | #define UTILS_UTILS_H_ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | namespace wenet { 23 | 24 | #define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \ 25 | Type(const Type&) = delete; \ 26 | Type& operator=(const Type&) = delete; 27 | 28 | const float kFloatMax = std::numeric_limits::max(); 29 | // kSpaceSymbol in UTF-8 is: ▁ 30 | const char kSpaceSymbol[] = "\xe2\x96\x81"; 31 | 32 | // Return the sum of two probabilities in log scale 33 | float LogAdd(float x, float y); 34 | 35 | template 36 | void TopK(const std::vector& data, int32_t k, std::vector* values, 37 | std::vector* indices); 38 | 39 | } // namespace wenet 40 | 41 | #endif // UTILS_UTILS_H_ 42 | -------------------------------------------------------------------------------- /runtime/core/websocket/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(websocket STATIC 2 | websocket_client.cc 3 | websocket_server.cc 4 | ) 5 | target_link_libraries(websocket PUBLIC decoder) 6 | -------------------------------------------------------------------------------- /runtime/gpu/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tensorrt_fastertransformer/FasterTransformer"] 2 | path = tensorrt_fastertransformer/FasterTransformer 3 | url = https://github.com/NVIDIA/FasterTransformer.git 4 | -------------------------------------------------------------------------------- /runtime/gpu/Dockerfile/Dockerfile.client: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tritonserver:23.01-py3-sdk 2 | LABEL maintainer="NVIDIA" 3 | LABEL repository="tritonserver" 4 | 5 | RUN apt-get update && apt-get install -y libsndfile1 6 | RUN pip3 install soundfile 7 | WORKDIR /workspace 8 | -------------------------------------------------------------------------------- /runtime/gpu/Dockerfile/Dockerfile.server: 
-------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/tritonserver:23.01-py3 2 | LABEL maintainer="NVIDIA" 3 | LABEL repository="tritonserver" 4 | 5 | RUN apt-get update && apt-get -y install swig && apt-get -y install python3-dev && apt-get install -y cmake 6 | RUN pip3 install torch torchaudio 7 | RUN pip3 install -v kaldifeat pyyaml onnx 8 | 9 | WORKDIR /workspace 10 | RUN git clone https://github.com/Slyne/ctc_decoder.git && cd ctc_decoder/swig && bash setup.sh 11 | COPY ./scripts scripts 12 | -------------------------------------------------------------------------------- /runtime/gpu/Overview.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/Overview.JPG -------------------------------------------------------------------------------- /runtime/gpu/client/test_wavs/long.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/client/test_wavs/long.wav -------------------------------------------------------------------------------- /runtime/gpu/client/test_wavs/mid.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/client/test_wavs/mid.wav -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_cuda_decoder/attention_rescoring/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/attention_rescoring/1/.gitkeep 
-------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_cuda_decoder/decoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/decoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_cuda_decoder/encoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/encoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/lang/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/lang/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/wfst_decoding_config.yaml: -------------------------------------------------------------------------------- 1 | acoustic_scale: 10.0 2 | n_input_per_chunk: 50 3 | default_beam: 17.0 4 | lattice_beam: 4.0 5 | max_active: 7000 6 | determinize_lattice: True 7 | max_batch_size: 200 8 | num_channels: 400 9 | frame_shift_seconds: 0.04 10 | lm_scale: 5.0 11 | word_ins_penalty: 0.0 12 | -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitignore: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/lang/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/lang/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/wfst_decoding_config.yaml: -------------------------------------------------------------------------------- 1 | acoustic_scale: 10.0 2 | n_input_per_chunk: 50 3 | default_beam: 17.0 4 | lattice_beam: 4.0 5 | max_active: 7000 6 | determinize_lattice: True 7 | max_batch_size: 200 8 | num_channels: 400 9 | frame_shift_seconds: 0.04 10 | lm_scale: 5.0 11 | word_ins_penalty: 0.0 12 | -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitignore 
-------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/cuda_decoders/requirements.txt: -------------------------------------------------------------------------------- 1 | riva-asrlib-decoder==0.4.0 2 | onnxmltools 3 | -------------------------------------------------------------------------------- /runtime/gpu/model_repo/attention_rescoring/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/attention_rescoring/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/model_repo/decoder/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/decoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/model_repo/encoder/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/encoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/model_repo_stateful/decoder/1/.gitignore: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/decoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/model_repo_stateful/encoder/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/encoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/model_repo_stateful/streaming_wenet/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/streaming_wenet/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/scripts/convert_start_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | onnx_model_dir=/ws/onnx_model 19 | model_repo=/ws/model_repo 20 | 21 | # Convert config.pbtxt in model_repo and move models 22 | python3 scripts/convert.py --config=$onnx_model_dir/train.yaml --vocab=$onnx_model_dir/words.txt \ 23 | --model_repo=$model_repo --onnx_model_dir=$onnx_model_dir 24 | 25 | # Start server 26 | tritonserver --model-repository=${model_repo} --pinned-memory-pool-byte-size=1024000000 --cuda-memory-pool-byte-size=0:1024000000 27 | -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/LayerNormPlugin/Makefile: -------------------------------------------------------------------------------- 1 | CUDA_PATH = /usr/local/cuda 2 | TRT_PATH = /usr/lib/x86_64-linux-gnu 3 | NVCC = $(CUDA_PATH)/bin/nvcc 4 | #SM = 61 5 | # 61 for GTX1070, 75 for T4,80 for A30 6 | GENCODE = -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 7 | CUFLAG = -w -std=c++14 -O3 -UDEBUG -Xcompiler -fPIC $(GENCODE) 8 | CPPFLAG = -w -std=c++14 -O3 -use_fast_math 9 | SOFLAG = $(CUFLAG) -shared 10 | INCLUDE = -I. 
-I$(CUDA_PATH)/include 11 | LDFLAG = -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcublasLt -L$(TRT_PATH)/lib -lnvinfer 12 | 13 | SRC_CU = $(shell find ./ -name '*.cu') 14 | 15 | all: LayerNorm.so 16 | 17 | %.o: %.cu 18 | $(NVCC) $(CUFLAG) $(INCLUDE) -o $@ -c $< 19 | 20 | LayerNorm.so: $(SRC_CU:.cu=.o) 21 | $(NVCC) $(SOFLAG) $(LDFLAG) -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -rf ./*.so ./*.o ./*.d ./*.trt 26 | 27 | .PHONY: test 28 | test: 29 | clear 30 | python testLayerNormPlugin.py 31 | 32 | -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/README.md: -------------------------------------------------------------------------------- 1 | ### Using TensorRT for Triton ASR Server 2 | 3 | ```sh 4 | # using docker image runtime/gpu/Dockerfile/Dockerfile.server 5 | docker pull soar97/triton-wenet:22.12 6 | docker run -it --rm --name "wenet_trt_test" --gpus all --shm-size 1g --net host soar97/triton-wenet:22.12 7 | # inside the docker container 8 | git clone https://github.com/wenet-e2e/wenet.git 9 | cd wenet/runtime/gpu/tensorrt 10 | pip3 install nvidia-pyindex 11 | # Use pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple if you encounter network issues 12 | pip3 install -r requirements.txt 13 | 14 | bash run_streaming_small_model.sh 15 | ``` 16 | 17 | #### Performance of Small u2pp Model for Streaming ASR 18 | 19 | Benchmark (small u2pp ONNX model) on the Aishell1 test set, with an A10 server (16 vCPU, 60 GB memory) and a client (4 vCPU, 16 GB memory). The total audio duration is 36108.919 seconds.
20 | 21 | (Note: using non-simulate-streaming mode) 22 | |concurrent-tasks | processing time(s) | 23 | |----------|--------------------| 24 | | 20 (onnx fp16) | 123.796 | 25 | | 40 (onnx fp16) | 84.557 | 26 | | 60 (onnx fp16) | 73.232 | 27 | | 80 (onnx fp16) | 66.862 | 28 | | 20 (trt fp16+layernorm plugin)| 90.582 | 29 | | 40 (trt fp16+layernorm plugin)| 75.411 | 30 | | 60 (trt fp16+layernorm plugin)| 69.602 | 31 | | 80 (trt fp16+layernorm plugin)| 65.603 | -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitignore -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/tensorrt/requirements.txt: -------------------------------------------------------------------------------- 1 | nvidia-pyindex 2 | tensorrt==8.5.1.7 3 | onnx 4 | onnxruntime-gpu 5 | onnx_graphsurgeon>=0.3.21 --index-url https://pypi.ngc.nvidia.com 6 | polygraphy 7 | cuda-python 8 | onnxmltools 9 | -------------------------------------------------------------------------------- /runtime/gpu/tensorrt_fastertransformer/decoder_plugin.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/decoder_plugin.JPG -------------------------------------------------------------------------------- /runtime/gpu/tensorrt_fastertransformer/encoder_plugin.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/encoder_plugin.JPG -------------------------------------------------------------------------------- /runtime/gpu/tensorrt_fastertransformer/model_repo_ft/decoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/decoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/tensorrt_fastertransformer/model_repo_ft/encoder/1/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/encoder/1/.gitkeep -------------------------------------------------------------------------------- /runtime/gpu/tensorrt_fastertransformer/requirements.txt: -------------------------------------------------------------------------------- 1 | onnx 2 | nvidia-pyindex 3 | onnx-graphsurgeon 4 | cuda-python 5 | onnxruntime-gpu 6 | onnxmltools 7 | -------------------------------------------------------------------------------- /runtime/gpu/test.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/test.gif -------------------------------------------------------------------------------- /runtime/horizonbpu/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | fc_base/ 3 | wheels* 4 | -------------------------------------------------------------------------------- /runtime/horizonbpu/api: -------------------------------------------------------------------------------- 1 | 
../core/api -------------------------------------------------------------------------------- /runtime/horizonbpu/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/horizonbpu/bpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .") 2 | 3 | if(BPU) 4 | list(APPEND bpu_asr_model_srcs ./bpu_asr_model.cc) 5 | message(STATUS "Use src_files: [ ${bpu_asr_model_srcs} ] to compile bpu_asr_model .") 6 | 7 | # compile bpu_asr_model 8 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) 9 | add_library(bpu_asr_model STATIC ${bpu_asr_model_srcs}) 10 | target_link_libraries(bpu_asr_model PUBLIC easy_dnn dnn) 11 | endif() 12 | -------------------------------------------------------------------------------- /runtime/horizonbpu/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/horizonbpu/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/horizonbpu/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/horizonbpu/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/horizonbpu/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- 
/runtime/horizonbpu/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/horizonbpu/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/horizonbpu/toolchains: -------------------------------------------------------------------------------- 1 | ../core/toolchains -------------------------------------------------------------------------------- /runtime/horizonbpu/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/horizonbpu/websocket: -------------------------------------------------------------------------------- 1 | ../core/websocket -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/AccentColor.colorset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "colors" : [ 3 | { 4 | "idiom" : "universal" 5 | } 6 | ], 7 | "info" : { 8 | "author" : "xcode", 9 | "version" : 1 10 | } 11 | } 12 | 
-------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "universal", 5 | "platform" : "ios", 6 | "size" : "1024x1024" 7 | } 8 | ], 9 | "info" : { 10 | "author" : "xcode", 11 | "version" : 1 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | UIApplicationSceneManifest 6 | 7 | UIApplicationSupportsMultipleScenes 8 | 9 | UISceneConfigurations 10 | 11 | UIWindowSceneSessionRoleApplication 12 | 13 | 14 | UISceneConfigurationName 15 | Default Configuration 16 | UISceneDelegateClassName 17 | $(PRODUCT_MODULE_NAME).SceneDelegate 18 | UISceneStoryboardFile 19 | Main 20 | 21 | 22 | 23 | 24 | NSMicrophoneUsageDescription 25 | Need microphone access for recording speech 26 | 27 | 28 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/model/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/ios/WenetDemo/WenetDemo/model/.gitkeep -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/wenet/WenetDemo-Bridging-Header.h: -------------------------------------------------------------------------------- 1 | // 
Copyright (c) 2022 Dan Ma (1067837450@qq.com) 2 | // 3 | // Use this file to import your target's public headers 4 | // that you would like to expose to Swift. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | 18 | #ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ 19 | #define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ 20 | 21 | #import "wenet.h" 22 | 23 | #endif // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_ 24 | -------------------------------------------------------------------------------- /runtime/ios/WenetDemo/WenetDemo/wenet/wenet.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2022 Dan Ma (1067837450@qq.com) 2 | // 3 | // wenet.h 4 | // WenetDemo 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | 18 | #ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ 19 | #define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ 20 | 21 | #include 22 | 23 | #import 24 | 25 | @interface Wenet : NSObject 26 | 27 | - (nullable instancetype)initWithModelPath: 28 | (NSString*)modelPath DictPath:(NSString*)dictPath; // NOLINT 29 | 30 | - (void)reset; 31 | 32 | - (void)acceptWaveForm: (float*)pcm: (int)size; // NOLINT 33 | 34 | - (void)decode; 35 | 36 | - (NSString*)get_result; // NOLINT 37 | 38 | @end 39 | 40 | #endif // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_ 41 | -------------------------------------------------------------------------------- /runtime/ios/build/Podfile: -------------------------------------------------------------------------------- 1 | platform :ios, '14.3' 2 | pod 'LibTorch', '~>1.11.0' 3 | -------------------------------------------------------------------------------- /runtime/ios/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/ios/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/ios/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/ios/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/ios/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/ios/post_processor: 
-------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/ios/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/ios/toolchains: -------------------------------------------------------------------------------- 1 | ../core/toolchains -------------------------------------------------------------------------------- /runtime/ios/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/ipex/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | fc_base/ 3 | -------------------------------------------------------------------------------- /runtime/ipex/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/ipex/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/ipex/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/ipex/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/ipex/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN 
apt-get update && apt-get install -y git cmake wget build-essential python-is-python3 python3-pip google-perftools 5 | RUN pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu 6 | RUN pip install intel_extension_for_pytorch==2.3.0 pyyaml six intel-openmp 7 | RUN ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 /usr/lib/x86_64-linux-gnu/libtcmalloc.so 8 | 9 | RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet 10 | ENV OMP_NUM_THREADS=1 11 | ARG src=/home/wenet/runtime/ipex 12 | RUN cmake -B $src/build -S $src -DCMAKE_BUILD_TYPE=Release && cmake --build $src/build -j32 13 | ENV LD_LIBRARY_PATH=$src/fc_base/libtorch-src/lib/:$LD_LIBRARY_PATH 14 | WORKDIR /home/wenet/ 15 | -------------------------------------------------------------------------------- /runtime/ipex/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/ipex/grpc: -------------------------------------------------------------------------------- 1 | ../core/grpc -------------------------------------------------------------------------------- /runtime/ipex/http: -------------------------------------------------------------------------------- 1 | ../core/http -------------------------------------------------------------------------------- /runtime/ipex/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/ipex/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/ipex/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor 
-------------------------------------------------------------------------------- /runtime/ipex/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/ipex/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/ipex/web: -------------------------------------------------------------------------------- 1 | ../libtorch/web -------------------------------------------------------------------------------- /runtime/ipex/websocket: -------------------------------------------------------------------------------- 1 | ../core/websocket -------------------------------------------------------------------------------- /runtime/kunlun/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | fc_base/ 3 | -------------------------------------------------------------------------------- /runtime/kunlun/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/kunlun/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/kunlun/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/kunlun/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/kunlun/frontend: 
-------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/kunlun/grpc: -------------------------------------------------------------------------------- 1 | ../core/grpc -------------------------------------------------------------------------------- /runtime/kunlun/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/kunlun/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/kunlun/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/kunlun/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/kunlun/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/kunlun/websocket: -------------------------------------------------------------------------------- 1 | ../core/websocket -------------------------------------------------------------------------------- /runtime/kunlun/xpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .") 2 | 3 | if(XPU) 4 | list(APPEND xpu_conformer_srcs ./xpu_asr_model.cc) 5 | list(APPEND xpu_conformer_srcs ./xpu_conformer.cpp) 6 | list(APPEND xpu_conformer_srcs ./xpu_util.cpp) 7 | message(STATUS 
"Use src_files: [ ${xpu_conformer_srcs} ] to compile xpu_conformer.a .") 8 | 9 | # compile xpu_conformer.a 10 | add_library(xpu_conformer STATIC ${xpu_conformer_srcs}) 11 | target_link_libraries(xpu_conformer PUBLIC xpuapi xpurt) 12 | endif() 13 | 14 | set(CMAKE_VERBOSE_MAKEFILE OFF) 15 | 16 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") 18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive") 19 | set(CMAKE_EXE_LINKER_FLAGS "-lpthread -lrt -lm -ldl") 20 | 21 | set(SRC_FILES ./conformer_test.cpp ./xpu_conformer.cpp ./xpu_util.cpp) 22 | message(STATUS "Use src_files: [ ${SRC_FILES} ] to compile xpu_conformer_test.") 23 | 24 | add_executable(xpu_conformer_test ${SRC_FILES}) 25 | target_link_libraries(xpu_conformer_test -lxpuapi -lxpurt) 26 | -------------------------------------------------------------------------------- /runtime/libtorch/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | fc_base/ 3 | -------------------------------------------------------------------------------- /runtime/libtorch/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/libtorch/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/libtorch/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/libtorch/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/libtorch/docker/Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | MAINTAINER 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && apt-get install -y git cmake wget build-essential 5 | RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet 6 | ARG model=20210618_u2pp_conformer_libtorch.tar.gz 7 | RUN wget -P /home https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/$model 8 | RUN tar -xzf /home/$model -C /home 9 | ARG src=/home/wenet/runtime/libtorch 10 | RUN cmake -B $src/build -S $src -DCMAKE_BUILD_TYPE=Release -DGRAPH_TOOLS=ON && cmake --build $src/build 11 | -------------------------------------------------------------------------------- /runtime/libtorch/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/libtorch/grpc: -------------------------------------------------------------------------------- 1 | ../core/grpc -------------------------------------------------------------------------------- /runtime/libtorch/http: -------------------------------------------------------------------------------- 1 | ../core/http/ -------------------------------------------------------------------------------- /runtime/libtorch/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/libtorch/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/libtorch/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- 
/runtime/libtorch/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/libtorch/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/libtorch/web/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2021 Mobvoi Inc. All Rights Reserved. 5 | # Author: zhendong.peng@mobvoi.com (Zhendong Peng) 6 | 7 | import argparse 8 | 9 | from flask import Flask, render_template 10 | 11 | parser = argparse.ArgumentParser(description='training your network') 12 | parser.add_argument('--port', default=19999, type=int, help='port id') 13 | args = parser.parse_args() 14 | 15 | app = Flask(__name__) 16 | 17 | 18 | @app.route('/') 19 | def index(): 20 | return render_template('index.html') 21 | 22 | 23 | if __name__ == '__main__': 24 | app.run(host='0.0.0.0', port=args.port, debug=True) 25 | -------------------------------------------------------------------------------- /runtime/libtorch/web/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/favicon.ico -------------------------------------------------------------------------------- /runtime/libtorch/web/static/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/FontAwesome.otf -------------------------------------------------------------------------------- 
/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /runtime/libtorch/web/static/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /runtime/libtorch/web/static/image/qrcode-enterprise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/qrcode-enterprise.png -------------------------------------------------------------------------------- /runtime/libtorch/web/static/image/qrcode-official-account.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/qrcode-official-account.png -------------------------------------------------------------------------------- /runtime/libtorch/web/static/image/voice-pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/voice-pic.png -------------------------------------------------------------------------------- /runtime/libtorch/web/static/js/recorder/engine/pcm.js: -------------------------------------------------------------------------------- 1 | /* 2 | 录音 3 | https://github.com/xiangyuecn/Recorder 4 | src: engine/pcm.js 5 | */ 6 | !function(){"use strict";Recorder.prototype.enc_pcm={stable:!0,testmsg:"pcm为未封装的原始音频数据,pcm数据文件无法直接播放;支持位数8位、16位(填在比特率里面),采样率取值无限制"},Recorder.prototype.pcm=function(e,t,r){var a=this.set,n=e.length,o=8==a.bitRate?8:16,c=new ArrayBuffer(n*(o/8)),s=new DataView(c),l=0;if(8==o)for(var p=0;p>8);s.setInt8(l,i,!0)}else for(p=0;p>8);c.setInt8(u,d,!0)}else for(p=0;p>>=1;m[o]=f}var n,u=2*Math.PI/v;for(o=(v>>1)-1;0>1;var c=new Float64Array(t);for(n=-(u=d),o=t;0!=o;o--)e=l[o],h=F[o],c[o-1]=n&1 | tee log.txt 41 | ``` 42 | -------------------------------------------------------------------------------- /runtime/onnxruntime/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/onnxruntime/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/onnxruntime/cmake: 
-------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/onnxruntime/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/onnxruntime/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/onnxruntime/grpc: -------------------------------------------------------------------------------- 1 | ../core/grpc -------------------------------------------------------------------------------- /runtime/onnxruntime/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/onnxruntime/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/onnxruntime/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/onnxruntime/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/onnxruntime/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/onnxruntime/websocket: -------------------------------------------------------------------------------- 1 | 
../core/websocket -------------------------------------------------------------------------------- /runtime/openvino/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/openvino/bin: -------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/openvino/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/openvino/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/openvino/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/openvino/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/openvino/ov/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .") 2 | 3 | if(OPENVINO) 4 | list(APPEND ov_asr_model_srcs ./ov_asr_model.cc) 5 | message(STATUS "Use src_files: [ ${ov_asr_model_srcs} ] to compile ov_asr_model .") 6 | 7 | add_library(ov_asr_model STATIC ${ov_asr_model_srcs}) 8 | include(openvino) 9 | set(OpenVINO_DIR ${openvino_SOURCE_DIR}/runtime/cmake) 10 | set(TBB_DIR ${openvino_SOURCE_DIR}/runtime/3rdparty/tbb/cmake) 11 | find_package(OpenVINO REQUIRED) 12 | find_package(TBB REQUIRED) 13 | if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") 14 
| target_link_libraries(ov_asr_model PUBLIC openvino::runtime) 15 | else() 16 | target_link_libraries(ov_asr_model PUBLIC openvino openvino_intel_cpu_plugin) 17 | endif() 18 | endif() -------------------------------------------------------------------------------- /runtime/openvino/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/openvino/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/openvino/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/openvino/utils: -------------------------------------------------------------------------------- 1 | ../core/utils -------------------------------------------------------------------------------- /runtime/openvino/websocket: -------------------------------------------------------------------------------- 1 | ../core/websocket -------------------------------------------------------------------------------- /runtime/raspberrypi/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | fc_base/ 3 | -------------------------------------------------------------------------------- /runtime/raspberrypi/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ../libtorch/CMakeLists.txt -------------------------------------------------------------------------------- /runtime/raspberrypi/api: -------------------------------------------------------------------------------- 1 | ../core/api -------------------------------------------------------------------------------- /runtime/raspberrypi/bin: 
-------------------------------------------------------------------------------- 1 | ../core/bin -------------------------------------------------------------------------------- /runtime/raspberrypi/cmake: -------------------------------------------------------------------------------- 1 | ../core/cmake -------------------------------------------------------------------------------- /runtime/raspberrypi/decoder: -------------------------------------------------------------------------------- 1 | ../core/decoder -------------------------------------------------------------------------------- /runtime/raspberrypi/frontend: -------------------------------------------------------------------------------- 1 | ../core/frontend -------------------------------------------------------------------------------- /runtime/raspberrypi/kaldi: -------------------------------------------------------------------------------- 1 | ../core/kaldi -------------------------------------------------------------------------------- /runtime/raspberrypi/patch: -------------------------------------------------------------------------------- 1 | ../core/patch -------------------------------------------------------------------------------- /runtime/raspberrypi/post_processor: -------------------------------------------------------------------------------- 1 | ../core/post_processor -------------------------------------------------------------------------------- /runtime/raspberrypi/test: -------------------------------------------------------------------------------- 1 | ../core/test -------------------------------------------------------------------------------- /runtime/raspberrypi/toolchains: -------------------------------------------------------------------------------- 1 | ../core/toolchains -------------------------------------------------------------------------------- /runtime/raspberrypi/utils: -------------------------------------------------------------------------------- 1 | 
../core/utils -------------------------------------------------------------------------------- /runtime/web/README.md: -------------------------------------------------------------------------------- 1 | ## WeNet Web Demo 2 | 3 | * How to install? `pip install -r requirements.txt` 4 | * How to start? 5 | - Non-streaming: `python app.py` 6 | -------------------------------------------------------------------------------- /runtime/web/requirements.txt: -------------------------------------------------------------------------------- 1 | wenet @ git+https://github.com/wenet-e2e/wenet.git 2 | gradio==3.14.0 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = wenet 3 | version = 0.0.0 4 | license = Apache Software License 5 | description = End to end speech recognition toolkit 6 | long_description = file: README.md 7 | classifiers = 8 | License :: OSI Approved :: Apache Software License 9 | Operating System :: OS Independent 10 | Programming Language :: Python :: 3 11 | 12 | [options] 13 | packages = find: 14 | include_package_data = True 15 | python_requires = >= 3.8 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from setuptools import setup, find_packages 3 | 4 | requirements = [ 5 | "numpy", 6 | "requests", 7 | "tqdm", 8 | "torch>=1.13.0", 9 | "torchaudio>=0.13.0", 10 | "openai-whisper", 11 | "librosa", 12 | "pyyaml", 13 | "jieba" 14 | ] 15 | 16 | extra_require = { 17 | "torch-npu": [ 18 | "torch==2.2.0", "torch-npu==2.2.0", "torchaudio==2.2.0", "decorator", 19 | "numpy<2.0.0", "attrs", "psutil" 20 | ], 21 | } 22 | 23 | if platform.system() == 'Windows': 24 | requirements += ['PySoundFile'] 25 | 26 | setup( 27 | name="wenet", 28 | install_requires=requirements, 29 | 
packages=find_packages(), 30 | entry_points={"console_scripts": [ 31 | "wenet = wenet.cli.transcribe:main", 32 | ]}, 33 | extras_require=extra_require, 34 | ) 35 | -------------------------------------------------------------------------------- /test/resources/aishell-BAC009S0724W0121.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/aishell-BAC009S0724W0121.wav -------------------------------------------------------------------------------- /test/resources/dataset/aishell-BAC009S0724W0121.wav: -------------------------------------------------------------------------------- 1 | ../aishell-BAC009S0724W0121.wav -------------------------------------------------------------------------------- /test/resources/dataset/data.list: -------------------------------------------------------------------------------- 1 | {"key": "test/resources/dataset/aishell-BAC009S0724W0121", "wav": "test/resources/dataset/aishell-BAC009S0724W0121.wav", "txt": "广州市房地产中介协会分析"} 2 | {"key": "test/resources/dataset/librispeech-1995-1837-0001", "wav": "test/resources/dataset/librispeech-1995-1837-0001.wav", "txt": "IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT"} 3 | -------------------------------------------------------------------------------- /test/resources/dataset/data.shards.list: -------------------------------------------------------------------------------- 1 | test/resources/dataset/shards/shards_000000000.tar 2 | -------------------------------------------------------------------------------- /test/resources/dataset/librispeech-1995-1837-0001.wav: -------------------------------------------------------------------------------- 1 | ../librispeech-1995-1837-0001.wav -------------------------------------------------------------------------------- 
/test/resources/dataset/shards/shards_000000000.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/dataset/shards/shards_000000000.tar -------------------------------------------------------------------------------- /test/resources/dataset/text: -------------------------------------------------------------------------------- 1 | test/resources/dataset/aishell-BAC009S0724W0121 广州市房地产中介协会分析 2 | test/resources/dataset/librispeech-1995-1837-0001 IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT 3 | -------------------------------------------------------------------------------- /test/resources/dataset/wav.scp: -------------------------------------------------------------------------------- 1 | test/resources/dataset/aishell-BAC009S0724W0121 test/resources/dataset/aishell-BAC009S0724W0121.wav 2 | test/resources/dataset/librispeech-1995-1837-0001 test/resources/dataset/librispeech-1995-1837-0001.wav 3 | -------------------------------------------------------------------------------- /test/resources/librispeech-1995-1837-0001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/librispeech-1995-1837-0001.wav -------------------------------------------------------------------------------- /test/resources/librispeech.train_960_unigram5000.bpemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/librispeech.train_960_unigram5000.bpemodel -------------------------------------------------------------------------------- /test/resources/non-linguistic-symbols.invalid: 
-------------------------------------------------------------------------------- 1 | #1 2 | <> 3 | {{BBB}} 4 | [[ccc]] 5 | -------------------------------------------------------------------------------- /test/resources/non-linguistic-symbols.valid: -------------------------------------------------------------------------------- 1 | {~!@#$%^&*()_+`1234567890-=[]|\\:;"'<>,./?} 2 | [~!@#$%^&*()_+`1234567890-={}|\\:;"'<>,./?] 3 | <~!@#$%^&*()_+`1234567890-={}|\\:;"'[],./?> 4 | {qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM} 5 | [qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM] 6 | 7 | -------------------------------------------------------------------------------- /test/test_file_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright [2021-12-04] 4 | 5 | import pytest 6 | 7 | from wenet.utils.file_utils import read_non_lang_symbols 8 | 9 | 10 | @pytest.mark.parametrize("non_lang_symbol_table_path", [ 11 | "test/resources/non-linguistic-symbols.valid", 12 | "test/resources/non-linguistic-symbols.invalid" 13 | ]) 14 | def test_read_non_lang_symbols(non_lang_symbol_table_path): 15 | path = non_lang_symbol_table_path 16 | try: 17 | syms = read_non_lang_symbols(path) 18 | assert syms[0] == "{~!@#$%^&*()_+`1234567890-=[]|\\\\:;\"'<>,./?}" 19 | assert syms[1] == "[~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'<>,./?]" 20 | assert syms[2] == "<~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'[],./?>" 21 | assert syms[ 22 | 3] == "{qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM}" 23 | assert syms[ 24 | 4] == "[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]" 25 | assert syms[ 26 | 5] == "" 27 | except Exception as e: 28 | assert path == "test/resources/non-linguistic-symbols.invalid" 29 | -------------------------------------------------------------------------------- /test/tools/test_make_shard.py: 
def kaldi2json(kaldi_cmvn_file):
    """Parse a Kaldi text-format CMVN stats file into a plain dict.

    The file is read as whitespace-separated tokens laid out as::

        [ <feat_dim values> <count> <feat_dim values> 0 ]

    The first group of values is returned under ``mean_stat``, the second
    group under ``var_stat`` and ``<count>`` under ``frame_num``; the
    values are copied as-is (converted to ``float``), no normalisation is
    applied here.

    Args:
        kaldi_cmvn_file: path of the CMVN file written with
            ``--binary=false``.

    Returns:
        ``{'mean_stat': [...], 'var_stat': [...], 'frame_num': float}``.

    Raises:
        SystemExit: (code 1) if the file starts with the Kaldi binary
            marker ``\\0B``.
        AssertionError: if the token layout does not match the format
            described above.
    """
    # Sniff the header on a *binary* handle.  A binary stats file holds raw
    # float bytes, so reading it through a text-mode handle can fail with
    # UnicodeDecodeError before the marker is ever compared.
    with open(kaldi_cmvn_file, 'rb') as fid:
        is_binary = fid.read(2) == b'\0B'
    if is_binary:
        logging.error('kaldi cmvn binary file is not supported, please '
                      'recompute it by: compute-cmvn-stats --binary=false '
                      ' scp:feats.scp global_cmvn')
        sys.exit(1)

    with open(kaldi_cmvn_file, 'r') as fid:
        arr = fid.read().split()

    # Smallest well-formed token list is: '[' <count> '0' ']'.
    assert len(arr) >= 4, 'cmvn file is too short'
    assert (arr[0] == '[')
    assert (arr[-2] == '0')
    assert (arr[-1] == ']')

    # Drop '[' and ']' plus the count and the trailing '0'; what is left is
    # two rows of feat_dim values each.
    feat_dim = (len(arr) - 2 - 2) // 2
    means = [float(v) for v in arr[1:feat_dim + 1]]
    count = float(arr[feat_dim + 1])
    variance = [float(v) for v in arr[feat_dim + 2:2 * feat_dim + 2]]

    cmvn_info = {'mean_stat': means, 'var_stat': variance, 'frame_num': count}
    return cmvn_info
disambiguous phone 15 | print('{} {} {} {}'.format(0, 0, '', phone)) 16 | else: 17 | print('{} {} {} {}'.format(0, node, phone, phone)) 18 | print('{} {} {} {}'.format(node, node, phone, '')) 19 | print('{} {} {} {}'.format(node, 0, '', '')) 20 | node += 1 21 | print('0') 22 | -------------------------------------------------------------------------------- /tools/fst/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 
#!/usr/bin/env python
"""Print a single-state RNN-T token FST in OpenFst text format.

Usage: rnnt_token_fst.py <token_table>

Only the first space-separated field of every line in <token_table> is
used as the token name.  Each emitted arc is a self-loop on state 0 with
four fields: ``src dst input_label output_label``.
"""

import sys

# NOTE(review): the label names below were empty strings before, which made
# the first arc a malformed two-field line and the skip test a duplicated
# comparison against ''.  '<blank>' / '<eps>' are restored following the
# usual OpenFst/Kaldi naming -- confirm they match the symbol table that is
# passed to fstcompile.
BLANK = '<blank>'
EPS = '<eps>'

# Blank on the input side produces no output token.
print('0 0 {} {}'.format(BLANK, EPS))

with open(sys.argv[1], 'r', encoding='utf8') as fin:
    for entry in fin:
        fields = entry.strip().split(' ')
        phone = fields[0]
        if phone in ('', EPS, BLANK):
            # Empty lines and the special symbols get no arc of their own
            # (blank is already handled by the arc printed above).
            continue
        elif '#' in phone:  # disambiguation symbol: consumes no input
            print('{} {} {} {}'.format(0, 0, EPS, phone))
        else:
            print('{} {} {} {}'.format(0, 0, phone, phone))
# State 0 is the only state and is final.
print('0')
# Turn every "<speaker> <utt1> <utt2> ..." input line into one
# "<utt> <speaker>" output line per utterance.
while (defined(my $line = <>)) {
  my ($spk, @utts) = split(" ", $line);
  # A usable line names a speaker followed by at least one utterance.
  die "Invalid line in spk2utt file: $line" unless @utts >= 1;
  print "$_ $spk\n" for @utts;
}
4 | # 5 | # This source code is licensed under the license found in the 6 | # https://github.com/pytorch/fairseq/blob/master/LICENSE 7 | import sys 8 | 9 | import sentencepiece as spm 10 | 11 | if __name__ == "__main__": 12 | spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:])) 13 | -------------------------------------------------------------------------------- /tools/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 
#!/usr/bin/env python3
# encoding: utf-8
"""Write the duration in seconds of every wav listed in a wav.scp file.

Usage: wav2dur.py <wav.scp> <dur_scp>

Each non-blank input line is ``<wav_id> <wav_path>``; the matching output
line is ``<wav_id> <duration>``.  A summary (number of utterances and the
total duration) is printed to stdout.
"""

import sys

import torchaudio


def main():
    """Read the scp named by argv[1] and write durations to argv[2]."""
    if len(sys.argv) < 3:
        sys.exit('Usage: {} <wav.scp> <dur_scp>'.format(sys.argv[0]))
    scp = sys.argv[1]
    dur_scp = sys.argv[2]

    cnt = 0
    total_duration = 0
    with open(scp, 'r', encoding='utf8') as f, \
            open(dur_scp, 'w', encoding='utf8') as fout:
        for line in f:
            items = line.strip().split()
            if not items:
                # A blank line (e.g. a trailing newline at the end of a
                # slice) used to crash with IndexError; just skip it.
                continue
            wav_id = items[0]
            fname = items[1]
            cnt += 1
            waveform, rate = torchaudio.load(fname)
            # Number of samples in the first channel.
            frames = len(waveform[0])
            duration = frames / float(rate)
            total_duration += duration
            fout.write('{} {}\n'.format(wav_id, duration))
    print('process {} utts'.format(cnt))
    print('total {} s'.format(total_duration))


if __name__ == '__main__':
    main()
tools/parse_options.sh || exit 1; 5 | 6 | inscp=$1 7 | outscp=$2 8 | data=$(dirname ${inscp}) 9 | if [ $# -eq 3 ]; then 10 | logdir=$3 11 | else 12 | logdir=${data}/log 13 | fi 14 | mkdir -p ${logdir} 15 | 16 | rm -f $logdir/wav_*.slice 17 | rm -f $logdir/wav_*.shape 18 | split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_ 19 | 20 | for slice in `ls $logdir/wav_*.slice`; do 21 | { 22 | name=`basename -s .slice $slice` 23 | tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log 24 | } & 25 | done 26 | wait 27 | cat $logdir/wav_*.shape > $outscp 28 | -------------------------------------------------------------------------------- /wenet/__init__.py: -------------------------------------------------------------------------------- 1 | from wenet.cli.model import load_feature, load_model, load_tokenizer # noqa 2 | -------------------------------------------------------------------------------- /wenet/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/bin/__init__.py -------------------------------------------------------------------------------- /wenet/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/cli/__init__.py -------------------------------------------------------------------------------- /wenet/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/dataset/__init__.py -------------------------------------------------------------------------------- /wenet/dataset/deprecated/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/dataset/deprecated/__init__.py -------------------------------------------------------------------------------- /wenet/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/__init__.py -------------------------------------------------------------------------------- /wenet/models/branchformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/branchformer/__init__.py -------------------------------------------------------------------------------- /wenet/models/ctl_model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ctl_model/__init__.py -------------------------------------------------------------------------------- /wenet/models/e_branchformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/e_branchformer/__init__.py -------------------------------------------------------------------------------- /wenet/models/efficient_conformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/efficient_conformer/__init__.py -------------------------------------------------------------------------------- /wenet/models/finetune/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/finetune/__init__.py -------------------------------------------------------------------------------- /wenet/models/finetune/lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/finetune/lora/__init__.py -------------------------------------------------------------------------------- /wenet/models/finetune/lora/config.yaml: -------------------------------------------------------------------------------- 1 | init_batch_size: 2 2 | init_iters: 8 3 | init_config: 4 | mode: "gradient" # option: "simple", "svd", "gradient" 5 | lora_A: "unit" # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros", "unit", "orthogonal" 6 | lora_A_std: 0.01 # only needed when lora_A is "gaussian" 7 | lora_B: "unit" # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros", "unit", "orthogonal" 8 | lora_B_std: 0.01 # only needed when lora_B is "gaussian" 9 | scale: "stable" # option: "default", "stable", "unit", "normalized", "gd", "weightS" 10 | stable_gamma: 2 # only needed when scale is "stable" 11 | direction: "ArB2r" # option: "ArBr", "A2rBr", "ArB2r"(only needed when mode is "gradient") 12 | dtype: "fp32" # option: "bf16", "fp32" 13 | norm_clip: false # norm clipping 14 | -------------------------------------------------------------------------------- /wenet/models/firered/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/firered/__init__.py -------------------------------------------------------------------------------- /wenet/models/k2/__init__.py: -------------------------------------------------------------------------------- 
# NOTE(review): "Positinoal" is a misspelling of "Positional"; the name is
# left untouched because it is the identifier other modules import.
class ParaformerPositinoalEncoding(WhisperPositionalEncoding):
    """ Sinusoids position encoding used in paraformer.encoder

    Subclass of ``WhisperPositionalEncoding`` that only overrides the
    ``xscale`` attribute after the parent has been constructed.

    Args:
        depth: forwarded as the first positional argument of the parent
            constructor.
        d_model: used solely to compute ``xscale = d_model ** 0.5``; it is
            not passed to the parent.
        dropout_rate: forwarded to the parent constructor.
        max_len: forwarded to the parent constructor.
    """

    def __init__(self,
                 depth: int,
                 d_model: int,
                 dropout_rate: float = 0.1,
                 max_len: int = 1500):
        super().__init__(depth, dropout_rate, max_len)
        # Assigned after the parent constructor so that this value wins if
        # the parent also sets ``xscale``.
        # NOTE(review): presumably the parent's forward() multiplies its
        # input by ``xscale`` -- confirm in WhisperPositionalEncoding.
        self.xscale = d_model**0.5
def init_model(configs, encoder):
    """Instantiate the SSL model selected by ``configs['model']``.

    Args:
        configs: mapping that must contain ``'model'`` (a key of
            ``WENET_SSL_MODEL_CLASS``) and ``'model_conf'`` (keyword
            arguments for the chosen class).
        encoder: passed to the model class as the ``encoder`` keyword.

    Returns:
        The constructed model instance.

    Raises:
        AssertionError: if ``'model'`` is missing or names an unknown
            model type.
    """
    assert 'model' in configs
    name = configs['model']
    assert name in WENET_SSL_MODEL_CLASS
    model_cls = WENET_SSL_MODEL_CLASS[name]
    return model_cls(encoder=encoder, **configs['model_conf'])
class RMSNorm(torch.nn.Module):
    """Root-mean-square layer normalisation.

    Reference: https://arxiv.org/pdf/1910.07467.pdf

    Args:
        dim: size of the last dimension; the learnable ``weight`` has this
            many elements and is initialised to ones.
        eps: constant added to the mean square before the reciprocal
            square root.
        add_unit_offset: when True the output is scaled by
            ``1 + weight``, otherwise by ``weight``.
    """

    def __init__(
        self,
        dim: int,
        eps: float = 1e-6,
        add_unit_offset: bool = True,
    ):
        super().__init__()
        self.add_unit_offset = add_unit_offset
        self.eps = eps
        self.weight = torch.nn.Parameter(torch.ones(dim))

    def _norm(self, x):
        """Divide ``x`` by the RMS taken over its last dimension."""
        mean_square = x.pow(2).mean(-1, keepdim=True)
        return x * torch.rsqrt(mean_square + self.eps)

    def forward(self, x):
        """Normalise ``x`` in float32, cast back, then apply the gain."""
        normed = self._norm(x.float()).type_as(x)
        if not self.add_unit_offset:
            return normed * self.weight
        return normed * (1 + self.weight)
from abc import ABC, abstractmethod
from typing import Dict, List, Tuple, Union

# A token is either text or raw bytes.
T = Union[str, bytes]


class BaseTokenizer(ABC):
    """Abstract interface shared by all tokenizers.

    Subclasses implement the four conversion primitives plus
    ``vocab_size`` and the ``symbol_table`` property; ``tokenize`` and
    ``detokenize`` are built on top of those primitives.
    """

    def tokenize(self, line: str) -> Tuple[List[T], List[int]]:
        """Return ``(tokens, ids)`` for ``line``."""
        tokens = self.text2tokens(line)
        ids = self.tokens2ids(tokens)
        return tokens, ids

    def detokenize(self, ids: List[int]) -> Tuple[str, List[T]]:
        """Return ``(text, tokens)`` for ``ids``."""
        tokens = self.ids2tokens(ids)
        text = self.tokens2text(tokens)
        return text, tokens

    @abstractmethod
    def text2tokens(self, line: str) -> List[T]:
        """Split ``line`` into tokens."""
        raise NotImplementedError("abstract method")

    @abstractmethod
    def tokens2text(self, tokens: List[T]) -> str:
        """Join ``tokens`` back into text."""
        raise NotImplementedError("abstract method")

    @abstractmethod
    def tokens2ids(self, tokens: List[T]) -> List[int]:
        """Map ``tokens`` to integer ids."""
        raise NotImplementedError("abstract method")

    @abstractmethod
    def ids2tokens(self, ids: List[int]) -> List[T]:
        """Map integer ``ids`` to tokens."""
        raise NotImplementedError("abstract method")

    @abstractmethod
    def vocab_size(self) -> int:
        """Return the vocabulary size."""
        raise NotImplementedError("abstract method")

    # ``abc.abstractproperty`` is deprecated; stacking ``property`` on top
    # of ``abstractmethod`` is the supported spelling of the same thing.
    @property
    @abstractmethod
    def symbol_table(self) -> Dict[T, int]:
        """Mapping from token to id."""
        raise NotImplementedError("abstract method")