├── .clang-format
├── .flake8
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    └── workflows
    │   ├── android.yml
    │   ├── doc.yml
    │   ├── docker_image.yml
    │   ├── lint.yml
    │   ├── runtime.yml
    │   ├── stale-issues.yml
    │   ├── unit_test.yml
    │   ├── wheels.yml
    │   └── windows.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── CPPLINT.cfg
├── LICENSE
├── README.md
├── ROADMAP.md
├── docs
    ├── .gitignore
    ├── Makefile
    ├── UIO.md
    ├── conf.py
    ├── context.md
    ├── images
    │   ├── UIO_dataflow.png
    │   ├── UIO_system.png
    │   ├── UIO_wenetspeech_cer.png
    │   ├── check_detail.png
    │   ├── checks.png
    │   ├── context_graph.png
    │   ├── lm_system.png
    │   ├── runtime_android.gif
    │   ├── runtime_server.gif
    │   ├── runtime_web.png
    │   ├── subsampling_overalp.gif
    │   └── u2.gif
    ├── index.rst
    ├── jit_in_wenet.md
    ├── lm.md
    ├── make.bat
    ├── papers.md
    ├── pretrained_models.md
    ├── production.rst
    ├── python_package.md
    ├── reference.rst
    ├── requirements.txt
    ├── runtime.md
    ├── train.rst
    ├── tutorial_aishell.md
    └── tutorial_librispeech.md
├── examples
    ├── aishell
    │   ├── NST
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   └── train_conformer.yaml
    │   │   ├── local
    │   │   │   ├── NST_plot.png
    │   │   │   ├── generate_data_list.py
    │   │   │   ├── generate_filtered_pseudo_label.py
    │   │   │   ├── get_wav_labels.py
    │   │   │   └── split_data_list.py
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   └── run_nst.sh
    │   ├── paraformer
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   ├── train_paraformer.yaml
    │   │   │   └── train_paraformer_dynamic.yaml
    │   │   ├── local
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── run_npu.sh
    │   │   ├── tools
    │   │   └── wenet
    │   ├── rnnt
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   ├── conformer_rnnt.yaml
    │   │   │   ├── conformer_u2pp_rnnt.yaml
    │   │   │   └── example_embedding_predictor.yaml
    │   │   ├── local
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── run_npu.sh
    │   │   ├── tools
    │   │   └── wenet
    │   ├── s0
    │   │   ├── README.md
    │   │   ├── UIO_RESULT.md
    │   │   ├── conf
    │   │   │   ├── train_conformer.yaml
    │   │   │   ├── train_conformer_no_pos.yaml
    │   │   │   ├── train_ebranchformer.yaml
    │   │   │   ├── train_transformer.yaml
    │   │   │   ├── train_u2++_branchformer.yaml
    │   │   │   ├── train_u2++_conformer.yaml
    │   │   │   ├── train_u2++_efficonformer_v1.yaml
    │   │   │   ├── train_u2++_efficonformer_v1_stream.yaml
    │   │   │   ├── train_u2++_efficonformer_v2.yaml
    │   │   │   ├── train_u2++_lite_conformer.yaml
    │   │   │   ├── train_u2++_transformer.yaml
    │   │   │   ├── train_unified_conformer.yaml
    │   │   │   ├── train_unified_conformer_ctl.yaml
    │   │   │   └── train_unified_transformer.yaml
    │   │   ├── local
    │   │   │   ├── aishell_data_prep.sh
    │   │   │   ├── aishell_train_lms.sh
    │   │   │   └── download_and_untar.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── run_npu.sh
    │   │   ├── tools
    │   │   └── wenet
    │   └── whisper
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── ds_stage1.json
    │   │       ├── ds_stage2.json
    │   │       ├── ds_stage3.json
    │   │       ├── finetune_whisper_largev3.yaml
    │   │       ├── finetune_whisper_largev3_conv2d4.yaml
    │   │       ├── finetune_whisper_largev3_conv2d4_onlyattn.yaml
    │   │       └── finetune_whisper_largev3_onlyattn.yaml
    │   │   ├── local
    │   │       └── modify_ckpt.py
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── run_npu.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── aishell2
    │   ├── rnnt
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   ├── conformer_rnnt.yaml
    │   │   │   └── conformer_u2pp_rnnt.yaml
    │   │   ├── local
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_u2++_conformer.yaml
    │   │       ├── train_u2++_transformer.yaml
    │   │       ├── train_unified_conformer.yaml
    │   │       └── train_unified_transformer.yaml
    │   │   ├── local
    │   │       ├── prepare_data.sh
    │   │       ├── train_lms.sh
    │   │       └── word_segmentation.py
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── aishell4
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_conformer.yaml
    │   │       └── train_u2++_conformer.yaml
    │   │   ├── local
    │   │       ├── aishell4_process_textgrid.py
    │   │       ├── apply_map.pl
    │   │       ├── copy_data_dir.sh
    │   │       ├── download_and_untar.sh
    │   │       ├── filter_scp.pl
    │   │       ├── prepare_data.sh
    │   │       ├── spk2utt_to_utt2spk.pl
    │   │       ├── text_format.pl
    │   │       ├── text_normalize.pl
    │   │       ├── utt2spk_to_spk2utt.pl
    │   │       ├── validate_data_dir.sh
    │   │       └── validate_text.pl
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── chime4
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       └── train_conformer.yaml
    │   │   ├── local
    │   │       ├── chime4_format_dir.sh
    │   │       ├── chime4_gen_wav.sh
    │   │       ├── clean_wsj0_data_prep.sh
    │   │       ├── clean_wsj1_data_prep.sh
    │   │       ├── cstr_ndx2flist.pl
    │   │       ├── find_noisy_transcripts.pl
    │   │       ├── find_transcripts.pl
    │   │       ├── flist2scp.pl
    │   │       ├── ndx2flist.pl
    │   │       ├── normalize_transcript.pl
    │   │       ├── real_enhan_chime4_data_prep.sh
    │   │       ├── real_noisy_chime4_data_prep.sh
    │   │       ├── simu_enhan_chime4_data_prep.sh
    │   │       └── simu_noisy_chime4_data_prep.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── commonvoice
    │   └── fr
    │   │   ├── README.md
    │   │   ├── conf
    │   │       └── train_conformer.yaml
    │   │   ├── local
    │   │       ├── create_scp_text.py
    │   │       ├── download_data.sh
    │   │       └── prepare_data.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── csj
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       └── train_conformer.yaml
    │   │   ├── csj_tools
    │   │       ├── wn.0.parse.py
    │   │       ├── wn.1.split_wav.py
    │   │       ├── wn.2.prep.text.py
    │   │       ├── wn.3.mincut.py
    │   │       └── wn.4.make_raw_list.py
    │   │   ├── list_files
    │   │       ├── 2ch.id.list
    │   │       ├── test.set.1.list
    │   │       ├── test.set.123.list
    │   │       ├── test.set.2.list
    │   │       └── test.set.3.list
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── gigaspeech
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_conformer.yaml
    │   │       ├── train_conformer_bidecoder.yaml
    │   │       └── train_u2++_conformer.yaml
    │   │   ├── local
    │   │       ├── extract_meta.py
    │   │       ├── gigaspeech_data_prep.sh
    │   │       └── gigaspeech_scoring.py
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── hkust
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_960_unigram5000.model
    │   │       └── train_conformer.yaml
    │   │   ├── local
    │   │       ├── hkust_data_prep.sh
    │   │       └── hkust_normalize.pl
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── librispeech
    │   ├── rnnt
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   └── conformer_rnnt.yaml
    │   │   ├── local
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_conformer.yaml
    │   │       ├── train_conformer_bidecoder_large.yaml
    │   │       ├── train_squeezeformer.yaml
    │   │       ├── train_squeezeformer_bidecoder_large.yaml
    │   │       ├── train_u2++_branchformer.yaml
    │   │       ├── train_u2++_conformer.yaml
    │   │       ├── train_u2++_efficonformer_v1.yaml
    │   │       ├── train_u2++_efficonformer_v2.yaml
    │   │       ├── train_u2++_squeezeformer.yaml
    │   │       └── train_unified_conformer.yaml
    │   │   ├── local
    │   │       ├── data_prep_torchaudio.sh
    │   │       └── download_and_untar.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── multi_cn
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_960_unigram5000.model
    │   │       ├── train_conformer.yaml
    │   │       ├── train_unified_conformer.yaml
    │   │       └── train_unified_transformer.yaml
    │   │   ├── local
    │   │       ├── aidatatang_data_prep.sh
    │   │       ├── aidatatang_download_and_untar.sh
    │   │       ├── aishell2_data_prep.sh
    │   │       ├── aishell_data_prep.sh
    │   │       ├── aishell_download_and_untar.sh
    │   │       ├── magicdata_badlist
    │   │       ├── magicdata_data_prep.sh
    │   │       ├── magicdata_download_and_untar.sh
    │   │       ├── primewords_data_prep.sh
    │   │       ├── primewords_download_and_untar.sh
    │   │       ├── primewords_parse_transcript.py
    │   │       ├── stcmds_data_prep.sh
    │   │       ├── stcmds_download_and_untar.sh
    │   │       ├── tal_data_prep.sh
    │   │       ├── tal_mix_data_prep.sh
    │   │       ├── thchs-30_data_prep.sh
    │   │       └── thchs_download_and_untar.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── openasr2021
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── lang.conf
    │   │       └── train_conformer_large_10h.yaml
    │   │   ├── local
    │   │       ├── dump_wav.sh
    │   │       ├── make_absolute.sh
    │   │       ├── make_corpus_subset.sh
    │   │       ├── prepare_acoustic_training_data.pl
    │   │       ├── prepare_data.sh
    │   │       └── setup_languages.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── swbd
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       └── train_conformer.yaml
    │   │   ├── local
    │   │       ├── MSU_single_letter.txt
    │   │       ├── dict.patch
    │   │       ├── eval2000_data_prep.sh
    │   │       ├── extend_segments.pl
    │   │       ├── format_acronyms_dict.py
    │   │       ├── map_acronyms_transcripts.py
    │   │       ├── swbd1_data_download.sh
    │   │       ├── swbd1_data_prep.sh
    │   │       ├── swbd1_fix_speakerid.pl
    │   │       ├── swbd1_map_words.pl
    │   │       └── swbd1_prepare_dict.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── tedlium3
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       └── train_conformer.yaml
    │   │   ├── local
    │   │       ├── download_data.sh
    │   │       ├── join_suffix.py
    │   │       └── prepare_data.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── timit
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── train_conformer.yaml
    │   │       └── train_transformer.yaml
    │   │   ├── local
    │   │       ├── dev_spk.list
    │   │       ├── phones.60-48-39.map
    │   │       ├── sph2pipe_process.py
    │   │       ├── test_spk.list
    │   │       ├── timit_data_prep.sh
    │   │       ├── timit_format_data.sh
    │   │       ├── timit_norm_trans.pl
    │   │       ├── utt2spk_to_spk2utt.pl
    │   │       └── validate_data_dir.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── vkw2021
    │   └── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── combine_finetune_5h_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml
    │   │       └── train_vkw_bidirect_12conformer_hs2048_output256_att4_conv2d_char.yaml
    │   │   ├── local
    │   │       ├── run_finetune_5h.sh
    │   │       ├── vkw_data_prep.sh
    │   │       └── vkw_kws_results.py
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    ├── wenetspeech
    │   ├── paraformer
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   └── fintune_paraformer_dynamic.yaml
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    │   ├── s0
    │   │   ├── README.md
    │   │   ├── conf
    │   │   │   ├── train_conformer.yaml
    │   │   │   ├── train_conformer_bidecoder.yaml
    │   │   │   ├── train_u2++_conformer.yaml
    │   │   │   └── train_u2++_conformer_wenetspeech_aishell4.yaml
    │   │   ├── local
    │   │   │   ├── extract_meta.py
    │   │   │   ├── process_opus.py
    │   │   │   └── wenetspeech_data_prep.sh
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    │   └── whisper
    │   │   ├── README.md
    │   │   ├── conf
    │   │       ├── ds_stage1.json
    │   │       └── finetune_whisper_largev3.yaml
    │   │   ├── local
    │   │   ├── path.sh
    │   │   ├── run.sh
    │   │   ├── tools
    │   │   └── wenet
    └── wsj
    │   └── s0
    │       ├── README.md
    │       ├── conf
    │           └── train_conformer.yaml
    │       ├── local
    │           ├── find_transcripts.pl
    │           ├── flist2scp.pl
    │           ├── ndx2flist.pl
    │           ├── normalize_transcript.pl
    │           ├── wsj_data_prep.sh
    │           ├── wsj_format_data.sh
    │           └── wsj_gen_wav.sh
    │       ├── path.sh
    │       ├── run.sh
    │       ├── tools
    │       └── wenet
├── requirements.txt
├── runtime
    ├── README.md
    ├── android
    │   ├── .gitignore
    │   ├── README.md
    │   ├── app
    │   │   ├── .gitignore
    │   │   ├── build.gradle
    │   │   ├── proguard-rules.pro
    │   │   ├── src
    │   │   │   ├── androidTest
    │   │   │   │   └── java
    │   │   │   │   │   └── com
    │   │   │   │   │       └── mobvoi
    │   │   │   │   │           └── wenet
    │   │   │   │   │               └── ExampleInstrumentedTest.java
    │   │   │   ├── main
    │   │   │   │   ├── AndroidManifest.xml
    │   │   │   │   ├── assets
    │   │   │   │   │   └── README.md
    │   │   │   │   ├── cpp
    │   │   │   │   │   ├── CMakeLists.txt
    │   │   │   │   │   ├── bin
    │   │   │   │   │   ├── cmake
    │   │   │   │   │   ├── decoder
    │   │   │   │   │   ├── frontend
    │   │   │   │   │   ├── kaldi
    │   │   │   │   │   ├── patch
    │   │   │   │   │   ├── post_processor
    │   │   │   │   │   ├── utils
    │   │   │   │   │   └── wenet.cc
    │   │   │   │   ├── java
    │   │   │   │   │   └── com
    │   │   │   │   │   │   └── mobvoi
    │   │   │   │   │   │       └── wenet
    │   │   │   │   │   │           ├── MainActivity.java
    │   │   │   │   │   │           ├── Recognize.java
    │   │   │   │   │   │           └── VoiceRectView.java
    │   │   │   │   └── res
    │   │   │   │   │   ├── drawable-v24
    │   │   │   │   │       └── ic_launcher_foreground.xml
    │   │   │   │   │   ├── drawable
    │   │   │   │   │       └── ic_launcher_background.xml
    │   │   │   │   │   ├── layout
    │   │   │   │   │       └── activity_main.xml
    │   │   │   │   │   ├── mipmap-anydpi-v26
    │   │   │   │   │       ├── ic_launcher.xml
    │   │   │   │   │       └── ic_launcher_round.xml
    │   │   │   │   │   ├── mipmap-hdpi
    │   │   │   │   │       ├── ic_launcher.png
    │   │   │   │   │       └── ic_launcher_round.png
    │   │   │   │   │   ├── mipmap-mdpi
    │   │   │   │   │       ├── ic_launcher.png
    │   │   │   │   │       └── ic_launcher_round.png
    │   │   │   │   │   ├── mipmap-xhdpi
    │   │   │   │   │       ├── ic_launcher.png
    │   │   │   │   │       └── ic_launcher_round.png
    │   │   │   │   │   ├── mipmap-xxhdpi
    │   │   │   │   │       ├── ic_launcher.png
    │   │   │   │   │       └── ic_launcher_round.png
    │   │   │   │   │   ├── mipmap-xxxhdpi
    │   │   │   │   │       ├── ic_launcher.png
    │   │   │   │   │       └── ic_launcher_round.png
    │   │   │   │   │   ├── values-night
    │   │   │   │   │       └── themes.xml
    │   │   │   │   │   └── values
    │   │   │   │   │       ├── attrs.xml
    │   │   │   │   │       ├── colors.xml
    │   │   │   │   │       ├── strings.xml
    │   │   │   │   │       └── themes.xml
    │   │   │   └── test
    │   │   │   │   └── java
    │   │   │   │       └── com
    │   │   │   │           └── mobvoi
    │   │   │   │               └── wenet
    │   │   │   │                   └── ExampleUnitTest.java
    │   │   └── wenet.keystore
    │   ├── build.gradle
    │   ├── gradle.properties
    │   ├── gradle
    │   │   └── wrapper
    │   │   │   ├── gradle-wrapper.jar
    │   │   │   └── gradle-wrapper.properties
    │   ├── gradlew
    │   ├── gradlew.bat
    │   └── settings.gradle
    ├── core
    │   ├── api
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── wenet_api.cc
    │   │   └── wenet_api.h
    │   ├── bin
    │   │   ├── CMakeLists.txt
    │   │   ├── api_main.cc
    │   │   ├── decoder_main.cc
    │   │   ├── grpc_client_main.cc
    │   │   ├── grpc_server_main.cc
    │   │   ├── http_client_main.cc
    │   │   ├── http_server_main.cc
    │   │   ├── label_checker_main.cc
    │   │   ├── websocket_client_main.cc
    │   │   └── websocket_server_main.cc
    │   ├── cmake
    │   │   ├── boost.cmake
    │   │   ├── bpu.cmake
    │   │   ├── gflags.cmake
    │   │   ├── glog.cmake
    │   │   ├── grpc.cmake
    │   │   ├── gtest.cmake
    │   │   ├── ipex.cmake
    │   │   ├── libtorch.cmake
    │   │   ├── onnx.cmake
    │   │   ├── openfst.cmake
    │   │   ├── openvino.cmake
    │   │   ├── pybind11.cmake
    │   │   ├── wetextprocessing.cmake
    │   │   └── xpu.cmake
    │   ├── decoder
    │   │   ├── CMakeLists.txt
    │   │   ├── asr_decoder.cc
    │   │   ├── asr_decoder.h
    │   │   ├── asr_model.cc
    │   │   ├── asr_model.h
    │   │   ├── context_graph.cc
    │   │   ├── context_graph.h
    │   │   ├── ctc_endpoint.cc
    │   │   ├── ctc_endpoint.h
    │   │   ├── ctc_prefix_beam_search.cc
    │   │   ├── ctc_prefix_beam_search.h
    │   │   ├── ctc_wfst_beam_search.cc
    │   │   ├── ctc_wfst_beam_search.h
    │   │   ├── onnx_asr_model.cc
    │   │   ├── onnx_asr_model.h
    │   │   ├── params.h
    │   │   ├── search_interface.h
    │   │   ├── torch_asr_model.cc
    │   │   └── torch_asr_model.h
    │   ├── frontend
    │   │   ├── CMakeLists.txt
    │   │   ├── fbank.h
    │   │   ├── feature_pipeline.cc
    │   │   ├── feature_pipeline.h
    │   │   ├── fft.cc
    │   │   ├── fft.h
    │   │   └── wav.h
    │   ├── grpc
    │   │   ├── CMakeLists.txt
    │   │   ├── grpc_client.cc
    │   │   ├── grpc_client.h
    │   │   ├── grpc_server.cc
    │   │   ├── grpc_server.h
    │   │   └── wenet.proto
    │   ├── http
    │   │   ├── CMakeLists.txt
    │   │   ├── http_client.cc
    │   │   ├── http_client.h
    │   │   ├── http_server.cc
    │   │   └── http_server.h
    │   ├── kaldi
    │   │   ├── CMakeLists.txt
    │   │   ├── README.md
    │   │   ├── base
    │   │   │   ├── io-funcs-inl.h
    │   │   │   ├── io-funcs.cc
    │   │   │   ├── io-funcs.h
    │   │   │   ├── kaldi-common.h
    │   │   │   ├── kaldi-error.cc
    │   │   │   ├── kaldi-error.h
    │   │   │   ├── kaldi-math.cc
    │   │   │   ├── kaldi-math.h
    │   │   │   ├── kaldi-types.h
    │   │   │   └── kaldi-utils.h
    │   │   ├── decoder
    │   │   │   ├── lattice-faster-decoder.cc
    │   │   │   ├── lattice-faster-decoder.h
    │   │   │   ├── lattice-faster-online-decoder.cc
    │   │   │   └── lattice-faster-online-decoder.h
    │   │   ├── fstbin
    │   │   │   ├── fstaddselfloops.cc
    │   │   │   ├── fstdeterminizestar.cc
    │   │   │   ├── fstisstochastic.cc
    │   │   │   ├── fstminimizeencoded.cc
    │   │   │   └── fsttablecompose.cc
    │   │   ├── fstext
    │   │   │   ├── determinize-lattice-inl.h
    │   │   │   ├── determinize-lattice.h
    │   │   │   ├── determinize-star-inl.h
    │   │   │   ├── determinize-star.h
    │   │   │   ├── fstext-lib.h
    │   │   │   ├── fstext-utils-inl.h
    │   │   │   ├── fstext-utils.h
    │   │   │   ├── kaldi-fst-io-inl.h
    │   │   │   ├── kaldi-fst-io.cc
    │   │   │   ├── kaldi-fst-io.h
    │   │   │   ├── lattice-utils-inl.h
    │   │   │   ├── lattice-utils.h
    │   │   │   ├── lattice-weight.h
    │   │   │   ├── pre-determinize-inl.h
    │   │   │   ├── pre-determinize.h
    │   │   │   ├── remove-eps-local-inl.h
    │   │   │   ├── remove-eps-local.h
    │   │   │   └── table-matcher.h
    │   │   ├── itf
    │   │   │   ├── decodable-itf.h
    │   │   │   └── options-itf.h
    │   │   ├── lat
    │   │   │   ├── CPPLINT.cfg
    │   │   │   ├── determinize-lattice-pruned.cc
    │   │   │   ├── determinize-lattice-pruned.h
    │   │   │   ├── kaldi-lattice.cc
    │   │   │   ├── kaldi-lattice.h
    │   │   │   ├── lattice-functions.cc
    │   │   │   └── lattice-functions.h
    │   │   ├── lm
    │   │   │   ├── arpa-file-parser.cc
    │   │   │   ├── arpa-file-parser.h
    │   │   │   ├── arpa-lm-compiler.cc
    │   │   │   └── arpa-lm-compiler.h
    │   │   ├── lmbin
    │   │   │   └── arpa2fst.cc
    │   │   └── util
    │   │   │   ├── basic-filebuf.h
    │   │   │   ├── const-integer-set-inl.h
    │   │   │   ├── const-integer-set.h
    │   │   │   ├── hash-list-inl.h
    │   │   │   ├── hash-list.h
    │   │   │   ├── kaldi-io-inl.h
    │   │   │   ├── kaldi-io.cc
    │   │   │   ├── kaldi-io.h
    │   │   │   ├── kaldi-pipebuf.h
    │   │   │   ├── parse-options.cc
    │   │   │   ├── parse-options.h
    │   │   │   ├── simple-io-funcs.cc
    │   │   │   ├── simple-io-funcs.h
    │   │   │   ├── stl-utils.h
    │   │   │   ├── text-utils.cc
    │   │   │   └── text-utils.h
    │   ├── patch
    │   │   ├── CPPLINT.cfg
    │   │   └── openfst
    │   │   │   └── src
    │   │   │       ├── CMakeLists.txt
    │   │   │       ├── extensions
    │   │   │           └── special
    │   │   │           │   └── CMakeLists.txt
    │   │   │       ├── include
    │   │   │           └── fst
    │   │   │           │   ├── flags.h
    │   │   │           │   └── log.h
    │   │   │       ├── lib
    │   │   │           └── flags.cc
    │   │   │       └── test
    │   │   │           └── CMakeLists.txt
    │   ├── post_processor
    │   │   ├── CMakeLists.txt
    │   │   ├── post_processor.cc
    │   │   └── post_processor.h
    │   ├── test
    │   │   ├── CMakeLists.txt
    │   │   ├── ctc_prefix_beam_search_test.cc
    │   │   ├── feature_pipeline_test.cc
    │   │   ├── post_processor_test.cc
    │   │   └── utils_test.cc
    │   ├── toolchains
    │   │   ├── aarch64-linux-gnu.toolchain.cmake
    │   │   └── ios.toolchain.cmake
    │   ├── utils
    │   │   ├── CMakeLists.txt
    │   │   ├── blocking_queue.h
    │   │   ├── file.h
    │   │   ├── flags.h
    │   │   ├── json.h
    │   │   ├── log.h
    │   │   ├── string.cc
    │   │   ├── string.h
    │   │   ├── thread_pool.h
    │   │   ├── timer.h
    │   │   ├── utils.cc
    │   │   └── utils.h
    │   └── websocket
    │   │   ├── CMakeLists.txt
    │   │   ├── websocket_client.cc
    │   │   ├── websocket_client.h
    │   │   ├── websocket_server.cc
    │   │   └── websocket_server.h
    ├── gpu
    │   ├── .gitmodules
    │   ├── Dockerfile
    │   │   ├── Dockerfile.client
    │   │   └── Dockerfile.server
    │   ├── Overview.JPG
    │   ├── README.md
    │   ├── client
    │   │   ├── client.py
    │   │   ├── decode_manifest_triton.py
    │   │   ├── decode_manifest_triton.sh
    │   │   ├── generate_perf_input.py
    │   │   ├── speech_client.py
    │   │   ├── stats_summary.py
    │   │   ├── test_wavs
    │   │   │   ├── long.wav
    │   │   │   └── mid.wav
    │   │   └── utils.py
    │   ├── cuda_decoders
    │   │   ├── README.md
    │   │   ├── build_tlg.sh
    │   │   ├── model_repo_cuda_decoder
    │   │   │   ├── attention_rescoring
    │   │   │   │   ├── 1
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── decoder
    │   │   │   │   ├── 1
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── encoder
    │   │   │   │   ├── 1
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── feature_extractor
    │   │   │   │   ├── 1
    │   │   │   │   │   └── model.py
    │   │   │   │   └── config.pbtxt.template
    │   │   │   └── scoring
    │   │   │   │   ├── 1
    │   │   │   │       ├── decoder.py
    │   │   │   │       ├── frame_reducer.py
    │   │   │   │       ├── lang
    │   │   │   │       │   └── .gitkeep
    │   │   │   │       ├── model.py
    │   │   │   │       └── wfst_decoding_config.yaml
    │   │   │   │   └── config.pbtxt.template
    │   │   ├── model_repo_stateful_cuda_decoder
    │   │   │   ├── encoder
    │   │   │   │   ├── 1
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config_template.pbtxt
    │   │   │   ├── feature_extractor
    │   │   │   │   ├── 1
    │   │   │   │   │   └── model.py
    │   │   │   │   └── config_template.pbtxt
    │   │   │   ├── scoring
    │   │   │   │   ├── 1
    │   │   │   │   │   ├── decoder.py
    │   │   │   │   │   ├── frame_reducer.py
    │   │   │   │   │   ├── lang
    │   │   │   │   │   │   └── .gitkeep
    │   │   │   │   │   ├── model.py
    │   │   │   │   │   └── wfst_decoding_config.yaml
    │   │   │   │   └── config_template.pbtxt
    │   │   │   └── streaming_wenet
    │   │   │   │   ├── 1
    │   │   │   │       ├── .gitignore
    │   │   │   │       └── .gitkeep
    │   │   │   │   └── config_template.pbtxt
    │   │   ├── requirements.txt
    │   │   ├── run.sh
    │   │   └── run_streaming.sh
    │   ├── model_repo
    │   │   ├── attention_rescoring
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   └── config_template.pbtxt
    │   │   ├── decoder
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   ├── config_template.pbtxt
    │   │   │   └── config_template2.pbtxt
    │   │   ├── encoder
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   └── config_template.pbtxt
    │   │   ├── feature_extractor
    │   │   │   ├── 1
    │   │   │   │   └── model.py
    │   │   │   └── config_template.pbtxt
    │   │   └── scoring
    │   │   │   ├── 1
    │   │   │       └── model.py
    │   │   │   ├── config_template.pbtxt
    │   │   │   └── hotwords.yaml
    │   ├── model_repo_stateful
    │   │   ├── decoder
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   ├── config_template.pbtxt
    │   │   │   └── config_template2.pbtxt
    │   │   ├── encoder
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   ├── config_template.pbtxt
    │   │   │   └── config_template2.pbtxt
    │   │   ├── feature_extractor
    │   │   │   ├── 1
    │   │   │   │   └── model.py
    │   │   │   └── config_template.pbtxt
    │   │   ├── streaming_wenet
    │   │   │   ├── 1
    │   │   │   │   └── .gitignore
    │   │   │   └── config_template.pbtxt
    │   │   └── wenet
    │   │   │   ├── 1
    │   │   │       ├── model.py
    │   │   │       └── wenet_onnx_model.py
    │   │   │   ├── config_template.pbtxt
    │   │   │   └── hotwords.yaml
    │   ├── scripts
    │   │   ├── benchmark_onnx_throughput.py
    │   │   ├── compute_hotwords_f1.py
    │   │   ├── convert.py
    │   │   ├── convert_start_server.sh
    │   │   └── run_qa.sh
    │   ├── tensorrt
    │   │   ├── LayerNormPlugin
    │   │   │   ├── LayerNormPlugin.cu
    │   │   │   ├── LayerNormPlugin.h
    │   │   │   ├── Makefile
    │   │   │   └── testLayerNormPlugin.py
    │   │   ├── README.md
    │   │   ├── export_streaming_conformer_trt.py
    │   │   ├── model_repo_stateful_trt
    │   │   │   ├── decoder
    │   │   │   │   ├── 1
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   ├── config_template.pbtxt
    │   │   │   │   └── config_template2.pbtxt
    │   │   │   ├── encoder
    │   │   │   │   ├── 1
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config_template.pbtxt
    │   │   │   ├── feature_extractor
    │   │   │   │   ├── 1
    │   │   │   │   │   └── model.py
    │   │   │   │   └── config_template.pbtxt
    │   │   │   ├── streaming_wenet
    │   │   │   │   ├── 1
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config_template.pbtxt
    │   │   │   └── wenet
    │   │   │   │   ├── 1
    │   │   │   │       ├── model.py
    │   │   │   │       └── wenet_onnx_model.py
    │   │   │   │   └── config_template.pbtxt
    │   │   ├── replace_layernorm.py
    │   │   ├── requirements.txt
    │   │   └── run_streaming_small_model.sh
    │   ├── tensorrt_fastertransformer
    │   │   ├── README.md
    │   │   ├── decoder_plugin.JPG
    │   │   ├── encoder_plugin.JPG
    │   │   ├── extract_weights.py
    │   │   ├── model_repo_ft
    │   │   │   ├── attention_rescoring
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── decoder
    │   │   │   │   ├── 1
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── encoder
    │   │   │   │   ├── 1
    │   │   │   │   │   └── .gitkeep
    │   │   │   │   └── config.pbtxt.template
    │   │   │   ├── feature_extractor
    │   │   │   │   ├── 1
    │   │   │   │   │   └── model.py
    │   │   │   │   └── config.pbtxt.template
    │   │   │   └── scoring
    │   │   │   │   ├── 1
    │   │   │   │       └── model.py
    │   │   │   │   └── config.pbtxt.template
    │   │   ├── replace_plugin.py
    │   │   ├── requirements.txt
    │   │   ├── run.sh
    │   │   ├── run_large.sh
    │   │   └── utils.py
    │   └── test.gif
    ├── horizonbpu
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── api
    │   ├── bin
    │   ├── bpu
    │   │   ├── CMakeLists.txt
    │   │   ├── bpu_asr_model.cc
    │   │   └── bpu_asr_model.h
    │   ├── cmake
    │   ├── decoder
    │   ├── frontend
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── toolchains
    │   ├── utils
    │   └── websocket
    ├── ios
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── WenetDemo
    │   │   ├── WenetDemo.xcodeproj
    │   │   │   ├── project.pbxproj
    │   │   │   └── project.xcworkspace
    │   │   │   │   ├── contents.xcworkspacedata
    │   │   │   │   └── xcshareddata
    │   │   │   │       └── IDEWorkspaceChecks.plist
    │   │   └── WenetDemo
    │   │   │   ├── AppDelegate.swift
    │   │   │   ├── Assets.xcassets
    │   │   │       ├── AccentColor.colorset
    │   │   │       │   └── Contents.json
    │   │   │       ├── AppIcon.appiconset
    │   │   │       │   └── Contents.json
    │   │   │       └── Contents.json
    │   │   │   ├── Base.lproj
    │   │   │       ├── LaunchScreen.storyboard
    │   │   │       └── Main.storyboard
    │   │   │   ├── Info.plist
    │   │   │   ├── SceneDelegate.swift
    │   │   │   ├── ViewController.swift
    │   │   │   ├── model
    │   │   │       └── .gitkeep
    │   │   │   └── wenet
    │   │   │       ├── WenetDemo-Bridging-Header.h
    │   │   │       ├── wenet.h
    │   │   │       └── wenet.mm
    │   ├── build
    │   │   └── Podfile
    │   ├── cmake
    │   ├── decoder
    │   ├── frontend
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── toolchains
    │   └── utils
    ├── ipex
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── decoder
    │   ├── docker
    │   │   └── Dockerfile
    │   ├── env_checking.sh
    │   ├── frontend
    │   ├── grpc
    │   ├── http
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── utils
    │   ├── web
    │   └── websocket
    ├── kunlun
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_EN.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── compile.sh
    │   ├── decoder
    │   ├── frontend
    │   ├── grpc
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── utils
    │   ├── websocket
    │   └── xpu
    │   │   ├── CMakeLists.txt
    │   │   ├── conformer_test.cpp
    │   │   ├── xpu_asr_model.cc
    │   │   ├── xpu_asr_model.h
    │   │   ├── xpu_conformer.cpp
    │   │   ├── xpu_conformer.h
    │   │   ├── xpu_util.cpp
    │   │   └── xpu_util.h
    ├── libtorch
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── README_CN.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── decoder
    │   ├── docker
    │   │   └── Dockerfile
    │   ├── frontend
    │   ├── grpc
    │   ├── http
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── utils
    │   ├── web
    │   │   ├── app.py
    │   │   ├── static
    │   │   │   ├── css
    │   │   │   │   ├── font-awesome.min.css
    │   │   │   │   └── style.css
    │   │   │   ├── favicon.ico
    │   │   │   ├── fonts
    │   │   │   │   ├── FontAwesome.otf
    │   │   │   │   ├── fontawesome-webfont.eot
    │   │   │   │   ├── fontawesome-webfont.svg
    │   │   │   │   ├── fontawesome-webfont.ttf
    │   │   │   │   ├── fontawesome-webfont.woff
    │   │   │   │   └── fontawesome-webfont.woff2
    │   │   │   ├── image
    │   │   │   │   ├── qrcode-enterprise.png
    │   │   │   │   ├── qrcode-official-account.png
    │   │   │   │   ├── voice-dictation.svg
    │   │   │   │   └── voice-pic.png
    │   │   │   └── js
    │   │   │   │   ├── SoundRecognizer.js
    │   │   │   │   ├── jquery-3.2.1.min.js
    │   │   │   │   └── recorder
    │   │   │   │       ├── engine
    │   │   │   │           ├── mp3.js
    │   │   │   │           ├── pcm.js
    │   │   │   │           └── wav.js
    │   │   │   │       ├── extensions
    │   │   │   │           ├── frequency.histogram.view.js
    │   │   │   │           └── lib.fft.js
    │   │   │   │       └── recorder-core.js
    │   │   └── templates
    │   │   │   └── index.html
    │   └── websocket
    ├── onnxruntime
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── decoder
    │   ├── frontend
    │   ├── grpc
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── utils
    │   └── websocket
    ├── openvino
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── decoder
    │   ├── frontend
    │   ├── kaldi
    │   ├── ov
    │   │   ├── CMakeLists.txt
    │   │   ├── ov_asr_model.cc
    │   │   └── ov_asr_model.h
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── utils
    │   └── websocket
    ├── raspberrypi
    │   ├── .gitignore
    │   ├── CMakeLists.txt
    │   ├── README.md
    │   ├── api
    │   ├── bin
    │   ├── cmake
    │   ├── decoder
    │   ├── frontend
    │   ├── kaldi
    │   ├── patch
    │   ├── post_processor
    │   ├── test
    │   ├── toolchains
    │   └── utils
    └── web
    │   ├── README.md
    │   ├── app.py
    │   └── requirements.txt
├── setup.cfg
├── setup.py
├── test
    ├── resources
    │   ├── aishell-BAC009S0724W0121.wav
    │   ├── aishell2.words.txt
    │   ├── dataset
    │   │   ├── aishell-BAC009S0724W0121.wav
    │   │   ├── data.list
    │   │   ├── data.shards.list
    │   │   ├── librispeech-1995-1837-0001.wav
    │   │   ├── shards
    │   │   │   └── shards_000000000.tar
    │   │   ├── text
    │   │   └── wav.scp
    │   ├── global_cmvn
    │   ├── librispeech-1995-1837-0001.wav
    │   ├── librispeech.train_960_unigram5000.bpemodel
    │   ├── librispeech.words.txt
    │   ├── non-linguistic-symbols.invalid
    │   ├── non-linguistic-symbols.valid
    │   ├── paraformer.seg_dict.txt
    │   └── paraformer.words.txt
    ├── test_file_utils.py
    ├── tools
    │   └── test_make_shard.py
    └── wenet
    │   ├── dataset
    │       ├── test_datapipes.py
    │       ├── test_dataset.py
    │       └── test_processor.py
    │   ├── models
    │       ├── paraformer
    │       │   └── test_paraformer.py
    │       ├── transformer
    │       │   ├── test_attention.py
    │       │   └── test_grad_ckpt.py
    │       └── whisper
    │       │   └── test_whisper.py
    │   ├── text
    │       ├── test_bpe_tokenizer.py
    │       ├── test_char_tokenizer.py
    │       ├── test_hugging_face_tokenizer.py
    │       ├── test_paraformer_tokenizer.py
    │       ├── test_parallel.py
    │       └── test_whisper_tokenizer.py
    │   └── utils
    │       ├── test_init_model.py
    │       └── test_init_tokenizer.py
├── tools
    ├── alignment.sh
    ├── analyze_dataset.py
    ├── cmvn_kaldi2json.py
    ├── combine_data.sh
    ├── compute-cer.py
    ├── compute-wer.py
    ├── compute_cmvn_stats.py
    ├── compute_fbank_feats.py
    ├── compute_shard_cmvn_stats.py
    ├── copy_data_dir.sh
    ├── data
    │   ├── remove_dup_utts.sh
    │   └── split_scp.pl
    ├── decode.sh
    ├── extract_shard_data.py
    ├── feat_to_shape.sh
    ├── filter_scp.pl
    ├── fix_data_dir.sh
    ├── flake8_hook.py
    ├── format_data.sh
    ├── fst
    │   ├── add_lex_disambig.pl
    │   ├── compile_lexicon_token_fst.sh
    │   ├── ctc_token_fst.py
    │   ├── ctc_token_fst_compact.py
    │   ├── ctc_token_fst_corrected.py
    │   ├── eps2disambig.pl
    │   ├── make_lexicon_fst.pl
    │   ├── make_tlg.sh
    │   ├── prepare_dict.py
    │   ├── remove_oovs.pl
    │   ├── rnnt_token_fst.py
    │   └── s2eps.pl
    ├── git-pre-commit
    ├── install_srilm.sh
    ├── k2
    │   ├── make_hlg.sh
    │   ├── prepare_char.py
    │   └── prepare_mmi.sh
    ├── latency_metrics.py
    ├── make_raw_list.py
    ├── make_shard_list.py
    ├── merge_scp2txt.py
    ├── onnx2horizonbin.py
    ├── parse_options.sh
    ├── perturb_data_dir_speed.sh
    ├── reduce_data_dir.sh
    ├── remove_longshortdata.py
    ├── segment.py
    ├── setup_anaconda.sh
    ├── sph2wav.sh
    ├── spk2utt_to_utt2spk.pl
    ├── spm_decode
    ├── spm_encode
    ├── spm_train
    ├── ssh_launcher.py
    ├── subset_data_dir.sh
    ├── subset_scp.pl
    ├── sym2int.pl
    ├── text2token.py
    ├── utt2spk_to_spk2utt.pl
    ├── validate_data_dir.sh
    ├── validate_dict_dir.pl
    ├── validate_text.pl
    ├── wav2dur.py
    ├── wav_to_duration.sh
    └── websocket
    │   └── performance-ws.py
└── wenet
    ├── README.md
    ├── __init__.py
    ├── bin
        ├── __init__.py
        ├── alignment.py
        ├── average_model.py
        ├── export_ipex.py
        ├── export_jit.py
        ├── export_onnx_bpu.py
        ├── export_onnx_cpu.py
        ├── export_onnx_gpu.py
        ├── recognize.py
        ├── recognize_onnx_gpu.py
        └── train.py
    ├── cli
        ├── __init__.py
        ├── hub.py
        ├── model.py
        ├── punc_model.py
        └── transcribe.py
    ├── dataset
        ├── __init__.py
        ├── datapipes.py
        ├── dataset.py
        ├── deprecated
        │   ├── __init__.py
        │   ├── dataset.py
        │   └── processor.py
        ├── kaldi_io.py
        ├── processor.py
        └── wav_distortion.py
    ├── models
        ├── __init__.py
        ├── branchformer
        │   ├── __init__.py
        │   ├── cgmlp.py
        │   ├── encoder.py
        │   └── encoder_layer.py
        ├── ctl_model
        │   ├── __init__.py
        │   ├── asr_model_ctl.py
        │   └── encoder.py
        ├── e_branchformer
        │   ├── __init__.py
        │   ├── encoder.py
        │   └── encoder_layer.py
        ├── efficient_conformer
        │   ├── __init__.py
        │   ├── attention.py
        │   ├── convolution.py
        │   ├── encoder.py
        │   ├── encoder_layer.py
        │   └── subsampling.py
        ├── finetune
        │   ├── __init__.py
        │   └── lora
        │   │   ├── __init__.py
        │   │   ├── config.yaml
        │   │   ├── layers.py
        │   │   └── utils.py
        ├── firered
        │   ├── __init__.py
        │   ├── attention.py
        │   ├── convert_FireRed_AED_L_to_wenet_config_and_ckpt.py
        │   ├── encoder.py
        │   ├── encoder_layer.py
        │   ├── model.py
        │   └── subsampling.py
        ├── k2
        │   ├── __init__.py
        │   └── model.py
        ├── paraformer
        │   ├── __init__.py
        │   ├── attention.py
        │   ├── cif.py
        │   ├── convert_paraformer_to_wenet_config_and_ckpt.py
        │   ├── embedding.py
        │   ├── layers.py
        │   ├── paraformer.py
        │   ├── search.py
        │   └── subsampling.py
        ├── squeezeformer
        │   ├── __init__.py
        │   ├── attention.py
        │   ├── conv2d.py
        │   ├── convolution.py
        │   ├── encoder.py
        │   ├── encoder_layer.py
        │   ├── positionwise_feed_forward.py
        │   └── subsampling.py
        ├── ssl
        │   ├── __init__.py
        │   ├── bestrq
        │   │   ├── __init__.py
        │   │   ├── bestrq_model.py
        │   │   └── mask.py
        │   ├── init_dataset.py
        │   ├── init_model.py
        │   ├── w2vbert
        │   │   ├── __init__.py
        │   │   ├── convert_w2vbert_to_wenet_config_and_ckpt.py
        │   │   └── w2vbert_model.py
        │   └── wav2vec2
        │   │   ├── __init__.py
        │   │   ├── quantizer.py
        │   │   └── wav2vec2_model.py
        ├── transducer
        │   ├── __init__.py
        │   ├── joint.py
        │   ├── predictor.py
        │   ├── search
        │   │   ├── __init__.py
        │   │   ├── greedy_search.py
        │   │   └── prefix_beam_search.py
        │   └── transducer.py
        ├── transformer
        │   ├── __init__.py
        │   ├── asr_model.py
        │   ├── attention.py
        │   ├── cmvn.py
        │   ├── convolution.py
        │   ├── ctc.py
        │   ├── decoder.py
        │   ├── decoder_layer.py
        │   ├── embedding.py
        │   ├── encoder.py
        │   ├── encoder_layer.py
        │   ├── label_smoothing_loss.py
        │   ├── norm.py
        │   ├── positionwise_feed_forward.py
        │   ├── search.py
        │   ├── subsampling.py
        │   └── swish.py
        └── whisper
        │   ├── __init__.py
        │   ├── convert_whisper_to_wenet_config_and_ckpt.py
        │   └── whisper.py
    ├── text
        ├── __init__.py
        ├── base_tokenizer.py
        ├── bpe_tokenizer.py
        ├── char_tokenizer.py
        ├── hugging_face_tokenizer.py
        ├── paraformer_tokenizer.py
        ├── tokenize_utils.py
        └── whisper_tokenizer.py
    └── utils
        ├── __init__.py
        ├── checkpoint.py
        ├── class_utils.py
        ├── cmvn.py
        ├── common.py
        ├── config.py
        ├── context_graph.py
        ├── ctc_utils.py
        ├── executor.py
        ├── file_utils.py
        ├── fsdp_utils.py
        ├── init_dataset.py
        ├── init_model.py
        ├── init_tokenizer.py
        ├── mask.py
        ├── rope_utils.py
        ├── scheduler.py
        └── train_utils.py


/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | select = B,C,E,F,P,T4,W,B9
 3 | max-line-length = 80
 4 | # C408 ignored because we like the dict keyword argument syntax
 5 | # E501 is not flexible enough, we're using B950 instead
 6 | ignore =
 7 |     E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
 8 |     # shebang has extra meaning in fbcode lints, so I think it's not worth trying
 9 |     # to line this up with executable bit
10 |     EXE001, EXE002,
11 |     # these ignores are from flake8-bugbear; please fix!
12 |     B007,B008,B905
13 |     # these ignores are from flake8-comprehensions; please fix!
14 |     C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
15 | exclude = compute-wer.py,kaldi_io.py,__torch__,docs/conf.py
16 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Smartphone (please complete the following information):**
32 |  - Device: [e.g. iPhone6]
33 |  - OS: [e.g. iOS8.1]
34 |  - Browser [e.g. stock browser, safari]
35 |  - Version [e.g. 22]
36 | 
37 | **Additional context**
38 | Add any other context about the problem here.
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/.github/workflows/runtime.yml:
--------------------------------------------------------------------------------
 1 | name: Build Runtime
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   pull_request:
 6 |     paths:
 7 |       - 'runtime/**'
 8 | 
 9 | env:
10 |   RUNTIME_DIR: runtime/libtorch
11 |   FC_BASE_DIR: runtime/libtorch/fc_base
12 | 
13 | jobs:
14 |   build:
15 |     runs-on: ${{ matrix.os }}
16 |     strategy:
17 |       matrix:
18 |         os: [macos-latest, ubuntu-latest]
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v3
22 |       - name: Cache FC Base
23 |         uses: actions/cache@v3
24 |         with:
25 |           path: ${{ env.FC_BASE_DIR }}
26 |           key: ${{ runner.os }}-fc-base
27 | 
28 |       - uses: hendrikmuhs/ccache-action@v1.2
29 |         with:
30 |           key: ${{ runner.os }}-build
31 | 
32 |       - name: Build
33 |         run: |
34 |           export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
35 |           cd ${{ env.RUNTIME_DIR }}
36 |           cmake -B build -DCMAKE_BUILD_TYPE=Release
37 |           cmake --build build -j$(nproc)
38 | 


--------------------------------------------------------------------------------
/.github/workflows/stale-issues.yml:
--------------------------------------------------------------------------------
 1 | name: Close Stale Issues
 2 | on:
 3 |   schedule:
 4 |     - cron: '0 0 * * *'
 5 | 
 6 | jobs:
 7 |   close-stale-issues:
 8 |     if: github.repository == 'wenet-e2e/wenet'
 9 |     runs-on: ubuntu-latest
10 |     permissions:
11 |       issues: write
12 |     steps:
13 |       - name: Check for Stale Issues
14 |         uses: actions/stale@v5
15 | 
16 |       - name: Close Stale Issues
17 |         uses: actions/stale@v5
18 |         with:
19 |           stale-issue-message: 'This issue has been automatically marked as stale due to inactivity. It will be closed in 7 days if there is no further activity.'
20 |           close-issue-message: "This issue was closed because it has been inactive for 7 days
21 |             since being marked as stale. Please reopen if you'd like to work on this further."
22 |           days-before-stale: 60
23 |           days-before-close: 7
24 |           stale-issue-label: stale
25 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
26 | 


--------------------------------------------------------------------------------
/.github/workflows/windows.yml:
--------------------------------------------------------------------------------
 1 | name: Build Windows Runtime
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 | 
 6 | env:
 7 |   RUNTIME_DIR: runtime/libtorch
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: windows-latest
12 |     defaults:
13 |       run:
14 |         shell: bash
15 |     steps:
16 |       - uses: actions/checkout@v3
17 |       - name: Build
18 |         run: |
19 |           cd ${{ env.RUNTIME_DIR }}
20 |           cmake -B build -DGRAPH_TOOLS=ON -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF
21 |           cmake --build build --config Release -j$(nproc)
22 | 
23 |       - name: Prepare Release Binary
24 |         run: |
25 |           cd ${{ env.RUNTIME_DIR }}
26 |           mkdir -p wenet/kaldi
27 |           cp build/*.dll wenet
28 |           cp build/api/Release/*.dll wenet
29 |           cp build/bin/Release/*.exe wenet
30 |           cp build/kaldi/Release/*.exe wenet/kaldi
31 | 
32 |       - name: Upload WeNet Binary
33 |         uses: actions/upload-artifact@v3
34 |         with:
35 |           name: release-wenet-binary
36 |           path: ${{ env.RUNTIME_DIR }}/wenet
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Visual Studio Code files
 7 | .vscode
 8 | .vs
 9 | 
10 | # PyCharm files
11 | .idea
12 | 
13 | # Eclipse Project settings
14 | *.*project
15 | .settings
16 | 
17 | # Sublime Text settings
18 | *.sublime-workspace
19 | *.sublime-project
20 | 
21 | # Editor temporaries
22 | *.swn
23 | *.swo
24 | *.swp
25 | *.swm
26 | *~
27 | 
28 | # IPython notebook checkpoints
29 | .ipynb_checkpoints
30 | 
31 | # macOS dir files
32 | .DS_Store
33 | 
34 | exp
35 | data
36 | raw_wav
37 | tensorboard
38 | **/*build*
39 | 
40 | # protoc output files
41 | runtime/core/grpc/wenet.grpc.pb.cc
42 | runtime/core/grpc/wenet.grpc.pb.h
43 | runtime/core/grpc/wenet.pb.cc
44 | runtime/core/grpc/wenet.pb.h
45 | 
46 | # Clangd files
47 | .cache
48 | compile_commands.json
49 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.5.0
 4 |     hooks:
 5 |     - id: trailing-whitespace
 6 |       exclude: 'test/resources/.*'
 7 |   - repo: https://github.com/pre-commit/mirrors-yapf
 8 |     rev: 'v0.32.0'
 9 |     hooks:
10 |     - id: yapf
11 |   - repo: https://github.com/pycqa/flake8
12 |     rev: '3.8.2'
13 |     hooks:
14 |     - id: flake8
15 |   - repo: https://github.com/pre-commit/mirrors-clang-format
16 |     rev: 'v17.0.6'
17 |     hooks:
18 |     - id: clang-format
19 |       args: ['--style=file']
20 |       exclude: 'runtime/ios/WenetDemo/WenetDemo/wenet/.*\.h$|.*\.(json|java|js|m|mm|proto)'
21 |   - repo: https://github.com/cpplint/cpplint
22 |     rev: '1.6.1'
23 |     hooks:
24 |     - id: cpplint
25 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing guidelines
 2 | 
 3 | ## Pre-commit tidy/linting hook
 4 | 
 5 | You'll need to install flake8 first.
 6 | 
 7 | `pip install flake8==3.8.2`
 8 | 
 9 | We use flake8 to perform additional formatting and semantic checking of code.
10 | We provide a pre-commit git hook for performing these checks, before a commit
11 | is created:
12 | 
13 | ```bash
14 | ln -s ../../tools/git-pre-commit .git/hooks/pre-commit
15 | ```
16 | 
17 | You have to execute the above command in the wenet project root directory.
18 | After that, each commit will be checked by flake8.
19 | 
20 | If you do not set up the pre-commit hook, just run `flake8` in the wenet project
21 | root directory and fix all the problems.
22 | 
23 | ## Github checks
24 | 
25 | After a pull request is submitted, some checks will run to check your code style.
26 | 
27 | Below is an example where some checks fail.
28 | 
29 | ![github checks](docs/images/checks.png)
30 | 
31 | You need to click the details to see the detailed info like the example below.
32 | 
33 | ![github checks](docs/images/check_detail.png)
34 | 
35 | You have to fix all style problems according to the detailed info.
36 | 
37 | 


--------------------------------------------------------------------------------
/CPPLINT.cfg:
--------------------------------------------------------------------------------
1 | root=runtime/core
2 | filter=-build/c++11
3 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _gen/
2 | _build/
3 | build/
4 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SPHINXPROJ    = wenet
 9 | SOURCEDIR     = .
10 | BUILDDIR      = _build
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
16 | .PHONY: help Makefile
17 | 
18 | api:
19 | 	sphinx-apidoc -f --separate --module-first -d 2 -o ./python_api ../wenet
20 | 	sed -i 's:^wenet:Python API Reference:g' ./python_api/modules.rst
21 | 	sed -i 's:^=====:====================:g' ./python_api/modules.rst
22 | 
23 | # Catch-all target: route all unknown targets to Sphinx using the new
24 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
25 | %: Makefile
26 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
27 | 


--------------------------------------------------------------------------------
/docs/images/UIO_dataflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_dataflow.png


--------------------------------------------------------------------------------
/docs/images/UIO_system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_system.png


--------------------------------------------------------------------------------
/docs/images/UIO_wenetspeech_cer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/UIO_wenetspeech_cer.png


--------------------------------------------------------------------------------
/docs/images/check_detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/check_detail.png


--------------------------------------------------------------------------------
/docs/images/checks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/checks.png


--------------------------------------------------------------------------------
/docs/images/context_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/context_graph.png


--------------------------------------------------------------------------------
/docs/images/lm_system.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/lm_system.png


--------------------------------------------------------------------------------
/docs/images/runtime_android.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_android.gif


--------------------------------------------------------------------------------
/docs/images/runtime_server.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_server.gif


--------------------------------------------------------------------------------
/docs/images/runtime_web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/runtime_web.png


--------------------------------------------------------------------------------
/docs/images/subsampling_overalp.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/subsampling_overalp.gif


--------------------------------------------------------------------------------
/docs/images/u2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/docs/images/u2.gif


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. wenet documentation master file, created by
 2 |    sphinx-quickstart on Thu Dec  3 11:43:53 2020.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to wenet's documentation!
 7 | =================================
 8 | 
 9 | 
10 | wenet is a transformer-based end-to-end ASR toolkit.
11 | 
12 | .. toctree::
13 |    :maxdepth: 2
14 |    :caption: Contents:
15 | 
16 |    ./python_package.md
17 |    ./train.rst
18 |    ./production.rst
19 |    ./reference.rst
20 | 
21 | Indices and tables
22 | ==================
23 | 
24 | * :ref:`genindex`
25 | * :ref:`modindex`
26 | * :ref:`search`
27 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 |     set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 |     echo.
18 |     echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 |     echo.installed, then set the SPHINXBUILD environment variable to point
20 |     echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 |     echo.may add the Sphinx directory to PATH.
22 |     echo.
23 |     echo.If you don't have Sphinx installed, grab it from
24 |     echo.http://sphinx-doc.org/
25 |     exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/papers.md:
--------------------------------------------------------------------------------
1 | ## Papers
2 | 
3 | * [WeNet: Production Oriented Streaming and Non-streaming End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2102.01547.pdf), accepted by InterSpeech 2021.
4 | * [WeNet 2.0: More Productive End-to-End Speech Recognition Toolkit](https://arxiv.org/pdf/2203.15455.pdf), accepted by InterSpeech 2022.
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/production.rst:
--------------------------------------------------------------------------------
 1 | Production Runtime
 2 | ==================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 |    :caption: Contents:
 7 | 
 8 |    ./lm.md
 9 |    ./context.md
10 |    ./runtime.md
11 |    ./jit_in_wenet.md
12 | 


--------------------------------------------------------------------------------
/docs/python_package.md:
--------------------------------------------------------------------------------
 1 | # Python Package
 2 | 
 3 | 
 4 | ## Install
 5 | 
 6 | ``` sh
 7 | pip install git+https://github.com/wenet-e2e/wenet.git
 8 | ```
 9 | 
10 | ## Development Install
11 | 
12 | ``` sh
13 | git clone https://github.com/wenet-e2e/wenet.git
14 | cd wenet
15 | pip install -e .
16 | ```
17 | 
18 | 
19 | ## Command line Usage
20 | 
21 | ``` sh
22 | wenet --language chinese audio.wav
23 | ```
24 | 
25 | You can specify the following parameters.
26 | 
27 | * `-l` or `--language`: chinese/english are supported now.
28 | * `-m` or `--model_dir`: your own model dir
29 | * `-g` or `--gpu`: the device id of the GPU; the default value -1 means CPU.
30 | * `-t` or `--show_tokens_info`: show the token level information such as timestamp, confidence, etc.
31 | * `--align`: force align the input audio and transcript
32 | * `--label`: the input label to align
33 | * `--paraformer`: use the best Chinese model
34 | * `--device`: specify the backend accelerator (cuda/npu/cpu)
35 | 
36 | ## Python Programming Usage
37 | 
38 | ``` python
39 | import wenet
40 | 
41 | model = wenet.load_model('chinese')
42 | # or model = wenet.load_model(model_dir='xxx')
43 | result = model.transcribe('audio.wav')
44 | print(result['text'])
45 | ```
46 | 


--------------------------------------------------------------------------------
/docs/reference.rst:
--------------------------------------------------------------------------------
 1 | Reference
 2 | =========
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 |    :caption: Contents:
 7 | 
 8 |    ./papers.md
 9 |    ./python_api/modules.rst
10 | 
11 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
 1 | jinja2
 2 | nbsphinx
 3 | sphinx
 4 | recommonmark
 5 | sphinx-markdown-tables
 6 | sphinx-rtd-theme
 7 | torch
 8 | torchaudio
 9 | typeguard
10 | 


--------------------------------------------------------------------------------
/docs/train.rst:
--------------------------------------------------------------------------------
 1 | How to train models?
 2 | ====================
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 1
 6 |    :caption: Contents:
 7 | 
 8 |    ./tutorial_librispeech.md
 9 |    ./tutorial_aishell.md
10 |    ./pretrained_models.md
11 |    ./UIO.md
12 | 


--------------------------------------------------------------------------------
/examples/aishell/NST/local/NST_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/aishell/NST/local/NST_plot.png


--------------------------------------------------------------------------------
/examples/aishell/NST/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/server/x86/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell/paraformer/local:
--------------------------------------------------------------------------------
1 | ../whisper/local


--------------------------------------------------------------------------------
/examples/aishell/paraformer/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell/paraformer/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/aishell/paraformer/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/aishell/rnnt/local:
--------------------------------------------------------------------------------
1 | ../s0/local


--------------------------------------------------------------------------------
/examples/aishell/rnnt/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell/rnnt/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/aishell/rnnt/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/aishell/s0/UIO_RESULT.md:
--------------------------------------------------------------------------------
 1 | # Benchmark on Conformer
 2 | 
 3 | | IO           | CER   |
 4 | |--------------|-------|
 5 | | Old          | 4.61  |
 6 | | UIO(Raw)     | 4.63  |
 7 | | UIO(Shards)  | 4.67  |
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/examples/aishell/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/aishell/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/aishell/whisper/conf/ds_stage1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_micro_batch_size_per_gpu": 1,
 3 |   "gradient_accumulation_steps": 1,
 4 |   "steps_per_print": 100,
 5 |   "gradient_clipping": 5,
 6 |   "fp16": {
 7 |     "enabled": false,
 8 |     "auto_cast": false,
 9 |     "loss_scale": 0,
10 |     "initial_scale_power": 16,
11 |     "loss_scale_window": 1000,
12 |     "hysteresis": 2,
13 |     "consecutive_hysteresis": false,
14 |     "min_loss_scale": 1
15 |   },
16 |   "bf16": {
17 |    "enabled": true
18 |   },
19 |   "zero_force_ds_cpu_optimizer": false,
20 |   "zero_optimization": {
21 |     "stage": 1,
22 |     "offload_optimizer": {
23 |       "device": "none",
24 |       "pin_memory": true
25 |     },
26 |     "allgather_partitions": true,
27 |     "allgather_bucket_size": 5e8,
28 |     "overlap_comm": true,
29 |     "reduce_scatter": true,
30 |     "reduce_bucket_size": 5e8,
31 |     "contiguous_gradients" : true
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/examples/aishell/whisper/conf/ds_stage2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_micro_batch_size_per_gpu": 1,
 3 |   "gradient_accumulation_steps": 1,
 4 |   "steps_per_print": 100,
 5 |   "gradient_clipping": 5,
 6 |   "fp16": {
 7 |     "enabled": false,
 8 |     "auto_cast": false,
 9 |     "loss_scale": 0,
10 |     "initial_scale_power": 16,
11 |     "loss_scale_window": 1000,
12 |     "hysteresis": 2,
13 |     "consecutive_hysteresis": false,
14 |     "min_loss_scale": 1
15 |   },
16 |   "bf16": {
17 |    "enabled": true
18 |   },
19 |   "zero_force_ds_cpu_optimizer": false,
20 |   "zero_optimization": {
21 |     "stage": 2,
22 |     "offload_optimizer": {
23 |       "device": "none",
24 |       "pin_memory": true
25 |     },
26 |     "allgather_partitions": true,
27 |     "allgather_bucket_size": 5e8,
28 |     "overlap_comm": false,
29 |     "reduce_scatter": true,
30 |     "reduce_bucket_size": 5e8,
31 |     "contiguous_gradients" : true
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/examples/aishell/whisper/conf/ds_stage3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_micro_batch_size_per_gpu": 1,
 3 |   "gradient_accumulation_steps": 1,
 4 |   "steps_per_print": 100,
 5 |   "gradient_clipping": 5,
 6 |   "fp16": {
 7 |     "enabled": false,
 8 |     "auto_cast": false,
 9 |     "loss_scale": 0,
10 |     "initial_scale_power": 16,
11 |     "loss_scale_window": 1000,
12 |     "hysteresis": 2,
13 |     "consecutive_hysteresis": false,
14 |     "min_loss_scale": 1
15 |   },
16 |   "bf16": {
17 |    "enabled": true
18 |   },
19 |   "zero_force_ds_cpu_optimizer": false,
20 |   "zero_optimization": {
21 |     "stage": 3,
22 |     "offload_optimizer": {
23 |       "device": "none",
24 |       "pin_memory": true
25 |     },
26 |     "offload_param": {
27 |       "device": "none",
28 |       "pin_memory": true
29 |     },
30 |     "allgather_partitions": true,
31 |     "allgather_bucket_size": 5e8,
32 |     "overlap_comm": true,
33 |     "reduce_scatter": true,
34 |     "reduce_bucket_size": 5e8,
35 |     "contiguous_gradients" : true,
36 |     "stage3_max_live_parameters": 1e9,
37 |     "stage3_max_reuse_distance": 1e9,
38 |     "stage3_prefetch_bucket_size": 5e8,
39 |     "stage3_param_persistence_threshold": 1e5
40 |   }
41 | }
42 | 


--------------------------------------------------------------------------------
/examples/aishell/whisper/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell/whisper/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/aishell/whisper/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/aishell2/rnnt/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## U2++ Conformer Result
 4 | 
 5 | * Feature info: using fbank feature, dither, cmvn, online speed perturb
 6 | * Training info: lr 0.001, dynamic batch with max_frames_in_batch 15000, 4 gpu, acc_grad 1, 130 epochs
 7 | * Training weight info: transducer_weight 0.75,  ctc_weight 0.1, reverse_weight 0.30, average_num 30
 8 | * Predictor type: lstm
 9 | 
10 | | decoding mode/chunk size  | full  | 16    |
11 | |---------------------------|-------|-------|
12 | | rnnt greedy search        | 6.44  | 7.09  |
13 | 
14 | 


--------------------------------------------------------------------------------
/examples/aishell2/rnnt/local:
--------------------------------------------------------------------------------
1 | ../s0/local


--------------------------------------------------------------------------------
/examples/aishell2/rnnt/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell2/rnnt/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/aishell2/rnnt/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/aishell2/s0/local/word_segmentation.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding=utf-8
 3 | # Copyright 2018 AIShell-Foundation(Authors:Jiayu DU, Xingyu NA, Bengu WU, Hao ZHENG)
 4 | #           2018 Beijing Shell Shell Tech. Co. Ltd. (Author: Hui BU)
 5 | # Apache 2.0
 6 | 
 7 | from __future__ import print_function
 8 | import sys
 9 | import jieba
10 | 
11 | if len(sys.argv) < 3:
12 |     sys.stderr.write(
13 |         "word_segmentation.py <vocab> <trans> <word-segmented-trans>\n")
14 |     exit(1)
15 | 
16 | vocab_file = sys.argv[1]
17 | trans_file = sys.argv[2]
18 | 
19 | jieba.set_dictionary(vocab_file)
20 | for line in open(trans_file, 'r', encoding='utf8'):
21 |     key, trans = line.strip().split(' ', 1)
22 |     words = jieba.cut(trans,
23 |                       HMM=False)  # turn off new word discovery (HMM-based)
24 |     new_line = key + '\t' + " ".join(words)
25 |     print(new_line)
26 | 


--------------------------------------------------------------------------------
/examples/aishell2/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell2/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/aishell2/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/aishell4/s0/local/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | while(<>){
19 |     @A = split(" ", $_);
20 |     @A > 1 || die "Invalid line in spk2utt file: $_";
21 |     $s = shift @A;
22 |     foreach $u ( @A ) {
23 |         print "$u $s\n";
24 |     }
25 | }
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/examples/aishell4/s0/local/text_format.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use warnings; #sed replacement for -w perl parameter
 3 | # Copyright Chao Weng
 4 | 
 5 | # normalizations for hkust transcript
 6 | # see the docs/trans-guidelines.pdf for details
 7 | 
 8 | while (<STDIN>) {
 9 |   @A = split(" ", $_);
10 |   if (@A == 1) {
11 |     next;
12 |   }
13 |   print $_
14 | }
15 | 


--------------------------------------------------------------------------------
/examples/aishell4/s0/local/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
21 | if ( @ARGV > 1 ) {
22 |     die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
23 | }
24 | 
25 | while(<>){
26 |     @A = split(" ", $_);
27 |     @A == 2 || die "Invalid line in utt2spk file: $_";
28 |     ($u,$s) = @A;
29 |     if(!$seen_spk{$s}) {
30 |         $seen_spk{$s} = 1;
31 |         push @spklist, $s;
32 |     }
33 |     push (@{$spk_hash{$s}}, "$u");
34 | }
35 | foreach $s (@spklist) {
36 |     $l = join(' ',@{$spk_hash{$s}});
37 |     print "$s $l\n";
38 | }
39 | 


--------------------------------------------------------------------------------
/examples/aishell4/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/aishell4/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/aishell4/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/chime4/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: dither + specaug + speed perturb
 6 | * Training info: lr 0.0005, batch size 8, 1 gpu, acc_grad 4, 80 epochs
 7 | * Decoding info: average_num 10
 8 | 
 9 | |      decoding mode     | dt05_real_1ch | dt05_simu_1ch | et05_real_1ch | et05_simu_1ch |
10 | |:----------------------:|:-------------:|:-------------:|:-------------:|:-------------:|
11 | | ctc_prefix_beam_search |   19.06%      |   21.17%      |   28.39%      |    29.16%     |
12 | |  attention_rescoring   |   17.92%      |   20.22%      |   27.40%      |    28.25%     |
13 | 


--------------------------------------------------------------------------------
/examples/chime4/s0/local/chime4_format_dir.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # wujian@2020
 4 | 
 5 | set -eu
 6 | 
 7 | echo "$0: Formating chime4 data dir..."
 8 | 
 9 | track=isolated_1ch_track
10 | data_dir=data/chime4
11 | 
12 | mkdir -p $data_dir/{train,dev}
13 | 
14 | cat $data_dir/tr05_{simu,real}_noisy/wav.scp $data_dir/tr05_orig_clean/wav.scp \
15 |   $data_dir/train_si200_wsj1_clean/wav.scp | sort -k1 > $data_dir/train/wav.scp
16 | cat $data_dir/tr05_{simu,real}_noisy/text $data_dir/tr05_orig_clean/text \
17 |   $data_dir/train_si200_wsj1_clean/text | sort -k1 > $data_dir/train/text
18 | 
19 | cat $data_dir/dt05_{real,simu}_${track}/wav.scp | sort -k1 > $data_dir/dev/wav.scp
20 | cat $data_dir/dt05_{real,simu}_${track}/text | sort -k1 > $data_dir/dev/text
21 | 
22 | echo "$0: Format $data_dir done"
23 | 


--------------------------------------------------------------------------------
/examples/chime4/s0/local/chime4_gen_wav.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # wujian@2020
 4 | 
 5 | # Usage: chime4_gen_wav.sh <data-dir> <dump-dir>
 6 | # Rebuilds <data-dir>/wav.scp from the entries that do not mention
 7 | # sph2pipe plus every *.wav file found under <dump-dir>.
 8 | 
 9 | set -eu
10 | 
11 | # A wrong argument count is an error: report it on stderr and exit non-zero
12 | # so that callers can detect the failure.
13 | [ $# -ne 2 ] && echo "Script format error: $0 <data-dir> <dump-dir>" >&2 && exit 1
14 | 
15 | data_dir=$1
16 | dump_dir=$2
17 | 
18 | mkdir -p $dump_dir
19 | 
20 | num_utts=$(cat $data_dir/wav.scp | wc -l)
21 | echo "Orginal utterances (.wav + .wv1): $num_utts"
22 | 
23 | # NOTE: the sph2pipe conversion below is commented out, so this script only
24 | # picks up *.wav files that already exist under the dump directory.
25 | # cat $data_dir/wav.scp | grep "sph2pipe" | \
26 | #   awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash
27 | 
28 | # Keep the entries that do not mention sph2pipe unchanged.
29 | cat $data_dir/wav.scp | grep -v "sph2pipe" > $data_dir/raw_wav.scp
30 | # Emit "<file name> <full path>" per wav found, then drop the first ".wav"
31 | # on each line (the one in the file-name column).
32 | find $dump_dir -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \
33 |   sed 's:\.wav::' > $data_dir/sph_wav.scp
34 | 
35 | # Merge both lists, sorted, back into wav.scp.
36 | cat $data_dir/{raw_wav,sph_wav}.scp | sort -k1 > $data_dir/wav.scp
37 | num_utts=$(cat $data_dir/wav.scp | wc -l)
38 | echo "Wave utterances (.wav): $num_utts"
39 | 
40 | echo "$0: Generate wav => $dump_dir done"
41 | 


--------------------------------------------------------------------------------
/examples/chime4/s0/local/flist2scp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # takes in a file list with lines like
19 | # /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
20 | # and outputs an scp in kaldi format with lines like
21 | # 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
22 | # (the first thing is the utterance-id, which is the same as the basename of the file).
23 | 
24 | 
25 | while(<>){
26 |     m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_";
27 |     $id = $1;
28 |     $id =~ tr/A-Z/a-z/;  # Necessary because of weirdness on disk 13-16.1 (uppercase filenames)
29 |     print "$id $_";
30 | }
31 | 


--------------------------------------------------------------------------------
/examples/chime4/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/chime4/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/chime4/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/commonvoice/fr/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | # Prerequisites: ffmpeg and pandas must be installed
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: dither + specaug + speed perturb
 6 | * Training info: lr 0.0005, warmup_steps 20000, batch size 8, 3 gpu, 30 epochs
 7 | * Decoding info: average_num 20
 8 | 
 9 | 
10 | 
11 | |     decoding mode      | test (wer) |
12 | | :--------------------: | :---------: |
13 | |   ctc_greedy_search    |   16.12%    |
14 | | ctc_prefix_beam_search |   16.07%    |
15 | |       attention        |   13.56%    |
16 | |  attention_rescoring   |   14.01%    |


--------------------------------------------------------------------------------
/examples/commonvoice/fr/local/download_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: download_data.sh <download_path> <data_France>
 3 | # Fetches the Common Voice 8.0 French archive into <download_path>, unpacks
 4 | # it into <data_France> and removes the downloaded archive afterwards.
 5 | if [ $# -le 1 ]; then
 6 |     echo "Args_Error:Two parameters are required."
 7 |     exit 1;
 8 | fi
 9 | download_path=$1
10 | data_France=$2
11 | # Abort on a failed download or extraction; otherwise the script would
12 | # carry on and finish with the exit status of the final rm.
13 | wget -O ${download_path}/tmp.zip https://mozilla-common-voice-datasets.s3.dualstack.us-west-2.amazonaws.com/cv-corpus-8.0-2022-01-19/cv-corpus-8.0-2022-01-19-fr.tar.gz || exit 1
14 | tar -xvf ${download_path}/tmp.zip  -C ${data_France} || exit 1
15 | rm -rf ${download_path}/tmp.zip


--------------------------------------------------------------------------------
/examples/commonvoice/fr/local/prepare_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: prepare_data.sh <src_path>
 3 | # Recreates ./data/{train,dev,test} from scratch, makes sure <src_path>/wavs
 4 | # exists, then runs local/create_scp_text.py once per split.
 5 | if [ $# -le 0 ]; then
 6 |     echo "Argument should be France src directory, see ../run.sh for example."
 7 |     exit 1;
 8 | fi
 9 | dir=`pwd`/data
10 | local=`pwd`/local
11 | src_path=$1
12 | # Start from an empty data directory on every run.
13 | if [ ! -d ${dir} ]; then
14 |     mkdir ${dir}
15 |   else
16 |     rm -rf ${dir}
17 |     mkdir ${dir}
18 | fi
19 | 
20 | for x in train dev test; do
21 |     # -d is required here: a bare "[ ! string ]" only tests for an empty
22 |     # string, so without it the first branch could never run.
23 |     if [ ! -d ${dir}/${x} ]; then
24 |         mkdir ${dir}/${x}
25 |     else
26 |         rm -rf ${dir}/${x}
27 |         mkdir ${dir}/${x}
28 |     fi
29 | done
30 | 
31 | if [ ! -d ${src_path}/wavs ]; then
32 |     mkdir ${src_path}/wavs
33 | fi
34 | # Arguments passed to the helper: source directory, split name, output dir.
35 | for x in train dev test; do
36 |     python3 ${local}/create_scp_text.py  ${src_path} ${x} ${dir}/${x}
37 | done
38 | 


--------------------------------------------------------------------------------
/examples/commonvoice/fr/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/commonvoice/fr/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/commonvoice/fr/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/csj/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result Bidecoder (large)
 4 | 
 5 | 
 6 | ## Conformer Result
 7 | 
 8 | * Feature info: using fbank feature, cmvn, dither, online speed perturb
 9 | * Training info: train_conformer.yaml, kernel size 15, lr 0.004, batch size 12, 8 gpu, acc_grad 1, 50 epochs, dither 0.0
10 | * Decoding info: ctc_weight 0.5, average_num 10
11 | 
12 | 
13 | | decoding mode                    | test1      | test2      | test3      |
14 | |----------------------------------|------------|------------|------------|
15 | | ctc greedy search                | 7.94       | 5.29       | 6.10       |
16 | | ctc prefix beam search           | 7.83+      | 5.28       | 6.08       |
17 | | attention decoder                | 7.83       | 5.63       | 6.37       |
18 | | attention rescoring              | 7.28+      | 4.81       | 5.44       |
19 | 
20 | note that "+" means we removed two <0.1s wav files in test1 before decoding.
21 | 
22 | 
23 | 
24 | 
25 | ## Conformer U2++ Result
26 | 
27 | 
28 | ## Conformer U2 Result
29 | 
30 | 


--------------------------------------------------------------------------------
/examples/csj/s0/csj_tools/wn.3.mincut.py:
--------------------------------------------------------------------------------
 1 | import librosa
 2 | # import os
 3 | import sys
 4 | 
 5 | 
 6 | def mincut(wavscpfn, minsec):
 7 |     outfn = wavscpfn + "_" + str(minsec)
 8 | 
 9 |     with open(outfn, 'w') as bw:
10 |         with open(wavscpfn) as br:
11 |             for aline in br.readlines():
12 |                 aline = aline.strip()
13 |                 afn = aline.split('\t')[1]
14 |                 # print(afn)
15 |                 dur = librosa.get_duration(filename=afn)
16 |                 if dur >= minsec:
17 |                     bw.write(aline + '\n')
18 | 
19 | 
20 | # wn.3.mincut.py <wav.scp> <min.sec>
21 | if __name__ == '__main__':
22 |     if len(sys.argv) < 3:
23 |         print('{} <in.wav.scp> <min.sec.cut>'.format(sys.argv[0]))
24 |         exit()
25 | 
26 |     wavscpfn = sys.argv[1]
27 |     minsec = float(sys.argv[2])
28 | 
29 |     mincut(wavscpfn, minsec)
30 | 


--------------------------------------------------------------------------------
/examples/csj/s0/list_files/2ch.id.list:
--------------------------------------------------------------------------------
 1 | D01F0002
 2 | D01F0003
 3 | D01F0023
 4 | D01F0030
 5 | D01F0046
 6 | D01F0049
 7 | D01F0055
 8 | D01F0057
 9 | D01M0005
10 | D01M0009
11 | D01M0012
12 | D01M0019
13 | D01M0020
14 | D01M0042
15 | D01M0043
16 | D01M0047
17 | D02F0015
18 | D02F0018
19 | D02F0025
20 | D02F0027
21 | D02F0031
22 | D02F0032
23 | D02F0033
24 | D02F0054
25 | D02M0014
26 | D02M0016
27 | D02M0024
28 | D02M0026
29 | D02M0028
30 | D02M0035
31 | D02M0039
32 | D02M0051
33 | D03F0001
34 | D03F0006
35 | D03F0008
36 | D03F0034
37 | D03F0036
38 | D03F0040
39 | D03F0045
40 | D03F0058
41 | D03M0004
42 | D03M0007
43 | D03M0013
44 | D03M0017
45 | D03M0037
46 | D03M0038
47 | D03M0048
48 | D03M0053
49 | D04F0011
50 | D04F0022
51 | D04F0029
52 | D04F0044
53 | D04F0050
54 | D04M0010
55 | D04M0021
56 | D04M0041
57 | D04M0052
58 | D04M0056
59 | 


--------------------------------------------------------------------------------
/examples/csj/s0/list_files/test.set.1.list:
--------------------------------------------------------------------------------
 1 | A01M0097
 2 | A04M0051
 3 | A04M0121
 4 | A03M0156
 5 | A03M0112
 6 | A01M0110
 7 | A05M0011
 8 | A03M0106
 9 | A01M0137
10 | A04M0123
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/csj/s0/list_files/test.set.123.list:
--------------------------------------------------------------------------------
 1 | A01M0097
 2 | A04M0051
 3 | A04M0121
 4 | A03M0156
 5 | A03M0112
 6 | A01M0110
 7 | A05M0011
 8 | A03M0106
 9 | A01M0137
10 | A04M0123
11 | 
12 | A01F0063
13 | A01M0056
14 | A06F0135
15 | A02M0012
16 | A06M0064
17 | A01M0141
18 | A01F0034
19 | A03M0016
20 | A03F0072
21 | A01F0001
22 | 
23 | S00F0066
24 | S00M0213
25 | S00M0070
26 | S00M0008
27 | S01F0105
28 | S00F0148
29 | S00F0019
30 | S00M0112
31 | S00F0152
32 | S00M0079
33 | 
34 | 


--------------------------------------------------------------------------------
/examples/csj/s0/list_files/test.set.2.list:
--------------------------------------------------------------------------------
 1 | A01F0063
 2 | A01M0056
 3 | A06F0135
 4 | A02M0012
 5 | A06M0064
 6 | A01M0141
 7 | A01F0034
 8 | A03M0016
 9 | A03F0072
10 | A01F0001
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/csj/s0/list_files/test.set.3.list:
--------------------------------------------------------------------------------
 1 | S00F0066
 2 | S00M0213
 3 | S00M0070
 4 | S00M0008
 5 | S01F0105
 6 | S00F0148
 7 | S00F0019
 8 | S00M0112
 9 | S00F0152
10 | S00M0079
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/csj/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/csj/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/csj/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/gigaspeech/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/gigaspeech/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/gigaspeech/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/hkust/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result (Old IO)
 4 | 
 5 | * Feature info: using fbank feature, with cmvn, with speed perturb.
 6 | * Training info: lr 0.002, batch size 16, 1 machine, 1*4 = 4 gpu, acc_grad 4, 240 epochs, dither 0.1
 7 | * Decoding info: ctc_weight 0.5, average_num 30
 8 | 
 9 | | decoding mode            |       |
10 | |--------------------------|-------|
11 | | attention decoder        | 21.9  |
12 | | ctc greedy search        | 21.15 |
13 | | ctc prefix beam search   | 21.13 |
14 | | attention rescoring      | 20.47 |
15 | 
16 | ## Conformer Result (New IO)
17 | 
18 | * Feature info: using fbank feature, with cmvn, with speed perturb.
19 | * Training info: lr 0.002, batch size 16, 1 machine, 1*4 = 4 gpu, acc_grad 4, 133 epochs, dither 0.1
20 | * Decoding info: ctc_weight 0.5, average_num 30
21 | 
22 | | decoding mode            |       |
23 | |--------------------------|-------|
24 | | attention decoder        | 21.42 |
25 | | ctc greedy search        | 21.16 |
26 | | ctc prefix beam search   | 21.18 |
27 | | attention rescoring      | 20.42 |
28 | 


--------------------------------------------------------------------------------
/examples/hkust/s0/conf/train_960_unigram5000.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/hkust/s0/conf/train_960_unigram5000.model


--------------------------------------------------------------------------------
/examples/hkust/s0/local/hkust_normalize.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use warnings; #sed replacement for -w perl parameter
 3 | # Copyright Chao Weng
 4 | 
 5 | # normalizations for hkust transcript
 6 | # see the docs/trans-guidelines.pdf for details
 7 | 
# Read lines from STDIN and print a normalized copy of each to STDOUT.
while (<STDIN>) {
  @A = split(" ", $_);
  # The first field is echoed unchanged (presumably the utterance id -- confirm against callers).
  print "$A[0] ";
  for ($n = 1; $n < @A; $n++) {
    $a = $A[$n];
    # Drop the listed non-speech markers entirely.
    if (($a eq "{breath}")||($a eq "{cough}")||($a eq "{sneeze}")
       || ($a eq "{lipsmack}")) {next;}
    if (($a eq "{laugh}")) {next;}
    if (($a eq "<noise>")) {next;}
    $tmp = $a;
    # Remove every '.', ',', '?', '+' and '-' character from the token.
    if ($tmp =~ /[^.,?+-]{0,}[.,?+-]+/) { $tmp =~ s:([^.,?+-]{0,})[.,?+-]+:$1:g; }
    # Remove the first '~' that directly precedes an uppercase ASCII letter.
    if ($tmp =~ /\~[A-Z]/) { $tmp =~ s:\~([A-Z]):$1:; }
    # Remove the first '%' that directly precedes a non-whitespace character.
    if ($tmp =~ /%\S/) { $tmp =~ s:%(\S):$1:; }
    # Uppercase any token that contains an ASCII letter.
    if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);}
    # Each kept token is followed by a space, so output lines end with one.
    print "$tmp ";
  }
  print "\n";
}
26 | 


--------------------------------------------------------------------------------
/examples/hkust/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/hkust/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/hkust/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/librispeech/rnnt/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Bidecoder Transducer Result
 4 | 
 5 | * Feature info: using fbank feature, dither, cmvn, online speed perturb
 6 | * Training info: lr 0.001, dynamic batch with max_frames_in_batch 4000, 8 gpu, acc_grad 1, 60 epochs
 7 | * Training weight info: transducer_weight 0.75,  ctc_weight 0.1, reverse_weight 0.30, average_num 10
 8 | * Predictor type: lstm
 9 | 
10 | | decoding mode         | dev_clean  | dev_other | test_clean | test_other |
11 | |-----------------------|------------|-----------|------------|------------|
12 | | rnnt_greedy_search    | 3.42%      | 8.99%     |    3.56%   |   9.15%    |
13 | | rnnt_beam_search      | 3.35%      | 8.77%     |    3.45%   |   8.78%    |
14 | | rnnt_beam_att_rescore | 3.25%      | 8.66%     |    3.41%   |   8.68%    |
15 | 
16 | Pretrained model: https://huggingface.co/yuekai/wenet-asr-librispeech-conformer-transducer-mtl/blob/main/exp/conformer_transducer/avg_10.pt
17 | 
18 | 


--------------------------------------------------------------------------------
/examples/librispeech/rnnt/local:
--------------------------------------------------------------------------------
1 | ../s0/local/


--------------------------------------------------------------------------------
/examples/librispeech/rnnt/path.sh:
--------------------------------------------------------------------------------
1 | ../s0/path.sh


--------------------------------------------------------------------------------
/examples/librispeech/rnnt/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/librispeech/rnnt/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/librispeech/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/librispeech/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/librispeech/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/multi_cn/s0/conf/train_960_unigram5000.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/examples/multi_cn/s0/conf/train_960_unigram5000.model


--------------------------------------------------------------------------------
/examples/multi_cn/s0/local/magicdata_badlist:
--------------------------------------------------------------------------------
1 | 16_4013_20170819121429.wav
2 | 18_1565_20170712000170.wav
3 | 


--------------------------------------------------------------------------------
/examples/multi_cn/s0/local/primewords_data_prep.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2019 Xingyu Na
 4 | # Apache 2.0
 5 | 
 6 | . ./path.sh || exit 1;
 7 | 
 8 | if [ $# != 2 ]; then
 9 |   echo "Usage: $0 <corpus-path> <data-path>"
10 |   echo " $0 /export/a05/xna/data/primewords data/primewords"
11 |   exit 1;
12 | fi
13 | 
14 | corpus=$1/primewords_md_2018_set1
15 | data=$2
16 | 
17 | if [ ! -d $corpus/audio_files ] || [ ! -f $corpus/set1_transcript.json ]; then
18 |   echo "Error: $0 requires complete corpus"
19 |   exit 1;
20 | fi
21 | 
22 | echo "**** Creating primewords data folder ****"
23 | 
24 | mkdir -p $data/train
25 | 
26 | # find wav audio file for train
27 | 
28 | find $corpus -iname "*.wav" > $data/wav.flist
29 | n=`cat $data/wav.flist | wc -l`
30 | [ $n -ne 50384 ] && \
31 |   echo Warning: expected 50384 data files, found $n
32 | 
33 | echo "Filtering data using found wav list and provided transcript"
34 | local/primewords_parse_transcript.py $data/wav.flist $corpus/set1_transcript.json $data/train
35 | cat $data/train/transcripts.txt |\
36 |   awk '{if (NF > 1) print $0;}' > $data/train/text
37 | 
38 | for file in wav.scp utt2spk text; do
39 |   sort $data/train/$file -o $data/train/$file
40 | done
41 | tools/utt2spk_to_spk2utt.pl $data/train/utt2spk > $data/train/spk2utt
42 | 
43 | # rm -r $data/wav.flist
44 | 
45 | tools/validate_data_dir.sh --no-feats $data/train || exit 1;
46 | 


--------------------------------------------------------------------------------
/examples/multi_cn/s0/local/primewords_parse_transcript.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import os
 3 | import sys
 4 | import json
 5 | 
 6 | 
 7 | def main(argv):
 8 |     fp = open(argv[1], encoding="utf-8")
 9 |     js = json.load(fp)
10 |     fp.close()
11 |     metas = {}
12 |     for ele in js:
13 |         fname = ele['file']
14 |         metas[fname] = ele
15 | 
16 |     fWavScp = open(os.path.join(argv[2], 'wav.scp'), 'w')
17 |     fText = open(os.path.join(argv[2], 'transcripts.txt'),
18 |                  'w',
19 |                  encoding="utf-8")
20 |     fUtt2Spk = open(os.path.join(argv[2], 'utt2spk'), 'w')
21 |     for line in open(argv[0]):
22 |         fpath = line.strip('\r\n')
23 |         wname = os.path.basename(fpath)
24 |         meta = metas[wname]
25 |         spkid = 'P' + meta['user_id']
26 |         uttid = spkid + '-' + meta['id']
27 |         fWavScp.write(uttid + ' ' + fpath + '\n')
28 |         fText.write(uttid + ' ' + meta['text'] + '\n')
29 |         fUtt2Spk.write(uttid + ' ' + spkid + '\n')
30 |     fWavScp.close()
31 |     fText.close()
32 |     fUtt2Spk.close()
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     main(sys.argv[1:])
37 | 


--------------------------------------------------------------------------------
/examples/multi_cn/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/multi_cn/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/multi_cn/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/openasr2021/s0/local/make_absolute.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script replaces the command readlink -f (which is not portable).
 4 | # It turns a pathname into an absolute pathname, including following soft links.
 5 | target_file=$1
 6 | 
 7 | cd $(dirname $target_file)
 8 | target_file=$(basename $target_file)
 9 | 
10 | # Iterate down a (possible) chain of symlinks
11 | while [ -L "$target_file" ]; do
12 |     target_file=$(readlink $target_file)
13 |     cd $(dirname $target_file)
14 |     target_file=$(basename $target_file)
15 | done
16 | 
17 | # Compute the canonicalized name by finding the physical path
18 | # for the directory we're in and appending the target file.
19 | phys_dir=$(pwd -P)
20 | result=$phys_dir/$target_file
21 | echo $result
22 | 


--------------------------------------------------------------------------------
/examples/openasr2021/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/openasr2021/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/openasr2021/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/swbd/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: dither + specaug + speed perturb
 6 | * Training info: lr 0.001, warmup_steps 25000, batch size 16, 1 gpu, acc_grad 4, 240 epochs
 7 | * Decoding info: average_num 10
 8 | 
 9 | |      decoding mode     |   eval2000 (wer) |
10 | |:----------------------:|:----------------:|
11 | |   ctc_greedy_search    |       32.39%     |
12 | | ctc_prefix_beam_search |       32.39%     |
13 | |         attention      |       31.28%     |
14 | |  attention_rescoring   |       31.36%     |


--------------------------------------------------------------------------------
/examples/swbd/s0/local/MSU_single_letter.txt:
--------------------------------------------------------------------------------
 1 | A ey
 2 | B b iy
 3 | C s iy
 4 | D d iy
 5 | E iy
 6 | F eh f
 7 | G jh iy
 8 | H ey ch
 9 | I ay
10 | J jh ey
11 | K k ey
12 | L eh l
13 | M eh m
14 | N eh n
15 | O ow
16 | P p iy
17 | Q k y uw
18 | R aa r
19 | S eh s
20 | T t iy
21 | U y uw
22 | V v iy
23 | W d ah b ax l y uw
24 | X eh k s
25 | Y w ay
26 | Z z iy
27 | 


--------------------------------------------------------------------------------
/examples/swbd/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/swbd/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/swbd/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/tedlium3/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: using fbank feature, dither, cmvn, without speed perturb (segments are not supported yet)
 6 | * Training info: lr 0.001, batch size 20, 8 gpu, acc_grad 1, 240 epochs, dither 0.1
 7 | * Decoding info: ctc_weight 0.5, average_num 10
 8 | 
 9 | 
10 | | decoding mode       | Dev WER | Test WER |
11 | |---------------------|---------|----------|
12 | | attention rescoring | 9.54%   | 8.66%    |


--------------------------------------------------------------------------------
/examples/tedlium3/s0/local/join_suffix.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | #
 3 | # Copyright  2014  Nickolay V. Shmyrev
 4 | #            2016  Johns Hopkins University (author: Daniel Povey)
 5 | # Apache 2.0
 6 | 
 7 | import sys
 8 | 
 9 | # This script joins together pairs of split-up words like "you 're" -> "you're".
10 | # The TEDLIUM transcripts are normalized in a way that's not traditional for
11 | # speech recognition.
12 | 
# Track the previous raw input line so that consecutive duplicates are dropped.
last_line = ""
for raw in sys.stdin:
    if raw != last_line:
        tokens = raw.split()
        total = len(tokens)
        joined = []
        pos = 0
        while pos < total:
            # Look one token ahead; a follower starting with an apostrophe
            # is glued onto the current token and consumed with it.
            follower = tokens[pos + 1] if pos + 1 < total else None
            if follower is not None and follower.startswith("'"):
                joined.append(tokens[pos] + follower)
                pos += 2
            else:
                joined.append(tokens[pos])
                pos += 1
        print(" ".join(joined))
        last_line = raw
29 | 


--------------------------------------------------------------------------------
/examples/tedlium3/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/tedlium3/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/tedlium3/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/timit/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: dither + specaug + speed perturb
 6 | * Training info: lr 0.002, warmup_steps 5000, batch size 16, 1 gpu, acc_grad 4, 120 epochs
 7 | * Decoding info: average_num 20
 8 | * trans_type: phn
 9 | 
10 | 
11 | |     decoding mode      | test (wer) |
12 | | :--------------------: | :---------: |
13 | |   ctc_greedy_search    |   16.70%    |
14 | | ctc_prefix_beam_search |   16.60%    |
15 | |       attention        |   22.37%    |
16 | |  attention_rescoring   |   16.60%    |
17 | 
18 | ## Transformer Result
19 | 
20 | * Feature info: dither + specaug + speed perturb
21 | * Training info: lr 0.002, warmup_steps 5000, batch size 16, 1 gpu, acc_grad 4, 120 epochs
22 | * Decoding info: average_num 20
23 | * trans_type: phn
24 | 
25 | 
26 | |     decoding mode      | test (wer) |
27 | | :--------------------: | :---------: |
28 | |   ctc_greedy_search    |   17.78%    |
29 | | ctc_prefix_beam_search |   17.46%    |
30 | |       attention        |   21.77%    |
31 | |  attention_rescoring   |   17.06%    |


--------------------------------------------------------------------------------
/examples/timit/s0/local/dev_spk.list:
--------------------------------------------------------------------------------
 1 | faks0
 2 | fdac1
 3 | fjem0
 4 | mgwt0
 5 | mjar0
 6 | mmdb1
 7 | mmdm2
 8 | mpdf0
 9 | fcmh0
10 | fkms0
11 | mbdg0
12 | mbwm0
13 | mcsh0
14 | fadg0
15 | fdms0
16 | fedw0
17 | mgjf0
18 | mglb0
19 | mrtk0
20 | mtaa0
21 | mtdt0
22 | mthc0
23 | mwjg0
24 | fnmr0
25 | frew0
26 | fsem0
27 | mbns0
28 | mmjr0
29 | mdls0
30 | mdlf0
31 | mdvc0
32 | mers0
33 | fmah0
34 | fdrw0
35 | mrcs0
36 | mrjm4
37 | fcal1
38 | mmwh0
39 | fjsj0
40 | majc0
41 | mjsw0
42 | mreb0
43 | fgjd0
44 | fjmg0
45 | mroa0
46 | mteb0
47 | mjfc0
48 | mrjr0
49 | fmml0
50 | mrws1
51 | 


--------------------------------------------------------------------------------
/examples/timit/s0/local/phones.60-48-39.map:
--------------------------------------------------------------------------------
 1 | aa  aa  aa
 2 | ae  ae  ae
 3 | ah  ah  ah
 4 | ao  ao  aa
 5 | aw  aw  aw
 6 | ax  ax  ah
 7 | ax-h  ax  ah
 8 | axr  er  er
 9 | ay  ay  ay
10 | b  b  b
11 | bcl  vcl  sil
12 | ch  ch  ch
13 | d  d  d
14 | dcl  vcl  sil
15 | dh  dh  dh
16 | dx  dx  dx
17 | eh  eh  eh
18 | el  el  l
19 | em  m  m
20 | en  en  n
21 | eng  ng  ng
22 | epi  epi  sil
23 | er  er  er
24 | ey  ey  ey
25 | f  f  f
26 | g  g  g
27 | gcl  vcl  sil
28 | h#  sil  sil
29 | hh  hh  hh
30 | hv  hh  hh
31 | ih  ih  ih
32 | ix  ix  ih
33 | iy  iy  iy
34 | jh  jh  jh
35 | k  k  k
36 | kcl  cl  sil
37 | l  l  l
38 | m  m  m
39 | n  n  n
40 | ng  ng  ng
41 | nx  n  n
42 | ow  ow  ow
43 | oy  oy  oy
44 | p  p  p
45 | pau  sil  sil
46 | pcl  cl  sil
47 | q
48 | r  r  r
49 | s  s  s
50 | sh  sh  sh
51 | t  t  t
52 | tcl  cl  sil
53 | th  th  th
54 | uh  uh  uh
55 | uw  uw  uw
56 | ux  uw  uw
57 | v  v  v
58 | w  w  w
59 | y  y  y
60 | z  z  z
61 | zh  zh  sh
62 | 


--------------------------------------------------------------------------------
/examples/timit/s0/local/sph2pipe_process.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import sys
 5 | import os
 6 | 
 7 | 
 8 | def sph2pipe_wav(in_wav, tmp_out_wav, out_wav):
 9 |     with open(in_wav, 'r', encoding='utf-8') as in_f:
10 |         with open(tmp_out_wav, 'w', encoding='utf-8') as tmp_out_f:
11 |             with open(out_wav, 'w', encoding='utf-8') as out_f:
12 |                 for line in in_f:
13 |                     _tmp = line.strip().split(' ')
14 |                     wav_out_path = _tmp[4]
15 |                     wav_out_path = wav_out_path.split('/')
16 |                     wav_out_path[-4] = wav_out_path[-4] + '_pipe'
17 |                     if not os.path.exists('/'.join(wav_out_path[:-1])):
18 |                         os.makedirs('/'.join(wav_out_path[:-1]))
19 |                     wav_out_path = '/'.join(wav_out_path)
20 |                     tmp_out_f.write(' '.join(_tmp[1:5]) + ' ' + wav_out_path +
21 |                                     '\n')
22 |                     out_f.write(_tmp[0] + ' ' + wav_out_path + '\n')
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     if len(sys.argv) != 4:
27 |         print('wrong input parameter')
28 |         raise NotImplementedError(len(sys.argv))
29 |     in_wav = sys.argv[1]
30 |     tmp_out_wav = sys.argv[2]
31 |     out_wav = sys.argv[3]
32 |     sph2pipe_wav(in_wav, tmp_out_wav, out_wav)
33 | 


--------------------------------------------------------------------------------
/examples/timit/s0/local/test_spk.list:
--------------------------------------------------------------------------------
 1 | mdab0
 2 | mwbt0
 3 | felc0
 4 | mtas1
 5 | mwew0
 6 | fpas0
 7 | mjmp0
 8 | mlnt0
 9 | fpkt0
10 | mlll0
11 | mtls0
12 | fjlm0
13 | mbpm0
14 | mklt0
15 | fnlp0
16 | mcmj0
17 | mjdh0
18 | fmgd0
19 | mgrt0
20 | mnjm0
21 | fdhc0
22 | mjln0
23 | mpam0
24 | fmld0
25 | 


--------------------------------------------------------------------------------
/examples/timit/s0/local/timit_format_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2013  (Author: Daniel Povey)
 4 | # Apache 2.0
 5 | 
 6 | # This script takes data prepared in a corpus-dependent way
 7 | # in data/local/, and converts it into the "canonical" form,
 8 | # in various subdirectories of data/, e.g. data/lang, data/train, etc.
 9 | 
. ./path.sh || exit 1;

echo "Preparing train, dev and test data"
srcdir=data/local/data


for x in train dev test; do
    mkdir -p data/$x
    # cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
    local/sph2pipe_process.py $srcdir/${x}_wav.scp data/${x}/tmp_wav.scp data/${x}/wav.scp || exit 1;
    # Every line of tmp_wav.scp (written by sph2pipe_process.py above) is
    # echoed and then executed as a command. -r keeps backslashes literal;
    # $line stays unquoted on purpose so it splits into command + arguments.
    while read -r line
    do
      echo "$line"
      $line
    done < data/${x}/tmp_wav.scp
    rm data/${x}/tmp_wav.scp

    cp $srcdir/$x.text data/$x/text || exit 1;
    cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
    cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
    tools/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
    # stm/glm are copied only when present. Using "if" (not "[ ] && cp")
    # keeps a missing file from becoming the loop's, and therefore the
    # script's, non-zero exit status.
    if [ -e $srcdir/${x}.stm ]; then cp $srcdir/${x}.stm data/$x/stm; fi
    if [ -e $srcdir/${x}.glm ]; then cp $srcdir/${x}.glm data/$x/glm; fi
    # tools/validate_data_dir.sh --no-feats data/$x || exit 1
done


--------------------------------------------------------------------------------
/examples/timit/s0/local/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
# At most one input file may be given; with none, input is read from stdin.
if (scalar(@ARGV) > 1) {
    die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
}

# %known_speaker marks speakers already seen, @speaker_order keeps them in
# first-appearance order, %utts_by_speaker collects each speaker's utterances.
my (%known_speaker, %utts_by_speaker, @speaker_order);

while (my $entry = <>) {
    my @fields = split(" ", $entry);
    die "Invalid line in utt2spk file: $entry" unless @fields == 2;
    my ($utt, $spk) = @fields;
    unless ($known_speaker{$spk}) {
        $known_speaker{$spk} = 1;
        push @speaker_order, $spk;
    }
    push @{ $utts_by_speaker{$spk} }, "$utt";
}

# One output line per speaker: the speaker followed by its utterances.
for my $spk (@speaker_order) {
    my $utt_list = join(' ', @{ $utts_by_speaker{$spk} });
    print "$spk $utt_list\n";
}
39 | 


--------------------------------------------------------------------------------
/examples/timit/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH
9 | 


--------------------------------------------------------------------------------
/examples/timit/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/timit/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/vkw2021/s0/local/vkw_data_prep.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2021 Tencent Inc. (Author: Yougen Yuan).
 3 | # Apache 2.0
 4 | 
 5 | # Prepare the VKW challenge data below ./data.
 6 | #   stage 0: unpack data/vkw_v1.1.zip (to be downloaded by hand) unless
 7 | #            data/vkw already exists
 8 | #   stage 1: check that the training transcript data/train/text is present
 9 | # stage/stop_stage are fixed below, so only stage 0 runs unless they are edited.
10 | # Run from the example directory: ./path.sh and ./data are resolved against
11 | # the current directory. Exits non-zero when required data is missing.
12 | 
13 | current_dir=$(pwd)
14 | stage=0
15 | stop_stage=0
16 | . ./path.sh || exit 1;
17 | 
18 | if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
19 |   cd "$current_dir/data/" || exit 1
20 |   # Use filesystem tests (-e / -f); the archive is only required when the
21 |   # unpacked data is not there yet.
22 |   if [ ! -e vkw ]; then
23 |     if [ ! -f vkw_v1.1.zip ]; then
24 |       echo "wget vkw challenge data to this directory"
25 |       exit 1
26 |     fi
27 |     unzip vkw_v1.1.zip || exit 1
28 |   fi
29 |   cd "$current_dir" || exit 1
30 | fi
31 | 
32 | if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
33 |   x=train
34 |   if [ ! -f data/${x}/text ]; then
35 |     echo "vkw trainset is missing, wget to this directory"
36 |     exit 1
37 |   fi
38 | fi
39 | 
40 | echo "$0: vkw  data preparation succeeded"
41 | 


--------------------------------------------------------------------------------
/examples/vkw2021/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..  # three levels above the current directory: source this file from the example directory
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH  # these directories take precedence over the existing PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH  # relative entry: it is resolved against the directory Python is started from
9 | 


--------------------------------------------------------------------------------
/examples/vkw2021/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/vkw2021/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/wenetspeech/paraformer/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..  # three levels above the current directory: source this file from the example directory
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_BIN=${BUILD_DIR}/../fc_base/openfst-build/src  # NOTE(review): sibling path.sh files use OPENFST_PREFIX_DIR (openfst-populate-prefix) instead; confirm this location is still valid
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_BIN}/bin:$PATH  # these directories take precedence over the existing PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH  # relative entry: it is resolved against the directory Python is started from
9 | 


--------------------------------------------------------------------------------
/examples/wenetspeech/paraformer/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/wenetspeech/paraformer/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/wenetspeech/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..  # three levels above the current directory: source this file from the example directory
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH  # these directories take precedence over the existing PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH  # relative entry: it is resolved against the directory Python is started from
9 | 


--------------------------------------------------------------------------------
/examples/wenetspeech/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/wenetspeech/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/examples/wenetspeech/whisper/conf/ds_stage1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_micro_batch_size_per_gpu": 1,
 3 |   "gradient_accumulation_steps": 8,
 4 |   "steps_per_print": 100,
 5 |   "gradient_clipping": 5,
 6 |   "fp16": {
 7 |     "enabled": false,
 8 |     "auto_cast": false,
 9 |     "loss_scale": 0,
10 |     "initial_scale_power": 16,
11 |     "loss_scale_window": 1000,
12 |     "hysteresis": 2,
13 |     "consecutive_hysteresis": false,
14 |     "min_loss_scale": 1
15 |   },
16 |   "bf16": {
17 |    "enabled": true
18 |   },
19 |   "zero_force_ds_cpu_optimizer": false,
20 |   "zero_optimization": {
21 |     "stage": 1,
22 |     "offload_optimizer": {
23 |       "device": "none",
24 |       "pin_memory": true
25 |     },
26 |     "allgather_partitions": true,
27 |     "allgather_bucket_size": 5e8,
28 |     "overlap_comm": true,
29 |     "reduce_scatter": true,
30 |     "reduce_bucket_size": 5e8,
31 |     "contiguous_gradients" : true
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/examples/wenetspeech/whisper/local:
--------------------------------------------------------------------------------
1 | ../../aishell/whisper/local


--------------------------------------------------------------------------------
/examples/wenetspeech/whisper/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..  # three levels above the current directory: source this file from the example directory
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH  # these directories take precedence over the existing PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH  # relative entry: it is resolved against the directory Python is started from
9 | 


--------------------------------------------------------------------------------
/examples/wenetspeech/whisper/tools:
--------------------------------------------------------------------------------
1 | ../../../tools


--------------------------------------------------------------------------------
/examples/wenetspeech/whisper/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet


--------------------------------------------------------------------------------
/examples/wsj/s0/README.md:
--------------------------------------------------------------------------------
 1 | # Performance Record
 2 | 
 3 | ## Conformer Result
 4 | 
 5 | * Feature info: dither + specaug + speed perturb
 6 | * Training info: lr 0.002, warmup_steps 20000, batch size 16, 1 gpu, acc_grad 4, 120 epochs
 7 | * Decoding info: average_num 20
 8 | 
 9 | |      decoding mode     |   dev93 (cer) |  dev93 (wer)  |
10 | |:----------------------:|:-------------:|:-------------:|
11 | |   ctc_greedy_search    |     5.25%     |    13.16%     |
12 | | ctc_prefix_beam_search |     5.17%     |    13.10%     |
13 | |  attention_rescoring   |     5.11%     |    12.17%     |


--------------------------------------------------------------------------------
/examples/wsj/s0/local/flist2scp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # takes in a file list with lines like
19 | # /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
20 | # and outputs an scp in kaldi format with lines like
21 | # 4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
22 | # (the first field is the utterance-id: the file's basename without its extension, lowercased).
23 | 
24 | 
25 | while(<>){
26 |     m:^\S+/(\w+)\.[wW][vV]1$: || die "Bad line $_";  # accept only paths ending in .wv1 (any letter case); the capture is the basename without that extension
27 |     $id = $1;
28 |     $id =~ tr/A-Z/a-z/;  # Necessary because of weirdness on disk 13-16.1 (uppercase filenames)
29 |     print "$id $_";  # the input line is printed unchanged, including its own line ending
30 | }
31 | 


--------------------------------------------------------------------------------
/examples/wsj/s0/local/wsj_format_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2012  Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 4 | #           2015  Guoguo Chen
 5 | # Apache 2.0
 6 | 
 7 | # This script takes data prepared in a corpus-dependent way
 8 | # in data/local/data, and copies it into the "canonical" layout:
 9 | # for each of train_si284, test_eval92 and test_dev93 it creates
10 | # data/<set>/wav.scp and data/<set>/text.
11 | 
12 | # Don't bother doing train_si84 separately (although we have the file lists
13 | # in data/local/) because it's just the first 7138 utterances in train_si284.
14 | # We'll create train_si84 after doing the feature extraction.
15 | 
16 | echo "$0 $@"  # Print the command line for logging
17 | . ./tools/parse_options.sh || exit 1;
18 | 
19 | . ./path.sh || exit 1;
20 | 
21 | echo "Preparing train and test data"
22 | srcdir=data/local/data
23 | 
24 | for x in train_si284 test_eval92 test_dev93; do
25 |   mkdir -p data/$x
26 |   cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;  # stop at the first file that cannot be copied
27 |   cp $srcdir/$x.txt data/$x/text || exit 1;
28 | done
29 | 
30 | echo "Succeeded in formatting data."


--------------------------------------------------------------------------------
/examples/wsj/s0/local/wsj_gen_wav.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -eu
 4 | 
 5 | [ $# -ne 2 ] && echo "Script format error: $0 <data-dir> <dump-dir>" && exit 0
 6 | 
 7 | data_dir=$1
 8 | dump_dir=$2
 9 | 
10 | mkdir -p $dump_dir
11 | 
12 | num_utts=$(cat $data_dir/wav.scp | wc -l)
13 | echo "Orginal utterances (.wav + .wv1): $num_utts"
14 | 
15 | # cat $data_dir/wav.scp | grep "sph2pipe" | \
16 | #   awk -v dir=$dump_dir '{printf("%s -f wav %s %s/%s.wav\n", $2, $5, dir, $1)}' | bash
17 | 
18 | awk '{print $1,$5}' $data_dir/wav.scp > $data_dir/raw_wav.scp
19 | find $dump_dir -name "*.wav" | awk -F '/' '{printf("%s %s\n", $NF, $0)}' | \
20 |   sed 's:\.wav::' > $data_dir/wav.scp
21 | 
22 | num_utts=$(cat $data_dir/wav.scp | wc -l)
23 | echo "Wave utterances (.wav): $num_utts"
24 | 
25 | echo "$0: Generate wav => $dump_dir done"
26 | 


--------------------------------------------------------------------------------
/examples/wsj/s0/path.sh:
--------------------------------------------------------------------------------
1 | export WENET_DIR=$PWD/../../..  # three levels above the current directory: source this file from the example directory
2 | export BUILD_DIR=${WENET_DIR}/runtime/libtorch/build
3 | export OPENFST_PREFIX_DIR=${BUILD_DIR}/../fc_base/openfst-subbuild/openfst-populate-prefix
4 | export PATH=$PWD:${BUILD_DIR}/bin:${BUILD_DIR}/kaldi:${OPENFST_PREFIX_DIR}/bin:$PATH  # these directories take precedence over the existing PATH
5 | 
6 | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
7 | export PYTHONIOENCODING=UTF-8
8 | export PYTHONPATH=../../../:$PYTHONPATH  # relative entry: it is resolved against the directory Python is started from
9 | 


--------------------------------------------------------------------------------
/examples/wsj/s0/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/examples/wsj/s0/wenet:
--------------------------------------------------------------------------------
1 | ../../../wenet/


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Pillow
 2 | pyyaml>=5.1
 3 | sentencepiece
 4 | tensorboard
 5 | tensorboardX
 6 | textgrid
 7 | pytest
 8 | flake8==3.8.2
 9 | flake8-bugbear
10 | flake8-comprehensions
11 | flake8-executable
12 | flake8-pyi==20.5.0
13 | mccabe
14 | pycodestyle==2.6.0
15 | pyflakes==2.2.0
16 | clang-format==17.0.6
17 | cpplint==1.6.1
18 | torch>=2.1.2
19 | torchaudio>=2.1.2
20 | tqdm
21 | deepspeed>=0.14.0
22 | librosa
23 | openai-whisper==20231117
24 | pre-commit==3.5.0
25 | langid
26 | 


--------------------------------------------------------------------------------
/runtime/android/.gitignore:
--------------------------------------------------------------------------------
 1 | *.iml
 2 | .gradle
 3 | /local.properties
 4 | /.idea/caches
 5 | /.idea/libraries
 6 | /.idea/modules.xml
 7 | /.idea/workspace.xml
 8 | /.idea/navEditor.xml
 9 | /.idea/assetWizardSettings.xml
10 | .DS_Store
11 | /build
12 | /captures
13 | .externalNativeBuild
14 | .cxx
15 | local.properties
16 | 


--------------------------------------------------------------------------------
/runtime/android/app/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /release
3 | 


--------------------------------------------------------------------------------
/runtime/android/app/proguard-rules.pro:
--------------------------------------------------------------------------------
 1 | # Add project specific ProGuard rules here.
 2 | # You can control the set of applied configuration files using the
 3 | # proguardFiles setting in build.gradle.
 4 | #
 5 | # For more details, see
 6 | #   http://developer.android.com/guide/developing/tools/proguard.html
 7 | 
 8 | # If your project uses WebView with JS, uncomment the following
 9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | #   public *;
13 | #}
14 | 
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 | 
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile


--------------------------------------------------------------------------------
/runtime/android/app/src/androidTest/java/com/mobvoi/wenet/ExampleInstrumentedTest.java:
--------------------------------------------------------------------------------
 1 | package com.mobvoi.wenet;
 2 | 
 3 | import android.content.Context;
 4 | 
 5 | import androidx.test.platform.app.InstrumentationRegistry;
 6 | import androidx.test.ext.junit.runners.AndroidJUnit4;
 7 | 
 8 | import org.junit.Test;
 9 | import org.junit.runner.RunWith;
10 | 
11 | import static org.junit.Assert.*;
12 | 
13 | /**
14 |  * Instrumented test, which will execute on an Android device.
15 |  *
16 |  * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
17 |  */
18 | @RunWith(AndroidJUnit4.class)
19 | public class ExampleInstrumentedTest {
20 |     @Test
21 |     public void useAppContext() {
22 |         // Context of the app under test.
23 |         Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
24 |         assertEquals("com.mobvoi.wenet", appContext.getPackageName());  // the expected value is the package name of this app
25 |     }
26 | }


--------------------------------------------------------------------------------
/runtime/android/app/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <manifest xmlns:android="http://schemas.android.com/apk/res/android"
 3 |     xmlns:tools="http://schemas.android.com/tools"
 4 |     package="com.mobvoi.wenet">
 5 |     <uses-permission android:name="android.permission.RECORD_AUDIO" />
 6 |     <application
 7 |         android:allowBackup="true"
 8 |         android:icon="@mipmap/ic_launcher"
 9 |         android:label="@string/app_name"
10 |         android:roundIcon="@mipmap/ic_launcher_round"
11 |         android:supportsRtl="true"
12 |         tools:replace="android:theme"
13 |         android:theme="@style/Theme.Wenet">
14 |         <activity android:name=".MainActivity">
15 |             <intent-filter>
16 |                 <action android:name="android.intent.action.MAIN" />
17 | 
18 |                 <category android:name="android.intent.category.LAUNCHER" />
19 |             </intent-filter>
20 |         </activity>
21 |     </application>
22 | 
23 | </manifest>
24 | 


--------------------------------------------------------------------------------
/runtime/android/app/src/main/assets/README.md:
--------------------------------------------------------------------------------
1 | put final.zip and units.txt here.
2 | 


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.4.1)
 2 | set(TARGET wenet)  # name of the shared library built from wenet.cc below
 3 | project(${TARGET} CXX)
 4 | set(CMAKE_CXX_STANDARD 14)
 5 | include(ExternalProject)
 6 | 
 7 | option(TORCH "whether to build with Torch" ON)
 8 | option(ONNX "whether to build with ONNX" OFF)
 9 | option(ITN "whether to use WeTextProcessing" ON)
10 | set(CMAKE_VERBOSE_MAKEFILE on)
11 | set(build_DIR ${CMAKE_SOURCE_DIR}/../../../build)  # three levels above this source directory
12 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)  # lets the include() calls below find modules in ./cmake
13 | string(REPLACE "-Wl,--exclude-libs,libgcc_real.a" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")  # drop this flag from the executable linker flags
14 | 
15 | include(libtorch)
16 | include(openfst)
17 | include(wetextprocessing)
18 | 
19 | include_directories(
20 |   ${CMAKE_SOURCE_DIR}
21 |   ${CMAKE_SOURCE_DIR}/kaldi
22 | )
23 | 
24 | add_subdirectory(utils)
25 | add_subdirectory(frontend)
26 | add_subdirectory(post_processor)
27 | add_subdirectory(kaldi)  # kaldi: wfst based decoder
28 | add_subdirectory(decoder)
29 | add_dependencies(post_processor wetextprocessing)  # wetextprocessing must be built before post_processor
30 | 
31 | link_libraries(frontend decoder android)  # applies to every target created after this line
32 | add_library(${TARGET} SHARED wenet.cc)
33 | 
34 | add_executable(decoder_main bin/decoder_main.cc)
35 | target_link_libraries(decoder_main PUBLIC libc++_shared.so)
36 | 


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/bin:
--------------------------------------------------------------------------------
1 | ../../../../../core/bin


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/cmake:
--------------------------------------------------------------------------------
1 | ../../../../../core/cmake


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/decoder:
--------------------------------------------------------------------------------
1 | ../../../../../core/decoder


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/frontend:
--------------------------------------------------------------------------------
1 | ../../../../../core/frontend


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/kaldi:
--------------------------------------------------------------------------------
1 | ../../../../../core/kaldi


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/patch:
--------------------------------------------------------------------------------
1 | ../../../../../core/patch


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/post_processor:
--------------------------------------------------------------------------------
1 | ../../../../../core/post_processor


--------------------------------------------------------------------------------
/runtime/android/app/src/main/cpp/utils:
--------------------------------------------------------------------------------
1 | ../../../../../core/utils


--------------------------------------------------------------------------------
/runtime/android/app/src/main/java/com/mobvoi/wenet/Recognize.java:
--------------------------------------------------------------------------------
 1 | package com.mobvoi.wenet;
 2 | 
 3 | public class Recognize {  // static entry points whose implementations live in the native "wenet" library
 4 | 
 5 |   static {
 6 |     System.loadLibrary("wenet");  // loaded once, when this class is initialized
 7 |   }
 8 | 
 9 |   public static native void init(String modelDir);  // NOTE(review): presumably modelDir contains the model files; confirm in the native code
10 |   public static native void reset();
11 |   public static native void acceptWaveform(short[] waveform);  // NOTE(review): sample format and rate are defined by the native side; confirm
12 |   public static native void setInputFinished();
13 |   public static native boolean getFinished();
14 |   public static native void startDecode();
15 |   public static native String getResult();
16 | }
17 | 


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
3 |     <background android:drawable="@drawable/ic_launcher_background" />
4 |     <foreground android:drawable="@drawable/ic_launcher_foreground" />
5 | </adaptive-icon>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
3 |     <background android:drawable="@drawable/ic_launcher_background" />
4 |     <foreground android:drawable="@drawable/ic_launcher_foreground" />
5 | </adaptive-icon>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/values-night/themes.xml:
--------------------------------------------------------------------------------
 1 | <resources xmlns:tools="http://schemas.android.com/tools">
 2 |     <!-- Base application theme. -->
 3 |     <style name="Theme.Wenet" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
 4 |         <!-- Primary brand color. -->
 5 |         <item name="colorPrimary">@color/purple_200</item>
 6 |         <item name="colorPrimaryVariant">@color/purple_700</item>
 7 |         <item name="colorOnPrimary">@color/black</item>
 8 |         <!-- Secondary brand color. -->
 9 |         <item name="colorSecondary">@color/teal_200</item>
10 |         <item name="colorSecondaryVariant">@color/teal_200</item>
11 |         <item name="colorOnSecondary">@color/black</item>
12 |         <!-- Status bar color. -->
13 |         <item name="android:statusBarColor" tools:targetApi="l">?attr/colorPrimaryVariant</item>
14 |         <!-- Customize your theme here. -->
15 |     </style>
16 | </resources>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/values/attrs.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <resources>
 3 |     <declare-styleable name="VoiceRect">
 4 |         <attr name="RectCount" format="integer" />
 5 |         <attr name="RectDownColor" format="reference|color" />
 6 |         <attr name="RectSpeed" format="integer" />
 7 |         <attr name="RectTopColor" format="reference|color" />
 8 |         <attr name="RectOffset" format="integer" />
 9 |     </declare-styleable>
10 |     <declare-styleable name="VoiceLine">
11 |         <attr name="amplitude_big" format="dimension|reference" />
12 |         <attr name="amplitude_small" format="dimension|reference" />
13 |         <attr name="lineColor" format="color|reference" />
14 |         <attr name="backColor" format="color|reference" />
15 |         <attr name="frequency" format="float" />
16 |     </declare-styleable>
17 | </resources>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/values/colors.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <resources>
 3 |     <color name="purple_200">#FFBB86FC</color>
 4 |     <color name="purple_500">#FF6200EE</color>
 5 |     <color name="purple_700">#FF3700B3</color>
 6 |     <color name="teal_200">#FF03DAC5</color>
 7 |     <color name="teal_700">#FF018786</color>
 8 |     <color name="black">#FF000000</color>
 9 |     <color name="white">#FFFFFFFF</color>
10 | 
11 |     <color name="red">#f16d7a</color>
12 |     <color name="green">#b7d28d</color>
13 |     <color name="blue">#b8f1ed</color>
14 |     <color name="top_color">#b7d28d</color>
15 |     <color name="down_color">#b8f1ed</color>
16 | </resources>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/values/strings.xml:
--------------------------------------------------------------------------------
1 | <resources>
2 |     <string name="app_name">wenet</string>
3 | </resources>


--------------------------------------------------------------------------------
/runtime/android/app/src/main/res/values/themes.xml:
--------------------------------------------------------------------------------
 1 | <resources xmlns:tools="http://schemas.android.com/tools">
 2 |     <!-- Base application theme. -->
 3 |     <style name="Theme.Wenet" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
 4 |         <!-- Primary brand color. -->
 5 |         <item name="colorPrimary">@color/purple_500</item>
 6 |         <item name="colorPrimaryVariant">@color/purple_700</item>
 7 |         <item name="colorOnPrimary">@color/white</item>
 8 |         <!-- Secondary brand color. -->
 9 |         <item name="colorSecondary">@color/teal_200</item>
10 |         <item name="colorSecondaryVariant">@color/teal_700</item>
11 |         <item name="colorOnSecondary">@color/black</item>
12 |         <!-- Status bar color. -->
13 |         <item name="android:statusBarColor" tools:targetApi="l">?attr/colorPrimaryVariant</item>
14 |         <!-- Customize your theme here. -->
15 |     </style>
16 | </resources>


--------------------------------------------------------------------------------
/runtime/android/app/src/test/java/com/mobvoi/wenet/ExampleUnitTest.java:
--------------------------------------------------------------------------------
 1 | package com.mobvoi.wenet;
 2 | 
 3 | import org.junit.Test;
 4 | 
 5 | import static org.junit.Assert.*;
 6 | 
 7 | /**
 8 |  * Example local unit test, which will execute on the development machine (host).
 9 |  *
10 |  * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
11 |  */
12 | public class ExampleUnitTest {
13 |     @Test
14 |     public void addition_isCorrect() {
15 |         assertEquals(4, 2 + 2);  // placeholder check: exercises only JUnit itself, no app code
16 |     }
17 | }


--------------------------------------------------------------------------------
/runtime/android/app/wenet.keystore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/app/wenet.keystore


--------------------------------------------------------------------------------
/runtime/android/build.gradle:
--------------------------------------------------------------------------------
 1 | // Top-level build file: repositories and plugins shared by all modules.
 2 | buildscript {
 3 |     repositories {
 4 |         google()
 5 |         // Consulted before jcenter(), which Gradle marks as deprecated.
 6 |         mavenCentral()
 7 |         jcenter()
 8 |     }
 9 |     dependencies {
10 |         classpath 'com.android.tools.build:gradle:7.4.2'
11 |     }
12 | }
13 | 
14 | allprojects {
15 |     repositories {
16 |         google()
17 |         // Consulted before jcenter(), which Gradle marks as deprecated.
18 |         mavenCentral()
19 |         jcenter()
20 |         maven { url 'https://jitpack.io' }
21 |     }
22 | }
23 | 
24 | // Removes the root project's build directory.
25 | task clean(type: Delete) {
26 |     delete rootProject.buildDir
27 | }


--------------------------------------------------------------------------------
/runtime/android/gradle.properties:
--------------------------------------------------------------------------------
 1 | # Project-wide Gradle settings.
 2 | # IDE (e.g. Android Studio) users:
 3 | # Gradle settings configured through the IDE *will override*
 4 | # any settings specified in this file.
 5 | # For more details on how to configure your build environment visit
 6 | # http://www.gradle.org/docs/current/userguide/build_environment.html
 7 | # Specifies the JVM arguments used for the daemon process.
 8 | # The setting is particularly useful for tweaking memory settings.
 9 | org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
10 | # When configured, Gradle will run in incubating parallel mode.
11 | # This option should only be used with decoupled projects. More details, visit
12 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
13 | # org.gradle.parallel=true
14 | # AndroidX package structure to make it clearer which packages are bundled with the
15 | # Android operating system, and which are packaged with your app's APK
16 | # https://developer.android.com/topic/libraries/support-library/androidx-rn
17 | android.useAndroidX=true
18 | # Automatically convert third-party libraries to use AndroidX
19 | android.enableJetifier=true


--------------------------------------------------------------------------------
/runtime/android/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/android/gradle/wrapper/gradle-wrapper.jar


--------------------------------------------------------------------------------
/runtime/android/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Tue Jan 12 17:33:20 CST 2021
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip
7 | 


--------------------------------------------------------------------------------
/runtime/android/settings.gradle:
--------------------------------------------------------------------------------
1 | include ':app'
2 | rootProject.name = "wenet"


--------------------------------------------------------------------------------
/runtime/core/api/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | if(TORCH OR ONNX)
2 |  add_library(wenet_api SHARED wenet_api.cc)
3 |  target_link_libraries(wenet_api PUBLIC decoder)
4 | endif()
5 | 


--------------------------------------------------------------------------------
/runtime/core/api/README.md:
--------------------------------------------------------------------------------
 1 | # WeNet API
 2 | 
 3 | We refer to [vosk](https://github.com/alphacep/vosk-api/blob/master/src/vosk_api.h)
 4 | for the interface design.
 5 | 
 6 | 
 7 | We are going to implement the following interfaces:
 8 | 
 9 | - [x] non-streaming recognition
10 | - [ ] streaming recognition
11 | - [ ] nbest
12 | - [ ] contextual biasing word
13 | - [ ] alignment
14 | - [ ] language support (post processor)
15 | - [ ] label check
16 | 


--------------------------------------------------------------------------------
/runtime/core/bin/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(decoder_main decoder_main.cc)
 2 | target_link_libraries(decoder_main PUBLIC decoder)
 3 | if(IPEX)
 4 |   target_link_libraries(decoder_main PUBLIC "${TORCH_IPEX_LIBRARIES}")
 5 | endif()
 6 | 
 7 | add_executable(label_checker_main label_checker_main.cc)
 8 | target_link_libraries(label_checker_main PUBLIC decoder)
 9 | 
10 | if(TORCH)
11 |  add_executable(api_main api_main.cc)
12 |  target_link_libraries(api_main PUBLIC wenet_api)
13 | endif()
14 | 
15 | if(WEBSOCKET)
16 |   add_executable(websocket_client_main websocket_client_main.cc)
17 |   target_link_libraries(websocket_client_main PUBLIC websocket)
18 |   add_executable(websocket_server_main websocket_server_main.cc)
19 |   target_link_libraries(websocket_server_main PUBLIC websocket)
20 | endif()
21 | 
22 | if(GRPC)
23 |   add_executable(grpc_server_main grpc_server_main.cc)
24 |   target_link_libraries(grpc_server_main PUBLIC wenet_grpc)
25 |   add_executable(grpc_client_main grpc_client_main.cc)
26 |   target_link_libraries(grpc_client_main PUBLIC wenet_grpc)
27 | endif()
28 | 
29 | if(HTTP)
30 |   add_executable(http_client_main http_client_main.cc)
31 |   target_link_libraries(http_client_main PUBLIC http)
32 |   add_executable(http_server_main http_server_main.cc)
33 |   target_link_libraries(http_server_main PUBLIC http)
34 | endif()
35 | 


--------------------------------------------------------------------------------
/runtime/core/bin/http_server_main.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2023 Ximalaya Speech Team (Xiang Lyu)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "decoder/params.h"
16 | #include "http/http_server.h"
17 | #include "utils/log.h"
18 | 
19 | DEFINE_int32(port, 10086, "http listening port");
20 | 
21 | // Program entry point: parses the command-line flags, creates the decoding
22 | // inputs through the wenet::Init*FromFlags() helpers and calls Start() on a
23 | // wenet::HttpServer constructed with FLAGS_port.
24 | int main(int argc, char* argv[]) {
25 |   gflags::ParseCommandLineFlags(&argc, &argv, false);
26 |   google::InitGoogleLogging(argv[0]);
27 | 
28 |   auto decode_opts = wenet::InitDecodeOptionsFromFlags();
29 |   auto feature_opts = wenet::InitFeaturePipelineConfigFromFlags();
30 |   auto resource = wenet::InitDecodeResourceFromFlags();
31 | 
32 |   wenet::HttpServer http_server(FLAGS_port, feature_opts, decode_opts,
33 |                                 resource);
34 |   LOG(INFO) << "Listening at port " << FLAGS_port;
35 |   http_server.Start();
36 |   return 0;
37 | }
38 | 


--------------------------------------------------------------------------------
/runtime/core/bin/websocket_server_main.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "decoder/params.h"
16 | #include "utils/log.h"
17 | #include "websocket/websocket_server.h"
18 | 
19 | DEFINE_int32(port, 10086, "websocket listening port");
20 | 
21 | // Program entry point: parses the command-line flags, creates the decoding
22 | // inputs through the wenet::Init*FromFlags() helpers and calls Start() on a
23 | // wenet::WebSocketServer constructed with FLAGS_port.
24 | int main(int argc, char* argv[]) {
25 |   gflags::ParseCommandLineFlags(&argc, &argv, false);
26 |   google::InitGoogleLogging(argv[0]);
27 | 
28 |   auto decode_opts = wenet::InitDecodeOptionsFromFlags();
29 |   auto feature_opts = wenet::InitFeaturePipelineConfigFromFlags();
30 |   auto resource = wenet::InitDecodeResourceFromFlags();
31 | 
32 |   wenet::WebSocketServer ws_server(FLAGS_port, feature_opts, decode_opts,
33 |                                    resource);
34 |   LOG(INFO) << "Listening at port " << FLAGS_port;
35 |   ws_server.Start();
36 |   return 0;
37 | }
38 | 


--------------------------------------------------------------------------------
/runtime/core/cmake/boost.cmake:
--------------------------------------------------------------------------------
 1 | FetchContent_Declare(boost
 2 |   URL      https://archives.boost.io/release/1.75.0/source/boost_1_75_0.tar.gz
 3 |   URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a
 4 | )
 5 | FetchContent_MakeAvailable(boost)
 6 | include_directories(${boost_SOURCE_DIR})
 7 | 
 8 | if(MSVC)
 9 |   add_definitions(-DBOOST_ALL_DYN_LINK -DBOOST_ALL_NO_LIB)
10 | endif()
11 | 


--------------------------------------------------------------------------------
/runtime/core/cmake/gflags.cmake:
--------------------------------------------------------------------------------
1 | FetchContent_Declare(gflags
2 |   URL      https://github.com/gflags/gflags/archive/v2.2.2.zip
3 |   URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5
4 | )
5 | FetchContent_MakeAvailable(gflags)
6 | include_directories(${gflags_BINARY_DIR}/include)


--------------------------------------------------------------------------------
/runtime/core/cmake/glog.cmake:
--------------------------------------------------------------------------------
1 | FetchContent_Declare(glog
2 |   URL      https://github.com/google/glog/archive/v0.4.0.zip
3 |   URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
4 | )
5 | FetchContent_MakeAvailable(glog)
6 | include_directories(${glog_SOURCE_DIR}/src ${glog_BINARY_DIR})


--------------------------------------------------------------------------------
/runtime/core/cmake/grpc.cmake:
--------------------------------------------------------------------------------
1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/grpc)
2 | # third_party: grpc
3 | # On how to build grpc, you may refer to https://github.com/grpc/grpc
4 | # We recommend cloning the repo recursively by hand to avoid network connection problems
5 | FetchContent_Declare(gRPC
6 |   GIT_REPOSITORY https://github.com/grpc/grpc
7 |   GIT_TAG        v1.37.1
8 | )
9 | FetchContent_MakeAvailable(gRPC)


--------------------------------------------------------------------------------
/runtime/core/cmake/gtest.cmake:
--------------------------------------------------------------------------------
1 | FetchContent_Declare(googletest
2 |   URL      https://github.com/google/googletest/archive/release-1.11.0.zip
3 |   URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a
4 | )
5 | if(MSVC)
6 |   set(gtest_force_shared_crt ON CACHE BOOL "Always use msvcrt.dll" FORCE)
7 | endif()
8 | FetchContent_MakeAvailable(googletest)


--------------------------------------------------------------------------------
/runtime/core/cmake/pybind11.cmake:
--------------------------------------------------------------------------------
1 | FetchContent_Declare(pybind11
2 |   URL      https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.zip
3 |   URL_HASH SHA256=d1646e6f70d8a3acb2ddd85ce1ed543b5dd579c68b8fb8e9638282af20edead8
4 | )
5 | FetchContent_MakeAvailable(pybind11)
6 | 


--------------------------------------------------------------------------------
/runtime/core/decoder/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set(decoder_srcs
 2 |   asr_decoder.cc
 3 |   asr_model.cc
 4 |   context_graph.cc
 5 |   ctc_prefix_beam_search.cc
 6 |   ctc_wfst_beam_search.cc
 7 |   ctc_endpoint.cc
 8 | )
 9 | 
10 | if(NOT TORCH AND NOT ONNX AND NOT XPU AND NOT IOS AND NOT BPU AND NOT OPENVINO)
11 |   message(FATAL_ERROR "Please build with TORCH or ONNX or OPENVINO or XPU or IOS or BPU!!!")
12 | endif()
13 | if(TORCH OR IOS)
14 |   list(APPEND decoder_srcs torch_asr_model.cc)
15 | endif()
16 | if(ONNX)
17 |   list(APPEND decoder_srcs onnx_asr_model.cc)
18 | endif()
19 | 
20 | add_library(decoder STATIC ${decoder_srcs})
21 | target_link_libraries(decoder PUBLIC kaldi-decoder frontend
22 |                       post_processor utils)
23 | 
24 | if(ANDROID)
25 |   target_link_libraries(decoder PUBLIC ${PYTORCH_LIBRARY} ${FBJNI_LIBRARY})
26 | else()
27 |   if(TORCH)
28 |     target_link_libraries(decoder PUBLIC ${TORCH_LIBRARIES})
29 |   endif()
30 |   if(ONNX)
31 |     target_link_libraries(decoder PUBLIC onnxruntime)
32 |   endif()
33 |   if(BPU)
34 |     target_link_libraries(decoder PUBLIC bpu_asr_model)
35 |   endif()
36 |   if(XPU)
37 |     target_link_libraries(decoder PUBLIC xpu_conformer)
38 |   endif()
39 |   if(OPENVINO)
40 |     target_link_libraries(decoder PUBLIC ov_asr_model)
41 |   endif()
42 | endif()
43 | 


--------------------------------------------------------------------------------
/runtime/core/frontend/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(frontend STATIC
2 |   feature_pipeline.cc
3 |   fft.cc
4 | )
5 | target_link_libraries(frontend PUBLIC utils)


--------------------------------------------------------------------------------
/runtime/core/frontend/fft.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2016 Network
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef FRONTEND_FFT_H_
16 | #define FRONTEND_FFT_H_
17 | 
18 | #ifndef M_PI
19 | #define M_PI 3.1415926535897932384626433832795
20 | #endif
21 | #ifndef M_2PI
22 | #define M_2PI 6.283185307179586476925286766559005
23 | #endif
24 | 
25 | namespace wenet {
26 | 
27 | // Fast Fourier Transform
28 | 
29 | void make_sintbl(int n, float* sintbl);
30 | 
31 | void make_bitrev(int n, int* bitrev);
32 | 
33 | int fft(const int* bitrev, const float* sintbl, float* x, float* y, int n);
34 | 
35 | }  // namespace wenet
36 | 
37 | #endif  // FRONTEND_FFT_H_
38 | 


--------------------------------------------------------------------------------
/runtime/core/grpc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Generate the C++ protobuf and gRPC sources from wenet.proto.
 2 | set(PROTO_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
 3 | add_custom_command(
 4 |   OUTPUT  ${PROTO_DIR}/wenet.pb.cc
 5 |           ${PROTO_DIR}/wenet.pb.h
 6 |           ${PROTO_DIR}/wenet.grpc.pb.cc
 7 |           ${PROTO_DIR}/wenet.grpc.pb.h
 8 |   COMMAND ${protobuf_BINARY_DIR}/protoc
 9 |   ARGS --grpc_out "${PROTO_DIR}"
10 |     --cpp_out "${PROTO_DIR}"
11 |     -I "${PROTO_DIR}"
12 |     --plugin=protoc-gen-grpc=${grpc_BINARY_DIR}/grpc_cpp_plugin
13 |     wenet.proto
14 |   # Re-run protoc whenever the schema changes; without DEPENDS the rule only
15 |   # runs while the generated files are missing.
16 |   DEPENDS "${PROTO_DIR}/wenet.proto")
17 | 
18 | # Static library with the gRPC client/server and the generated sources.
19 | link_directories(${protobuf_BINARY_DIR}/lib)
20 | add_library(wenet_grpc STATIC
21 |   grpc_client.cc
22 |   grpc_server.cc
23 |   wenet.pb.cc
24 |   wenet.grpc.pb.cc
25 | )
26 | target_link_libraries(wenet_grpc PUBLIC grpc++ grpc++_reflection decoder)
27 | 


--------------------------------------------------------------------------------
/runtime/core/http/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(http STATIC
2 |   http_client.cc
3 |   http_server.cc
4 | )
5 | target_link_libraries(http PUBLIC decoder)
6 | 


--------------------------------------------------------------------------------
/runtime/core/kaldi/README.md:
--------------------------------------------------------------------------------
 1 | We use Kaldi decoder to implement TLG based language model integration,
 2 | so we copied related files to this directory.
 3 | The main changes are:
 4 | 
 5 | 1. To minimize the change, we use the same directory tree as Kaldi.
 6 | 
 7 | 2. We replace Kaldi log system with glog in the following way.
 8 | 
 9 | ``` c++
10 | #define KALDI_WARN \
11 |   google::LogMessage(__FILE__, __LINE__, google::GLOG_WARNING).stream()
12 | #define KALDI_ERR \
13 |   google::LogMessage(__FILE__, __LINE__, google::GLOG_ERROR).stream()
14 | #define KALDI_INFO \
15 |   google::LogMessage(__FILE__, __LINE__, google::GLOG_INFO).stream()
16 | #define KALDI_VLOG(v) VLOG(v)
17 | 
18 | #define KALDI_ASSERT(condition) CHECK(condition)
19 | ```
20 | 
21 | 3. We lint all the files to satisfy the lint in WeNet.
22 | 


--------------------------------------------------------------------------------
/runtime/core/kaldi/fstext/fstext-lib.h:
--------------------------------------------------------------------------------
 1 | // fstext/fstext-lib.h
 2 | 
 3 | // Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (author:
 4 | // Daniel Povey)
 5 | 
 6 | // See ../../COPYING for clarification regarding multiple authors
 7 | //
 8 | // Licensed under the Apache License, Version 2.0 (the "License");
 9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at
11 | //
12 | //  http://www.apache.org/licenses/LICENSE-2.0
13 | //
14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 | // MERCHANTABLITY OR NON-INFRINGEMENT.
18 | // See the Apache 2 License for the specific language governing permissions and
19 | // limitations under the License.
20 | 
21 | #ifndef KALDI_FSTEXT_FSTEXT_LIB_H_
22 | #define KALDI_FSTEXT_FSTEXT_LIB_H_
23 | 
24 | #include "fst/fstlib.h"
25 | #include "fstext/determinize-lattice.h"
26 | #include "fstext/determinize-star.h"
27 | #include "fstext/fstext-utils.h"
28 | #include "fstext/kaldi-fst-io.h"
29 | #include "fstext/lattice-utils.h"
30 | #include "fstext/lattice-weight.h"
31 | #include "fstext/pre-determinize.h"
32 | #include "fstext/table-matcher.h"
33 | 
34 | #endif  // KALDI_FSTEXT_FSTEXT_LIB_H_
35 | 


--------------------------------------------------------------------------------
/runtime/core/kaldi/lat/CPPLINT.cfg:
--------------------------------------------------------------------------------
1 | # There are too many lint errors for now, so we exclude every file here.
2 | # We will try to fix them in the future.
3 | exclude_files=.*
4 | 


--------------------------------------------------------------------------------
/runtime/core/kaldi/util/kaldi-io-inl.h:
--------------------------------------------------------------------------------
 1 | // util/kaldi-io-inl.h
 2 | 
 3 | // Copyright 2009-2011 Microsoft Corporation
 4 | 
 5 | // See ../../COPYING for clarification regarding multiple authors
 6 | //
 7 | // Licensed under the Apache License, Version 2.0 (the "License");
 8 | // you may not use this file except in compliance with the License.
 9 | // You may obtain a copy of the License at
10 | 
11 | //  http://www.apache.org/licenses/LICENSE-2.0
12 | 
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 | #ifndef KALDI_UTIL_KALDI_IO_INL_H_
20 | #define KALDI_UTIL_KALDI_IO_INL_H_
21 | 
22 | #include <string>
23 | 
24 | namespace kaldi {
25 | 
26 | // Forwards to OpenInternal() with `true` as its second argument.
27 | bool Input::Open(const std::string& rxfilename, bool* binary) {
28 |   return OpenInternal(rxfilename, true, binary);
29 | }
30 | // Forwards to OpenInternal() with `false` and a null `binary` pointer.
31 | bool Input::OpenTextMode(const std::string& rxfilename) {
32 |   return OpenInternal(rxfilename, false, nullptr);
33 | }
34 | // Both IsOpen() variants report whether impl_ is non-null.
35 | bool Input::IsOpen() { return impl_ != nullptr; }
36 | bool Output::IsOpen() { return impl_ != nullptr; }
37 | 
38 | }  // end namespace kaldi.
39 | 
40 | #endif  // KALDI_UTIL_KALDI_IO_INL_H_
41 | 


--------------------------------------------------------------------------------
/runtime/core/patch/CPPLINT.cfg:
--------------------------------------------------------------------------------
1 | exclude_files=.*
2 | 


--------------------------------------------------------------------------------
/runtime/core/patch/openfst/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | #-DHAVE_CONFIG_H -I./../include -fno-exceptions -funsigned-char -std=c++11 -MT symbol-table.lo -MD -MP -MF .deps/symbol-table.Tpo -c symbol-table.cc  -fno-common -DPIC -o .libs/symbol-table.o
 3 | 
 4 | include_directories(./include/)
 5 | install(DIRECTORY include/ DESTINATION include/
 6 |         FILES_MATCHING PATTERN "*.h")
 7 | 
 8 | add_subdirectory(lib)
 9 | 
10 | if(HAVE_SCRIPT)
11 |   add_subdirectory(script)
12 | endif(HAVE_SCRIPT)
13 | 
14 | if(HAVE_BIN)
15 |   add_subdirectory(bin)
16 | endif(HAVE_BIN)
17 | 
18 | add_subdirectory(extensions)
19 | 
20 | if(BUILD_TESTING)
21 |   enable_testing()
22 |   add_subdirectory(test)
23 | endif(BUILD_TESTING)
24 | 


--------------------------------------------------------------------------------
/runtime/core/post_processor/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(post_processor STATIC
2 |   post_processor.cc
3 | )
4 | target_link_libraries(post_processor PUBLIC utils wetext_processor wetext_utils)
5 | 
6 | 


--------------------------------------------------------------------------------
/runtime/core/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | link_libraries(gtest_main gmock)
 2 | 
 3 | add_executable(utils_test utils_test.cc)
 4 | target_link_libraries(utils_test PUBLIC utils)
 5 | add_test(UTILS_TEST utils_test)
 6 | 
 7 | add_executable(ctc_prefix_beam_search_test ctc_prefix_beam_search_test.cc)
 8 | target_link_libraries(ctc_prefix_beam_search_test PUBLIC decoder)
 9 | add_test(CTC_PREFIX_BEAM_SEARCH_TEST ctc_prefix_beam_search_test)
10 | 
11 | add_executable(post_processor_test post_processor_test.cc)
12 | target_link_libraries(post_processor_test PUBLIC post_processor)
13 | add_test(POST_PROCESSOR_TEST post_processor_test)
14 | 
15 | 
16 | add_executable(feature_pipeline_test feature_pipeline_test.cc)
17 | target_link_libraries(feature_pipeline_test PUBLIC frontend)
18 | add_test(FEATURE_PIPELINE_TEST feature_pipeline_test)


--------------------------------------------------------------------------------
/runtime/core/test/utils_test.cc:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Binbin Zhang (binbzha@qq.com)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "utils/utils.h"
16 | 
17 | #include <vector>
18 | 
19 | #include "gmock/gmock.h"
20 | #include "gtest/gtest.h"
21 | 
22 | // wenet::TopK with k = 3 on ten floats is expected to yield the three largest
23 | // values in descending order together with their positions in the input.
24 | TEST(UtilsTest, TopKTest) {
25 |   std::vector<float> input = {1, 3, 5, 7, 9, 2, 4, 6, 8, 10};
26 |   std::vector<float> top_values;
27 |   std::vector<int32_t> top_indices;
28 |   wenet::TopK(input, 3, &top_values, &top_indices);
29 |   EXPECT_THAT(top_values,
30 |               ::testing::Pointwise(::testing::FloatNear(1e-8), {10, 9, 8}));
31 |   ASSERT_THAT(top_indices, ::testing::ElementsAre(9, 4, 8));
32 | }
33 | 


--------------------------------------------------------------------------------
/runtime/core/toolchains/aarch64-linux-gnu.toolchain.cmake:
--------------------------------------------------------------------------------
1 | set(CMAKE_SYSTEM_NAME Linux)
2 | SET (CMAKE_SYSTEM_PROCESSOR aarch64)
3 | 
4 | set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
5 | set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
6 | 


--------------------------------------------------------------------------------
/runtime/core/utils/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_library(utils STATIC
 2 |   string.cc
 3 |   utils.cc
 4 | )
 5 | 
 6 | if(NOT ANDROID)
 7 |   if(MSVC)
 8 |     target_link_libraries(utils PUBLIC fst)
 9 |   else()
10 |     target_link_libraries(utils PUBLIC fst dl)
11 |   endif()
12 | endif()


--------------------------------------------------------------------------------
/runtime/core/utils/file.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022  Binbin Zhang (binbzha@qq.com)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef UTILS_FILE_H_
16 | #define UTILS_FILE_H_
17 | 
18 | #include <fstream>
19 | #include <string>
20 | 
21 | namespace wenet {
22 | 
23 | // Returns true when an std::ifstream opened on `path` is in a good state.
24 | inline bool FileExists(const std::string& path) {
25 |   return std::ifstream(path.c_str()).good();
26 | }
27 | 
28 | }  // namespace wenet
29 | 
30 | #endif  // UTILS_FILE_H_
31 | 


--------------------------------------------------------------------------------
/runtime/core/utils/flags.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef UTILS_FLAGS_H_
16 | #define UTILS_FLAGS_H_
17 | 
18 | // Because openfst is a dynamic library compiled with gflags/glog, we must use
19 | // the gflags/glog from openfst to avoid linking them both statically and
20 | // dynamically into the executable.
21 | #include "fst/flags.h"
22 | 
23 | #endif  // UTILS_FLAGS_H_
24 | 


--------------------------------------------------------------------------------
/runtime/core/utils/log.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef UTILS_LOG_H_
16 | #define UTILS_LOG_H_
17 | 
18 | // Because openfst is a dynamic library compiled with gflags/glog, we must use
19 | // the gflags/glog from openfst to avoid linking them both statically and
20 | // dynamically into the executable.
21 | #include "fst/log.h"
22 | 
23 | #endif  // UTILS_LOG_H_
24 | 


--------------------------------------------------------------------------------
/runtime/core/utils/timer.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2021 Mobvoi Inc (Binbin Zhang)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef UTILS_TIMER_H_
16 | #define UTILS_TIMER_H_
17 | 
18 | #include <chrono>
19 | 
20 | namespace wenet {
21 | 
22 | // Stopwatch based on std::chrono::steady_clock; it starts when constructed.
23 | class Timer {
24 |  public:
25 |   Timer() : time_start_(std::chrono::steady_clock::now()) {}
26 |   // Restarts the measurement from the current instant.
27 |   void Reset() { time_start_ = std::chrono::steady_clock::now(); }
28 |   // Whole milliseconds elapsed since construction or the last Reset().
29 |   int Elapsed() const {
30 |     using std::chrono::milliseconds;
31 |     const auto delta = std::chrono::steady_clock::now() - time_start_;
32 |     return std::chrono::duration_cast<milliseconds>(delta).count();
33 |   }
34 |  private:
35 |   std::chrono::time_point<std::chrono::steady_clock> time_start_;
36 | };
37 | }  // namespace wenet
38 | 
39 | #endif  // UTILS_TIMER_H_
40 | 


--------------------------------------------------------------------------------
/runtime/core/utils/utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //   http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #ifndef UTILS_UTILS_H_
16 | #define UTILS_UTILS_H_
17 | 
18 | #include <cstdint>
19 | #include <limits>
20 | #include <vector>
21 | 
22 | namespace wenet {
23 | // Deletes the copy constructor and copy assignment operator of Type.
24 | #define WENET_DISALLOW_COPY_AND_ASSIGN(Type) \
25 |   Type(const Type&) = delete;                \
26 |   Type& operator=(const Type&) = delete;
27 | // Largest finite value representable by float.
28 | const float kFloatMax = std::numeric_limits<float>::max();
29 | // kSpaceSymbol is the UTF-8 encoding of U+2581 (▁): bytes E2 96 81
30 | const char kSpaceSymbol[] = "\xe2\x96\x81";
31 | 
32 | // Return the sum of two probabilities, with inputs and result in log scale
33 | float LogAdd(float x, float y);
34 | // TopK is only declared here; its definition lives outside this header.
35 | template <typename T>
36 | void TopK(const std::vector<T>& data, int32_t k, std::vector<T>* values,
37 |           std::vector<int>* indices);
38 | 
39 | }  // namespace wenet
40 | 
41 | #endif  // UTILS_UTILS_H_
42 | 


--------------------------------------------------------------------------------
/runtime/core/websocket/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_library(websocket STATIC  # static library with the websocket client and server
2 |   websocket_client.cc
3 |   websocket_server.cc
4 | )
5 | target_link_libraries(websocket PUBLIC decoder)  # PUBLIC: targets linking websocket also link decoder
6 | 


--------------------------------------------------------------------------------
/runtime/gpu/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tensorrt_fastertransformer/FasterTransformer"]
2 |     path = tensorrt_fastertransformer/FasterTransformer
3 |     url = https://github.com/NVIDIA/FasterTransformer.git
4 | 


--------------------------------------------------------------------------------
/runtime/gpu/Dockerfile/Dockerfile.client:
--------------------------------------------------------------------------------
1 | FROM nvcr.io/nvidia/tritonserver:23.01-py3-sdk
2 | LABEL maintainer="NVIDIA"
3 | LABEL repository="tritonserver"
4 | # libsndfile1 is the native library the soundfile Python package loads at runtime.
5 | RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1 && rm -rf /var/lib/apt/lists/*
6 | RUN pip3 install --no-cache-dir soundfile
7 | WORKDIR /workspace
8 | 


--------------------------------------------------------------------------------
/runtime/gpu/Dockerfile/Dockerfile.server:
--------------------------------------------------------------------------------
 1 | FROM nvcr.io/nvidia/tritonserver:23.01-py3
 2 | LABEL maintainer="NVIDIA"
 3 | LABEL repository="tritonserver"
 4 | # Build tools (swig, Python headers, cmake); presumably required by the source builds below - confirm.
 5 | RUN apt-get update && apt-get install -y swig python3-dev cmake && rm -rf /var/lib/apt/lists/*
 6 | RUN pip3 install --no-cache-dir torch torchaudio
 7 | RUN pip3 install --no-cache-dir -v kaldifeat pyyaml onnx
 8 | 
 9 | WORKDIR /workspace
10 | RUN git clone --depth 1 https://github.com/Slyne/ctc_decoder.git && cd ctc_decoder/swig && bash setup.sh
11 | COPY ./scripts scripts
12 | 


--------------------------------------------------------------------------------
/runtime/gpu/Overview.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/Overview.JPG


--------------------------------------------------------------------------------
/runtime/gpu/client/test_wavs/long.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/client/test_wavs/long.wav


--------------------------------------------------------------------------------
/runtime/gpu/client/test_wavs/mid.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/client/test_wavs/mid.wav


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/attention_rescoring/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/attention_rescoring/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/decoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/decoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/encoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/encoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/lang/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/lang/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_cuda_decoder/scoring/1/wfst_decoding_config.yaml:
--------------------------------------------------------------------------------
 1 | acoustic_scale: 10.0
 2 | n_input_per_chunk: 50
 3 | default_beam: 17.0
 4 | lattice_beam: 4.0
 5 | max_active: 7000
 6 | determinize_lattice: True
 7 | max_batch_size: 200
 8 | num_channels: 400
 9 | frame_shift_seconds: 0.04
10 | lm_scale: 5.0
11 | word_ins_penalty: 0.0
12 | 


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/encoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/lang/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/lang/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/scoring/1/wfst_decoding_config.yaml:
--------------------------------------------------------------------------------
 1 | acoustic_scale: 10.0
 2 | n_input_per_chunk: 50
 3 | default_beam: 17.0
 4 | lattice_beam: 4.0
 5 | max_active: 7000
 6 | determinize_lattice: True
 7 | max_batch_size: 200
 8 | num_channels: 400
 9 | frame_shift_seconds: 0.04
10 | lm_scale: 5.0
11 | word_ins_penalty: 0.0
12 | 


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/cuda_decoders/model_repo_stateful_cuda_decoder/streaming_wenet/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/cuda_decoders/requirements.txt:
--------------------------------------------------------------------------------
1 | riva-asrlib-decoder==0.4.0
2 | onnxmltools
3 | 


--------------------------------------------------------------------------------
/runtime/gpu/model_repo/attention_rescoring/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/attention_rescoring/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/model_repo/decoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/decoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/model_repo/encoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo/encoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/model_repo_stateful/decoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/decoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/model_repo_stateful/encoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/encoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/model_repo_stateful/streaming_wenet/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/model_repo_stateful/streaming_wenet/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/scripts/convert_start_server.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | 
18 | onnx_model_dir=/ws/onnx_model
19 | model_repo=/ws/model_repo
20 | 
21 | # Convert config.pbtxt in model_repo and move models; abort if the conversion fails
22 | python3 scripts/convert.py --config="$onnx_model_dir/train.yaml" --vocab="$onnx_model_dir/words.txt" \
23 |         --model_repo="$model_repo" --onnx_model_dir="$onnx_model_dir" || exit 1
24 | 
25 | # Start server (exec replaces this shell so tritonserver receives signals directly)
26 | exec tritonserver --model-repository="${model_repo}" --pinned-memory-pool-byte-size=1024000000 --cuda-memory-pool-byte-size=0:1024000000
27 | 


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/LayerNormPlugin/Makefile:
--------------------------------------------------------------------------------
 1 | CUDA_PATH       = /usr/local/cuda
 2 | TRT_PATH        = /usr/lib/x86_64-linux-gnu
 3 | NVCC            = $(CUDA_PATH)/bin/nvcc
 4 | #SM              = 61
 5 |                 # 61 for GTX1070, 75 for T4,80 for A30
 6 | GENCODE         = -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86
 7 | CUFLAG          = -w -std=c++14 -O3 -UDEBUG -Xcompiler -fPIC $(GENCODE)
 8 | CPPFLAG         = -w -std=c++14 -O3 -use_fast_math
 9 | SOFLAG          = $(CUFLAG) -shared
10 | INCLUDE         = -I. -I$(CUDA_PATH)/include
11 | LDFLAG          = -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcublasLt -L$(TRT_PATH)/lib -lnvinfer
12 | 
13 | SRC_CU          = $(shell find ./ -name '*.cu')
14 | 
15 | all: LayerNorm.so
16 | 
17 | %.o: %.cu
18 | 	$(NVCC) $(CUFLAG) $(INCLUDE) -o $@ -c $<
19 | # Libraries come after the objects so the linker keeps them (matters with --as-needed).
20 | LayerNorm.so: $(SRC_CU:.cu=.o)
21 | 	$(NVCC) $(SOFLAG) -o $@ $^ $(LDFLAG)
22 | 
23 | .PHONY: all clean
24 | clean:
25 | 	rm -rf ./*.so ./*.o ./*.d ./*.trt
26 | 
27 | .PHONY: test
28 | test:
29 | 	clear
30 | 	python testLayerNormPlugin.py
31 | 
32 | 


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/README.md:
--------------------------------------------------------------------------------
 1 | ### Using TensorRT for Triton ASR Server
 2 | 
 3 | ```sh
 4 | # using docker image runtime/gpu/Dockerfile/Dockerfile.server
 5 | docker pull soar97/triton-wenet:22.12
 6 | docker run -it --rm --name "wenet_trt_test" --gpus all --shm-size 1g --net host soar97/triton-wenet:22.12
 7 | # inside the docker container
 8 | git clone https://github.com/wenet-e2e/wenet.git
 9 | cd wenet/runtime/gpu/tensorrt
10 | pip3 install nvidia-pyindex
11 | # Use pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple if you encounter network issues
12 | pip3 install -r requirements.txt
13 | 
14 | bash run_streaming_small_model.sh
15 | ```
16 | 
17 | #### Performance of Small u2pp Model for Streaming ASR
18 | 
19 | Benchmark (small u2pp ONNX model) on the Aishell1 test set, using an A10 server (16 vCPU, 60 GB memory) and a client (4 vCPU, 16 GB memory). The total audio duration is 36108.919 seconds.
20 | 
21 | (Note: using non-simulate-streaming mode)
22 | |concurrent-tasks | processing time(s) |
23 | |----------|--------------------|
24 | | 20 (onnx fp16)                | 123.796 |
25 | | 40 (onnx fp16)                | 84.557  |
26 | | 60 (onnx fp16)                | 73.232  |
27 | | 80 (onnx fp16)                | 66.862  |
28 | | 20 (trt fp16+layernorm plugin)| 90.582  |
29 | | 40 (trt fp16+layernorm plugin)| 75.411  |
30 | | 60 (trt fp16+layernorm plugin)| 69.602  |
31 | | 80 (trt fp16+layernorm plugin)| 65.603  |


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/decoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/encoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitignore


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt/model_repo_stateful_trt/streaming_wenet/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt/requirements.txt:
--------------------------------------------------------------------------------
1 | nvidia-pyindex
2 | tensorrt==8.5.1.7
3 | onnx
4 | onnxruntime-gpu
5 | onnx_graphsurgeon>=0.3.21 --index-url https://pypi.ngc.nvidia.com
6 | polygraphy
7 | cuda-python
8 | onnxmltools
9 | 


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt_fastertransformer/decoder_plugin.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/decoder_plugin.JPG


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt_fastertransformer/encoder_plugin.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/encoder_plugin.JPG


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/decoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/decoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/encoder/1/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/tensorrt_fastertransformer/model_repo_ft/encoder/1/.gitkeep


--------------------------------------------------------------------------------
/runtime/gpu/tensorrt_fastertransformer/requirements.txt:
--------------------------------------------------------------------------------
1 | onnx
2 | nvidia-pyindex
3 | onnx-graphsurgeon
4 | cuda-python
5 | onnxruntime-gpu
6 | onnxmltools
7 | 


--------------------------------------------------------------------------------
/runtime/gpu/test.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/gpu/test.gif


--------------------------------------------------------------------------------
/runtime/horizonbpu/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | wheels*
4 | 


--------------------------------------------------------------------------------
/runtime/horizonbpu/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/horizonbpu/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/horizonbpu/bpu/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .")
 2 | # Everything below is skipped unless BPU evaluates to true.
 3 | if(BPU)
 4 |   list(APPEND bpu_asr_model_srcs ./bpu_asr_model.cc)
 5 |   message(STATUS "Use src_files: [ ${bpu_asr_model_srcs} ] to compile bpu_asr_model .")
 6 | 
 7 |   # Build bpu_asr_model as a static library; the parent directory is added to the include path
 8 |   include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
 9 |   add_library(bpu_asr_model STATIC ${bpu_asr_model_srcs})
10 |   target_link_libraries(bpu_asr_model PUBLIC easy_dnn dnn)
11 | endif()
12 | 


--------------------------------------------------------------------------------
/runtime/horizonbpu/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/horizonbpu/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/horizonbpu/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/horizonbpu/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/horizonbpu/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/horizonbpu/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/horizonbpu/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/horizonbpu/toolchains:
--------------------------------------------------------------------------------
1 | ../core/toolchains


--------------------------------------------------------------------------------
/runtime/horizonbpu/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/horizonbpu/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Workspace
3 |    version = "1.0">
4 |    <FileRef
5 |       location = "self:">
6 |    </FileRef>
7 | </Workspace>
8 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 |     <key>IDEDidComputeMac32BitWarning</key>
6 |     <true/>
7 | </dict>
8 | </plist>
9 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/AccentColor.colorset/Contents.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "colors" : [
 3 |     {
 4 |       "idiom" : "universal"
 5 |     }
 6 |   ],
 7 |   "info" : {
 8 |     "author" : "xcode",
 9 |     "version" : 1
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/AppIcon.appiconset/Contents.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "images" : [
 3 |     {
 4 |       "idiom" : "universal",
 5 |       "platform" : "ios",
 6 |       "size" : "1024x1024"
 7 |     }
 8 |   ],
 9 |   "info" : {
10 |     "author" : "xcode",
11 |     "version" : 1
12 |   }
13 | }
14 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/Assets.xcassets/Contents.json:
--------------------------------------------------------------------------------
1 | {
2 |   "info" : {
3 |     "author" : "xcode",
4 |     "version" : 1
5 |   }
6 | }
7 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/Info.plist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 3 | <plist version="1.0">
 4 | <dict>
 5 |     <key>UIApplicationSceneManifest</key>
 6 |     <dict>
 7 |         <key>UIApplicationSupportsMultipleScenes</key>
 8 |         <false/>
 9 |         <key>UISceneConfigurations</key>
10 |         <dict>
11 |             <key>UIWindowSceneSessionRoleApplication</key>
12 |             <array>
13 |                 <dict>
14 |                     <key>UISceneConfigurationName</key>
15 |                     <string>Default Configuration</string>
16 |                     <key>UISceneDelegateClassName</key>
17 |                     <string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
18 |                     <key>UISceneStoryboardFile</key>
19 |                     <string>Main</string>
20 |                 </dict>
21 |             </array>
22 |         </dict>
23 |     </dict>
24 |     <key>NSMicrophoneUsageDescription</key>
25 |     <string>Need microphone access for recording speech</string>
26 | </dict>
27 | </plist>
28 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/model/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/ios/WenetDemo/WenetDemo/model/.gitkeep


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/wenet/WenetDemo-Bridging-Header.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Dan Ma (1067837450@qq.com)
 2 | //
 3 | //  Use this file to import your target's public headers
 4 | //  that you would like to expose to Swift.
 5 | //
 6 | // Licensed under the Apache License, Version 2.0 (the "License");
 7 | // you may not use this file except in compliance with the License.
 8 | // You may obtain a copy of the License at
 9 | //
10 | //     http://www.apache.org/licenses/LICENSE-2.0
11 | //
12 | // Unless required by applicable law or agreed to in writing, software
13 | // distributed under the License is distributed on an "AS IS" BASIS,
14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | // See the License for the specific language governing permissions and
16 | // limitations under the License.
17 | 
18 | #ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_
19 | #define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_
20 | 
21 | #import "wenet.h"
22 | 
23 | #endif  // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENETDEMO_BRIDGING_HEADER_H_
24 | 


--------------------------------------------------------------------------------
/runtime/ios/WenetDemo/WenetDemo/wenet/wenet.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2022 Dan Ma (1067837450@qq.com)
 2 | //
 3 | //  wenet.h
 4 | //  WenetDemo
 5 | //
 6 | // Licensed under the Apache License, Version 2.0 (the "License");
 7 | // you may not use this file except in compliance with the License.
 8 | // You may obtain a copy of the License at
 9 | //
10 | //     http://www.apache.org/licenses/LICENSE-2.0
11 | //
12 | // Unless required by applicable law or agreed to in writing, software
13 | // distributed under the License is distributed on an "AS IS" BASIS,
14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | // See the License for the specific language governing permissions and
16 | // limitations under the License.
17 | 
18 | #ifndef RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_
19 | #define RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_
20 | 
21 | #include <stdio.h>
22 | 
23 | #import <Foundation/Foundation.h>
24 | 
25 | @interface Wenet : NSObject  // imported by WenetDemo-Bridging-Header.h to expose it to Swift
26 | 
27 | - (nullable instancetype)initWithModelPath:
28 | (NSString*)modelPath DictPath:(NSString*)dictPath;  // NOLINT
29 | 
30 | - (void)reset;
31 | 
32 | - (void)acceptWaveForm: (float*)pcm: (int)size;  // NOLINT
33 | 
34 | - (void)decode;
35 | 
36 | - (NSString*)get_result;  // NOLINT
37 | 
38 | @end
39 | 
40 | #endif  // RUNTIME_IOS_WENETDEMO_WENETDEMO_WENET_WENET_H_
41 | 


--------------------------------------------------------------------------------
/runtime/ios/build/Podfile:
--------------------------------------------------------------------------------
1 | platform :ios, '14.3'
2 | pod 'LibTorch', '~>1.11.0'
3 | 


--------------------------------------------------------------------------------
/runtime/ios/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/ios/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/ios/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/ios/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/ios/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/ios/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/ios/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/ios/toolchains:
--------------------------------------------------------------------------------
1 | ../core/toolchains


--------------------------------------------------------------------------------
/runtime/ios/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/ipex/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | 


--------------------------------------------------------------------------------
/runtime/ipex/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/ipex/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/ipex/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/ipex/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/ipex/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | # Image that clones WeNet and builds the runtime under runtime/ipex.
 3 | 
 4 | ENV DEBIAN_FRONTEND=noninteractive
 5 | RUN apt-get update && apt-get install -y git cmake wget build-essential python-is-python3 python3-pip google-perftools
 6 | # CPU wheels; torch, torchaudio and intel_extension_for_pytorch are pinned to 2.3.0.
 7 | RUN pip install torch==2.3.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cpu
 8 | RUN pip install intel_extension_for_pytorch==2.3.0 pyyaml six intel-openmp
 9 | # Provide the unversioned libtcmalloc.so name next to libtcmalloc.so.4.
10 | RUN ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 /usr/lib/x86_64-linux-gnu/libtcmalloc.so
11 | 
12 | RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet
13 | ENV OMP_NUM_THREADS=1
14 | ARG src=/home/wenet/runtime/ipex
15 | # Use one build job per available core instead of a fixed -j32.
16 | RUN cmake -B $src/build -S $src -DCMAKE_BUILD_TYPE=Release && cmake --build $src/build -j"$(nproc)"
17 | # Append the previous value only when it is non-empty: a trailing ':' adds an
18 | # empty entry, which the dynamic loader resolves to the current directory.
19 | ENV LD_LIBRARY_PATH=$src/fc_base/libtorch-src/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
20 | WORKDIR /home/wenet/
21 | 


--------------------------------------------------------------------------------
/runtime/ipex/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/ipex/grpc:
--------------------------------------------------------------------------------
1 | ../core/grpc


--------------------------------------------------------------------------------
/runtime/ipex/http:
--------------------------------------------------------------------------------
1 | ../core/http


--------------------------------------------------------------------------------
/runtime/ipex/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/ipex/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/ipex/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/ipex/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/ipex/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/ipex/web:
--------------------------------------------------------------------------------
1 | ../libtorch/web


--------------------------------------------------------------------------------
/runtime/ipex/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/kunlun/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | 


--------------------------------------------------------------------------------
/runtime/kunlun/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/kunlun/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/kunlun/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/kunlun/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/kunlun/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/kunlun/grpc:
--------------------------------------------------------------------------------
1 | ../core/grpc


--------------------------------------------------------------------------------
/runtime/kunlun/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/kunlun/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/kunlun/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/kunlun/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/kunlun/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/kunlun/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/kunlun/xpu/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .")
 2 | 
 3 | if(XPU)
 4 |   # Sources of the static library; it is only defined when XPU is enabled.
 5 |   list(APPEND xpu_conformer_srcs ./xpu_asr_model.cc)
 6 |   list(APPEND xpu_conformer_srcs ./xpu_conformer.cpp)
 7 |   list(APPEND xpu_conformer_srcs ./xpu_util.cpp)
 8 |   message(STATUS "Use src_files: [ ${xpu_conformer_srcs} ] to compile xpu_conformer.a .")
 9 | 
10 |   # compile xpu_conformer.a
11 |   add_library(xpu_conformer STATIC ${xpu_conformer_srcs})
12 |   target_link_libraries(xpu_conformer PUBLIC xpuapi xpurt)
13 | endif()
14 | 
15 | set(CMAKE_VERBOSE_MAKEFILE OFF)
16 | 
17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive")
20 | # Append to the existing value so linker flags supplied by a parent scope or
21 | # toolchain file are kept instead of being overwritten.
22 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lpthread -lrt -lm -ldl")
23 | 
24 | # The test binary compiles xpu_conformer.cpp and xpu_util.cpp directly; it does not link xpu_conformer.
25 | set(SRC_FILES ./conformer_test.cpp ./xpu_conformer.cpp ./xpu_util.cpp)
26 | message(STATUS "Use src_files: [ ${SRC_FILES} ] to compile xpu_conformer_test.")
27 | 
28 | add_executable(xpu_conformer_test ${SRC_FILES})
29 | target_link_libraries(xpu_conformer_test -lxpuapi -lxpurt)
30 | 


--------------------------------------------------------------------------------
/runtime/libtorch/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | 


--------------------------------------------------------------------------------
/runtime/libtorch/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/libtorch/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/libtorch/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/libtorch/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/libtorch/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:latest
 2 | # MAINTAINER is deprecated; the maintainer is recorded as an image label instead.
 3 | LABEL maintainer="zhendong.peng@qq.com"
 4 | ENV DEBIAN_FRONTEND=noninteractive
 5 | RUN apt-get update && apt-get install -y git cmake wget build-essential
 6 | RUN git clone https://github.com/wenet-e2e/wenet.git /home/wenet
 7 | # Model archive to download; override with --build-arg model=<archive name>.
 8 | ARG model=20210618_u2pp_conformer_libtorch.tar.gz
 9 | RUN wget -P /home https://wenet-1256283475.cos.ap-shanghai.myqcloud.com/models/aishell2/$model
10 | RUN tar -xzf /home/$model -C /home
11 | ARG src=/home/wenet/runtime/libtorch
12 | # Configure and build the libtorch runtime in Release mode with GRAPH_TOOLS=ON.
13 | RUN cmake -B $src/build -S $src -DCMAKE_BUILD_TYPE=Release -DGRAPH_TOOLS=ON && cmake --build $src/build
14 | 


--------------------------------------------------------------------------------
/runtime/libtorch/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/libtorch/grpc:
--------------------------------------------------------------------------------
1 | ../core/grpc


--------------------------------------------------------------------------------
/runtime/libtorch/http:
--------------------------------------------------------------------------------
1 | ../core/http/


--------------------------------------------------------------------------------
/runtime/libtorch/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/libtorch/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/libtorch/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/libtorch/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/libtorch/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/libtorch/web/app.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # Copyright 2021 Mobvoi Inc. All Rights Reserved.
 5 | # Author: zhendong.peng@mobvoi.com (Zhendong Peng)
 6 | 
 7 | import argparse
 8 | 
 9 | from flask import Flask, render_template
10 | 
11 | parser = argparse.ArgumentParser(description='training your network')
12 | parser.add_argument('--port', default=19999, type=int, help='port id')
13 | args = parser.parse_args()
14 | 
15 | app = Flask(__name__)
16 | 
17 | 
18 | @app.route('/')
19 | def index():
20 |     return render_template('index.html')
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     app.run(host='0.0.0.0', port=args.port, debug=True)
25 | 


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/favicon.ico


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/fonts/FontAwesome.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/FontAwesome.otf


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.eot


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.ttf


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/fonts/fontawesome-webfont.woff2


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/image/qrcode-enterprise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/qrcode-enterprise.png


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/image/qrcode-official-account.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/qrcode-official-account.png


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/image/voice-pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/runtime/libtorch/web/static/image/voice-pic.png


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/js/recorder/engine/pcm.js:
--------------------------------------------------------------------------------
1 | /*
2 | Recording - PCM engine: defines Recorder.prototype.pcm (8/16-bit PCM Blob encoder) and Recorder.pcm2wav
3 | https://github.com/xiangyuecn/Recorder
4 | src: engine/pcm.js
5 | */
6 | !function(){"use strict";Recorder.prototype.enc_pcm={stable:!0,testmsg:"pcm为未封装的原始音频数据，pcm数据文件无法直接播放；支持位数8位、16位（填在比特率里面），采样率取值无限制"},Recorder.prototype.pcm=function(e,t,r){var a=this.set,n=e.length,o=8==a.bitRate?8:16,c=new ArrayBuffer(n*(o/8)),s=new DataView(c),l=0;if(8==o)for(var p=0;p<n;p++,l++){var i=128+(e[p]>>8);s.setInt8(l,i,!0)}else for(p=0;p<n;p++,l+=2)s.setInt16(l,e[p],!0);t(new Blob([s.buffer],{type:"audio/pcm"}))},Recorder.pcm2wav=function(e,a,n){e.slice&&null!=e.type&&(e={blob:e});var o=e.sampleRate||16e3,c=e.bitRate||16;if(e.sampleRate&&e.bitRate||console.warn("pcm2wav必须提供sampleRate和bitRate"),Recorder.prototype.wav){var s=new FileReader;s.onloadend=function(){var e;if(8==c){var t=new Uint8Array(s.result);e=new Int16Array(t.length);for(var r=0;r<t.length;r++)e[r]=t[r]-128<<8}else e=new Int16Array(s.result);Recorder({type:"wav",sampleRate:o,bitRate:c}).mock(e,o).stop(function(e,t){a(e,t)},n)},s.readAsArrayBuffer(e.blob)}else n("pcm2wav必须先加载wav编码器wav.js")}}();


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/js/recorder/engine/wav.js:
--------------------------------------------------------------------------------
1 | /*
2 | Recording - WAV engine: defines Recorder.prototype.wav, which writes a 44-byte RIFF/WAVE header followed by 8- or 16-bit samples
3 | https://github.com/xiangyuecn/Recorder
4 | src: engine/wav.js
5 | */
6 | !function(){"use strict";Recorder.prototype.enc_wav={stable:!0,testmsg:"支持位数8位、16位（填在比特率里面），采样率取值无限制"},Recorder.prototype.wav=function(t,e,n){var r=this.set,a=t.length,o=r.sampleRate,f=8==r.bitRate?8:16,i=a*(f/8),s=new ArrayBuffer(44+i),c=new DataView(s),u=0,v=function(t){for(var e=0;e<t.length;e++,u++)c.setUint8(u,t.charCodeAt(e))},w=function(t){c.setUint16(u,t,!0),u+=2},l=function(t){c.setUint32(u,t,!0),u+=4};if(v("RIFF"),l(36+i),v("WAVE"),v("fmt "),l(16),w(1),w(1),l(o),l(o*(f/8)),w(f/8),w(f),v("data"),l(i),8==f)for(var p=0;p<a;p++,u++){var d=128+(t[p]>>8);c.setInt8(u,d,!0)}else for(p=0;p<a;p++,u+=2)c.setInt16(u,t[p],!0);e(new Blob([c.buffer],{type:"audio/wav"}))}}();


--------------------------------------------------------------------------------
/runtime/libtorch/web/static/js/recorder/extensions/lib.fft.js:
--------------------------------------------------------------------------------
1 | /*
2 | Recording - FFT helper: Recorder.LibFFT(size) returns an object with transform(samples) and bufferSize
3 | https://github.com/xiangyuecn/Recorder
4 | src: extensions/lib.fft.js
5 | */
6 | Recorder.LibFFT=function(r){"use strict";var s,v,d,l,F,b,g,m;return function(r){var o,t,a,f;for(s=Math.round(Math.log(r)/Math.log(2)),d=((v=1<<s)<<2)*Math.sqrt(2),l=[],F=[],b=[0],g=[0],m=[],o=0;o<v;o++){for(a=o,f=t=0;t!=s;t++)f<<=1,f|=1&a,a>>>=1;m[o]=f}var n,u=2*Math.PI/v;for(o=(v>>1)-1;0<o;o--)n=o*u,g[o]=Math.cos(n),b[o]=Math.sin(n)}(r),{transform:function(r){var o,t,a,f,n,u,e,h,M=1,i=s-1;for(o=0;o!=v;o++)l[o]=r[m[o]],F[o]=0;for(o=s;0!=o;o--){for(t=0;t!=M;t++)for(n=g[t<<i],u=b[t<<i],a=t;a<v;a+=M<<1)e=n*l[f=a+M]-u*F[f],h=n*F[f]+u*l[f],l[f]=l[a]-e,F[f]=F[a]-h,l[a]+=e,F[a]+=h;M<<=1,i--}t=v>>1;var c=new Float64Array(t);for(n=-(u=d),o=t;0!=o;o--)e=l[o],h=F[o],c[o-1]=n<e&&e<u&&n<h&&h<u?0:Math.round(e*e+h*h);return c},bufferSize:v}};


--------------------------------------------------------------------------------
/runtime/libtorch/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/onnxruntime/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | 


--------------------------------------------------------------------------------
/runtime/onnxruntime/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | ../libtorch/CMakeLists.txt


--------------------------------------------------------------------------------
/runtime/onnxruntime/README.md:
--------------------------------------------------------------------------------
 1 | # ONNX backend on WeNet
 2 | 
 3 | * Step 1. Export your experiment's model to ONNX using https://github.com/wenet-e2e/wenet/blob/main/wenet/bin/export_onnx_cpu.py
 4 | 
 5 | ``` sh
 6 | exp=exp  # Change it to your experiment dir
 7 | onnx_dir=onnx
 8 | python -m wenet.bin.export_onnx_cpu \
 9 |   --config $exp/train.yaml \
10 |   --checkpoint $exp/final.pt \
11 |   --chunk_size 16 \
12 |   --output_dir $onnx_dir \
13 |   --num_decoding_left_chunks -1
14 | 
15 | # When it finishes, `encoder.onnx`, `ctc.onnx`, and `decoder.onnx` can be found in $onnx_dir.
16 | ```
17 | 
18 | * Step 2. Build. The build requires cmake 3.14 or above.
19 | 
20 | ``` sh
21 | mkdir build && cd build
22 | cmake -DONNX=ON -DTORCH=OFF -DWEBSOCKET=OFF -DGRPC=OFF ..
23 | cmake --build .
24 | ```
25 | 
26 | * Step 3. Test. The RTF (real-time factor) is printed to the console.
27 | 
28 | ``` sh
29 | export GLOG_logtostderr=1
30 | export GLOG_v=2
31 | wav_path=your_test_wav_path
32 | onnx_dir=your_model_dir
33 | units=units.txt  # Change it to your model units path
34 | # Make sure that the `chunk_size` and `num_left_chunks` variables are set to the corresponding values used when exporting the ONNX models.
35 | ./build/bin/decoder_main \
36 |     --chunk_size 16 \
37 |     --num_left_chunks -1 \
38 |     --wav_path $wav_path \
39 |     --onnx_dir $onnx_dir \
40 |     --unit_path $units 2>&1 | tee log.txt
41 | ```
42 | 


--------------------------------------------------------------------------------
/runtime/onnxruntime/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/onnxruntime/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/onnxruntime/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/onnxruntime/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/onnxruntime/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/onnxruntime/grpc:
--------------------------------------------------------------------------------
1 | ../core/grpc


--------------------------------------------------------------------------------
/runtime/onnxruntime/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/onnxruntime/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/onnxruntime/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/onnxruntime/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/onnxruntime/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/onnxruntime/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/openvino/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/openvino/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/openvino/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/openvino/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/openvino/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/openvino/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/openvino/ov/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | message("cmake build type is ${CMAKE_BUILD_TYPE} .")
 2 | 
 3 | if(OPENVINO)
 4 |   list(APPEND ov_asr_model_srcs ./ov_asr_model.cc)
 5 |   message(STATUS "Use src_files: [ ${ov_asr_model_srcs} ] to compile ov_asr_model .")
 6 | 
 7 |   add_library(ov_asr_model STATIC ${ov_asr_model_srcs})
 8 |   # NOTE(review): include(openvino) presumably defines openvino_SOURCE_DIR,
 9 |   # which both package directories below are derived from; confirm in cmake/.
10 |   include(openvino)
11 |   set(OpenVINO_DIR ${openvino_SOURCE_DIR}/runtime/cmake)
12 |   set(TBB_DIR ${openvino_SOURCE_DIR}/runtime/3rdparty/tbb/cmake)
13 |   find_package(OpenVINO REQUIRED)
14 |   find_package(TBB REQUIRED)
15 |   # Pass the variable name rather than ${...}: if() dereferences it itself, and
16 |   # an unquoted expansion breaks on an empty value or gets re-evaluated.
17 |   if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
18 |     target_link_libraries(ov_asr_model PUBLIC openvino::runtime)
19 |   else()
20 |     target_link_libraries(ov_asr_model PUBLIC openvino openvino_intel_cpu_plugin)
21 |   endif()
22 | endif()


--------------------------------------------------------------------------------
/runtime/openvino/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/openvino/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/openvino/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/openvino/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/openvino/websocket:
--------------------------------------------------------------------------------
1 | ../core/websocket


--------------------------------------------------------------------------------
/runtime/raspberrypi/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | fc_base/
3 | 


--------------------------------------------------------------------------------
/runtime/raspberrypi/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | ../libtorch/CMakeLists.txt


--------------------------------------------------------------------------------
/runtime/raspberrypi/api:
--------------------------------------------------------------------------------
1 | ../core/api


--------------------------------------------------------------------------------
/runtime/raspberrypi/bin:
--------------------------------------------------------------------------------
1 | ../core/bin


--------------------------------------------------------------------------------
/runtime/raspberrypi/cmake:
--------------------------------------------------------------------------------
1 | ../core/cmake


--------------------------------------------------------------------------------
/runtime/raspberrypi/decoder:
--------------------------------------------------------------------------------
1 | ../core/decoder


--------------------------------------------------------------------------------
/runtime/raspberrypi/frontend:
--------------------------------------------------------------------------------
1 | ../core/frontend


--------------------------------------------------------------------------------
/runtime/raspberrypi/kaldi:
--------------------------------------------------------------------------------
1 | ../core/kaldi


--------------------------------------------------------------------------------
/runtime/raspberrypi/patch:
--------------------------------------------------------------------------------
1 | ../core/patch


--------------------------------------------------------------------------------
/runtime/raspberrypi/post_processor:
--------------------------------------------------------------------------------
1 | ../core/post_processor


--------------------------------------------------------------------------------
/runtime/raspberrypi/test:
--------------------------------------------------------------------------------
1 | ../core/test


--------------------------------------------------------------------------------
/runtime/raspberrypi/toolchains:
--------------------------------------------------------------------------------
1 | ../core/toolchains


--------------------------------------------------------------------------------
/runtime/raspberrypi/utils:
--------------------------------------------------------------------------------
1 | ../core/utils


--------------------------------------------------------------------------------
/runtime/web/README.md:
--------------------------------------------------------------------------------
1 | ## WeNet Web Demo
2 | 
3 | * How to install? `pip install -r requirements.txt`
4 | * How to start?
5 |   - Non-streaming: `python app.py`
6 | 


--------------------------------------------------------------------------------
/runtime/web/requirements.txt:
--------------------------------------------------------------------------------
1 | wenet @ git+https://github.com/wenet-e2e/wenet.git
2 | gradio==3.14.0
3 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = wenet
 3 | version = 0.0.0
 4 | license = Apache Software License
 5 | description = End to end speech recognition toolkit
 6 | long_description = file: README.md
 7 | classifiers =
 8 |     License :: OSI Approved :: Apache Software License
 9 |     Operating System :: OS Independent
10 |     Programming Language :: Python :: 3
11 | 
12 | [options]
13 | packages = find:
14 | include_package_data = True
15 | python_requires = >= 3.8
16 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | from setuptools import setup, find_packages
 3 | 
# Runtime dependencies installed with the base package.
requirements = [
    "numpy",
    "requests",
    "tqdm",
    "torch>=1.13.0",
    "torchaudio>=0.13.0",
    "openai-whisper",
    "librosa",
    "pyyaml",
    "jieba"
]

# Optional dependency sets, selected with `pip install wenet[<extra>]`.
extra_require = {
    "torch-npu": [
        "torch==2.2.0", "torch-npu==2.2.0", "torchaudio==2.2.0", "decorator",
        "numpy<2.0.0", "attrs", "psutil"
    ],
}

# Windows gets one additional requirement on top of the common list.
# NOTE(review): "PySoundFile" looks like the legacy distribution name of
# "soundfile" -- confirm which one should be pinned here.
if platform.system() == 'Windows':
    requirements += ['PySoundFile']

# Only the fields below are set here; setup.cfg declares the same package
# name plus version, license and classifiers.
setup(
    name="wenet",
    install_requires=requirements,
    packages=find_packages(),
    # Installs a `wenet` console command bound to wenet.cli.transcribe:main.
    entry_points={"console_scripts": [
        "wenet = wenet.cli.transcribe:main",
    ]},
    extras_require=extra_require,
)
35 | 


--------------------------------------------------------------------------------
/test/resources/aishell-BAC009S0724W0121.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/aishell-BAC009S0724W0121.wav


--------------------------------------------------------------------------------
/test/resources/dataset/aishell-BAC009S0724W0121.wav:
--------------------------------------------------------------------------------
1 | ../aishell-BAC009S0724W0121.wav


--------------------------------------------------------------------------------
/test/resources/dataset/data.list:
--------------------------------------------------------------------------------
1 | {"key": "test/resources/dataset/aishell-BAC009S0724W0121", "wav": "test/resources/dataset/aishell-BAC009S0724W0121.wav", "txt": "广州市房地产中介协会分析"}
2 | {"key": "test/resources/dataset/librispeech-1995-1837-0001", "wav": "test/resources/dataset/librispeech-1995-1837-0001.wav", "txt": "IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT"}
3 | 


--------------------------------------------------------------------------------
/test/resources/dataset/data.shards.list:
--------------------------------------------------------------------------------
1 | test/resources/dataset/shards/shards_000000000.tar
2 | 


--------------------------------------------------------------------------------
/test/resources/dataset/librispeech-1995-1837-0001.wav:
--------------------------------------------------------------------------------
1 | ../librispeech-1995-1837-0001.wav


--------------------------------------------------------------------------------
/test/resources/dataset/shards/shards_000000000.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/dataset/shards/shards_000000000.tar


--------------------------------------------------------------------------------
/test/resources/dataset/text:
--------------------------------------------------------------------------------
1 | test/resources/dataset/aishell-BAC009S0724W0121 广州市房地产中介协会分析
2 | test/resources/dataset/librispeech-1995-1837-0001 IT WAS THE FIRST GREAT SORROW OF HIS LIFE IT WAS NOT SO MUCH THE LOSS OF THE COTTON ITSELF BUT THE FANTASY THE HOPES THE DREAMS BUILT AROUND IT
3 | 


--------------------------------------------------------------------------------
/test/resources/dataset/wav.scp:
--------------------------------------------------------------------------------
1 | test/resources/dataset/aishell-BAC009S0724W0121 test/resources/dataset/aishell-BAC009S0724W0121.wav
2 | test/resources/dataset/librispeech-1995-1837-0001 test/resources/dataset/librispeech-1995-1837-0001.wav
3 | 


--------------------------------------------------------------------------------
/test/resources/librispeech-1995-1837-0001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/librispeech-1995-1837-0001.wav


--------------------------------------------------------------------------------
/test/resources/librispeech.train_960_unigram5000.bpemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/test/resources/librispeech.train_960_unigram5000.bpemodel


--------------------------------------------------------------------------------
/test/resources/non-linguistic-symbols.invalid:
--------------------------------------------------------------------------------
1 | #1
2 | <<aaa>>
3 | {{BBB}}
4 | [[ccc]]
5 | 


--------------------------------------------------------------------------------
/test/resources/non-linguistic-symbols.valid:
--------------------------------------------------------------------------------
1 | {~!@#$%^&*()_+`1234567890-=[]|\\:;"'<>,./?}
2 | [~!@#$%^&*()_+`1234567890-={}|\\:;"'<>,./?]
3 | <~!@#$%^&*()_+`1234567890-={}|\\:;"'[],./?>
4 | {qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM}
5 | [qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]
6 | <qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM>
7 | 


--------------------------------------------------------------------------------
/test/test_file_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # Copyright [2021-12-04] <sxc19@mails.tsinghua.edu.cn, Xingchen Song>
 4 | 
 5 | import pytest
 6 | 
 7 | from wenet.utils.file_utils import read_non_lang_symbols
 8 | 
 9 | 
@pytest.mark.parametrize("non_lang_symbol_table_path", [
    "test/resources/non-linguistic-symbols.valid",
    "test/resources/non-linguistic-symbols.invalid"
])
def test_read_non_lang_symbols(non_lang_symbol_table_path):
    """Check read_non_lang_symbols on a valid and an invalid symbol table.

    The valid table must be returned entry by entry, unchanged. The invalid
    table is expected to be rejected with an exception.

    The two cases are kept in separate branches so that an assertion failure
    on the valid table is reported as such instead of being swallowed by a
    broad ``except`` clause.
    """
    path = non_lang_symbol_table_path

    if path == "test/resources/non-linguistic-symbols.invalid":
        # The concrete exception type is defined by read_non_lang_symbols,
        # so only "some exception is raised" is asserted here.
        with pytest.raises(Exception):
            read_non_lang_symbols(path)
        return

    expected = [
        "{~!@#$%^&*()_+`1234567890-=[]|\\\\:;\"'<>,./?}",
        "[~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'<>,./?]",
        "<~!@#$%^&*()_+`1234567890-={}|\\\\:;\"'[],./?>",
        "{qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM}",
        "[qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM]",
        "<qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM>",
    ]
    syms = read_non_lang_symbols(path)
    for idx, want in enumerate(expected):
        assert syms[idx] == want
29 | 


--------------------------------------------------------------------------------
/test/tools/test_make_shard.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import io
 3 | import torch
 4 | from torchaudio._extension import torchaudio
 5 | 
 6 | 
 7 | def test_save_load_consistently():
 8 |     wav_paths = glob.glob("test/resources/*.wav")
 9 |     for wav_path in wav_paths:
10 |         wav, sr = torchaudio.load(wav_path)
11 |         with io.BytesIO() as f:
12 |             wav = torchaudio.transforms.Resample(sr, sr)(wav)
13 |             wav_short = (wav * (1 << 15))
14 |             wav_short = wav_short.to(torch.int16)
15 |             torchaudio.save(f, wav_short, sr, format="wav", bits_per_sample=16)
16 |             f.seek(0)
17 |             b = f.read()
18 | 
19 |         with io.BytesIO(b) as f:
20 |             new_wav, new_sr = torchaudio.load(f)
21 |             assert new_sr == sr
22 |             torch.allclose(new_wav, wav)
23 | 
24 | 
def test_sox_set_buffer():
    """Smoke test: setting the sox buffer size to 16500 must not raise."""
    torchaudio.utils.sox_utils.set_buffer_size(16500)
27 | 
28 | 
def test_make_shards():
    """Placeholder for a shard-creation test; currently asserts nothing."""
    # TODO(MDdct): add make shards
    pass
32 | 


--------------------------------------------------------------------------------
/tools/cmvn_kaldi2json.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | import logging
 4 | import sys
 5 | import json
 6 | 
 7 | 
 8 | def kaldi2json(kaldi_cmvn_file):
 9 |     means = []
10 |     variance = []
11 |     with open(kaldi_cmvn_file, 'r') as fid:
12 |         # kaldi binary file start with '\0B'
13 |         if fid.read(2) == '\0B':
14 |             logging.error('kaldi cmvn binary file is not supported, please '
15 |                           'recompute it by: compute-cmvn-stats --binary=false '
16 |                           ' scp:feats.scp global_cmvn')
17 |             sys.exit(1)
18 |         fid.seek(0)
19 |         arr = fid.read().split()
20 |         assert (arr[0] == '[')
21 |         assert (arr[-2] == '0')
22 |         assert (arr[-1] == ']')
23 |         feat_dim = int((len(arr) - 2 - 2) / 2)
24 |         for i in range(1, feat_dim + 1):
25 |             means.append(float(arr[i]))
26 |         count = float(arr[feat_dim + 1])
27 |         for i in range(feat_dim + 2, 2 * feat_dim + 2):
28 |             variance.append(float(arr[i]))
29 | 
30 |     cmvn_info = {'mean_stat': means, 'var_stat': variance, 'frame_num': count}
31 |     return cmvn_info
32 | 
33 | 
if __name__ == '__main__':
    # Usage: cmvn_kaldi2json.py <kaldi_cmvn_text_file> <output_json_file>
    if len(sys.argv) != 3:
        sys.exit('Usage: {} <kaldi_cmvn_file> <json_cmvn_file>'.format(
            sys.argv[0]))
    # Convert before opening the output, so a failed conversion never
    # truncates an existing output file or leaves an empty one behind.
    cmvn = kaldi2json(sys.argv[1])
    with open(sys.argv[2], 'w') as fout:
        fout.write(json.dumps(cmvn))
38 | 


--------------------------------------------------------------------------------
/tools/flake8_hook.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # encoding: utf-8
 3 | import sys
 4 | 
 5 | from flake8.main import git
 6 | 
if __name__ == '__main__':
    # Run flake8's git hook and use its return value as the process exit
    # status. `strict=True` is passed unconditionally; `lazy` is read from
    # the hook configuration via git.config_for('lazy').
    # NOTE(review): `flake8.main.git` is not shipped by recent flake8
    # releases -- confirm the flake8 version this hook is expected to run on.
    sys.exit(git.hook(
        strict=True,
        lazy=git.config_for('lazy'),
    ))
12 | 


--------------------------------------------------------------------------------
/tools/fst/ctc_token_fst.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | print('0 1 <eps> <eps>')
 6 | print('1 1 <blank> <eps>')
 7 | print('2 2 <blank> <eps>')
 8 | print('2 0 <eps> <eps>')
 9 | 
10 | with open(sys.argv[1], 'r', encoding='utf8') as fin:
11 |     node = 3
12 |     for entry in fin:
13 |         fields = entry.strip().split(' ')
14 |         phone = fields[0]
15 |         if phone == '<eps>' or phone == '<blank>':
16 |             continue
17 |         elif '#' in phone:  # disambiguous phone
18 |             print('{} {} {} {}'.format(0, 0, '<eps>', phone))
19 |         else:
20 |             print('{} {} {} {}'.format(1, node, phone, phone))
21 |             print('{} {} {} {}'.format(node, node, phone, '<eps>'))
22 |             print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
23 |         node += 1
24 | print('0')
25 | 


--------------------------------------------------------------------------------
/tools/fst/ctc_token_fst_compact.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | print('0 0 <blank> <eps>')
 6 | 
 7 | with open(sys.argv[1], 'r', encoding='utf8') as fin:
 8 |     node = 1
 9 |     for entry in fin:
10 |         fields = entry.strip().split(' ')
11 |         phone = fields[0]
12 |         if phone == '<eps>' or phone == '<blank>':
13 |             continue
14 |         elif '#' in phone:  # disambiguous phone
15 |             print('{} {} {} {}'.format(0, 0, '<eps>', phone))
16 |         else:
17 |             print('{} {} {} {}'.format(0, node, phone, phone))
18 |             print('{} {} {} {}'.format(node, node, phone, '<eps>'))
19 |             print('{} {} {} {}'.format(node, 0, '<eps>', '<eps>'))
20 |         node += 1
21 | print('0')
22 | 


--------------------------------------------------------------------------------
/tools/fst/eps2disambig.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | #                2015 Guoguo Chen
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # This script replaces epsilon with #0 on the input side only, of the G.fst
19 | # acceptor.
20 | 
# Filter: reads FST text lines from stdin or file arguments, writes to stdout.
while(<>){
  # Refuse input that already contains "#0" as a whitespace-delimited field.
  if (/\s+#0\s+/) {
    print STDERR "$0: ERROR: LM has word #0, " .
                 "which is reserved as disambiguation symbol\n";
    exit 1;
  }
  # Rewrite only the third field (after two leading integer fields) when it
  # is <eps>; any later <eps> on the line is left as is.
  s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
  print;
}
30 | 


--------------------------------------------------------------------------------
/tools/fst/rnnt_token_fst.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | 
 5 | print('0 0 <blank> <eps>')
 6 | 
 7 | with open(sys.argv[1], 'r', encoding='utf8') as fin:
 8 |     for entry in fin:
 9 |         fields = entry.strip().split(' ')
10 |         phone = fields[0]
11 |         if phone == '<eps>' or phone == '<blank>':
12 |             continue
13 |         elif '#' in phone:  # disambiguous phone
14 |             print('{} {} {} {}'.format(0, 0, '<eps>', phone))
15 |         else:
16 |             print('{} {} {} {}'.format(0, 0, phone, phone))
17 | print('0')
18 | 


--------------------------------------------------------------------------------
/tools/fst/s2eps.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 | 
# Filter: reads FST text lines from stdin or file arguments, writes to stdout.
while(<>){
    @A = split(" ", $_);
    # Only lines with at least four fields carry labels in fields 3 and 4;
    # shorter lines pass through unchanged apart from the re-join below.
    if ( @A >= 4 ) {
        if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
        if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
    }
    # Output is always tab-separated, whatever whitespace the input used.
    print join("\t", @A) . "\n";
}
28 | 


--------------------------------------------------------------------------------
/tools/git-pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | 
4 | echo "Running pre-commit flake8"
5 | python tools/flake8_hook.py
6 | 


--------------------------------------------------------------------------------
/tools/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
# Converts spk2utt lines ("spk utt1 utt2 ...") read from stdin or file
# arguments into utt2spk lines ("utt spk") on stdout.
while(<>){
    @A = split(" ", $_);
    # A line needs the speaker id plus at least one utterance id.
    @A > 1 || die "Invalid line in spk2utt file: $_";
    # First field is the speaker; the remaining fields are its utterances.
    $s = shift @A;
    foreach $u ( @A ) {
        print "$u $s\n";
    }
}
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/tools/spm_train:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates.
 3 | # All rights reserved.
 4 | #
 5 | # This source code is licensed under the license found in the
 6 | # https://github.com/pytorch/fairseq/blob/master/LICENSE
 7 | import sys
 8 | 
 9 | import sentencepiece as spm
10 | 
if __name__ == "__main__":
    # Forward all command-line arguments to the SentencePiece trainer as one
    # space-joined string; an argument that itself contains spaces is
    # therefore not kept as a single token.
    spm.SentencePieceTrainer.Train(" ".join(sys.argv[1:]))
13 | 


--------------------------------------------------------------------------------
/tools/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
# At most one input file may be given; with none, input comes from stdin.
if ( @ARGV > 1 ) {
    die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
}

while(<>){
    @A = split(" ", $_);
    # Each line must be exactly "<utt> <spk>".
    @A == 2 || die "Invalid line in utt2spk file: $_";
    ($u,$s) = @A;
    # Remember speakers in order of first appearance so the output order
    # follows the input rather than hash order.
    if(!$seen_spk{$s}) {
        $seen_spk{$s} = 1;
        push @spklist, $s;
    }
    # Collect the utterances of each speaker in input order.
    push (@{$spk_hash{$s}}, "$u");
}
# One output line per speaker: "<spk> <utt1> <utt2> ...".
foreach $s (@spklist) {
    $l = join(' ',@{$spk_hash{$s}});
    print "$s $l\n";
}
39 | 


--------------------------------------------------------------------------------
/tools/wav2dur.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # encoding: utf-8
 3 | 
 4 | import sys
 5 | 
 6 | import torchaudio
 7 | 
 8 | scp = sys.argv[1]
 9 | dur_scp = sys.argv[2]
10 | 
11 | with open(scp, 'r') as f, open(dur_scp, 'w') as fout:
12 |     cnt = 0
13 |     total_duration = 0
14 |     for l in f:
15 |         items = l.strip().split()
16 |         wav_id = items[0]
17 |         fname = items[1]
18 |         cnt += 1
19 |         waveform, rate = torchaudio.load(fname)
20 |         frames = len(waveform[0])
21 |         duration = frames / float(rate)
22 |         total_duration += duration
23 |         fout.write('{} {}\n'.format(wav_id, duration))
24 |     print('process {} utts'.format(cnt))
25 |     print('total {} s'.format(total_duration))
26 | 


--------------------------------------------------------------------------------
/tools/wav_to_duration.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # split the wav scp, calculate duration and merge
 3 | nj=4
 4 | . tools/parse_options.sh || exit 1;
 5 | 
 6 | inscp=$1
 7 | outscp=$2
 8 | data=$(dirname ${inscp})
 9 | if [ $# -eq 3 ]; then
10 |   logdir=$3
11 | else
12 |   logdir=${data}/log
13 | fi
14 | mkdir -p ${logdir}
15 | 
16 | rm -f $logdir/wav_*.slice
17 | rm -f $logdir/wav_*.shape
18 | split --additional-suffix .slice -d -n l/$nj $inscp $logdir/wav_
19 | 
20 | for slice in `ls $logdir/wav_*.slice`; do
21 | {
22 |     name=`basename -s .slice $slice`
23 |     tools/wav2dur.py $slice $logdir/$name.shape 1>$logdir/$name.log
24 | } &
25 | done
26 | wait
27 | cat $logdir/wav_*.shape > $outscp
28 | 


--------------------------------------------------------------------------------
/wenet/__init__.py:
--------------------------------------------------------------------------------
1 | from wenet.cli.model import load_feature, load_model, load_tokenizer  # noqa
2 | 


--------------------------------------------------------------------------------
/wenet/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/bin/__init__.py


--------------------------------------------------------------------------------
/wenet/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/cli/__init__.py


--------------------------------------------------------------------------------
/wenet/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/dataset/__init__.py


--------------------------------------------------------------------------------
/wenet/dataset/deprecated/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/dataset/deprecated/__init__.py


--------------------------------------------------------------------------------
/wenet/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/__init__.py


--------------------------------------------------------------------------------
/wenet/models/branchformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/branchformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/ctl_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ctl_model/__init__.py


--------------------------------------------------------------------------------
/wenet/models/e_branchformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/e_branchformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/efficient_conformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/efficient_conformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/finetune/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/finetune/__init__.py


--------------------------------------------------------------------------------
/wenet/models/finetune/lora/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/finetune/lora/__init__.py


--------------------------------------------------------------------------------
/wenet/models/finetune/lora/config.yaml:
--------------------------------------------------------------------------------
 1 | init_batch_size: 2
 2 | init_iters: 8
 3 | init_config:
 4 |   mode: "gradient"  # option: "simple", "svd", "gradient"
 5 |   lora_A: "unit"  # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros", "unit", "orthogonal"
 6 |   lora_A_std: 0.01  # only needed when lora_A is "gaussian"
 7 |   lora_B: "unit"  # option: "gaussian", "kaiming", "fan_out_kaiming", "xavier", "zeros", "unit", "orthogonal"
 8 |   lora_B_std: 0.01  # only needed when lora_B is "gaussian"
 9 |   scale: "stable"  # option: "default", "stable", "unit", "normalized", "gd", "weightS"
10 |   stable_gamma: 2  # only needed when scale is "stable"
11 |   direction: "ArB2r"  # option: "ArBr", "A2rBr", "ArB2r" (only needed when mode is "gradient")
12 |   dtype: "fp32"  # option: "bf16", "fp32"
13 |   norm_clip: false  # norm clipping
14 | 


--------------------------------------------------------------------------------
/wenet/models/firered/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/firered/__init__.py


--------------------------------------------------------------------------------
/wenet/models/k2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/k2/__init__.py


--------------------------------------------------------------------------------
/wenet/models/paraformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/paraformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/paraformer/embedding.py:
--------------------------------------------------------------------------------
 1 | from wenet.models.transformer.embedding import WhisperPositionalEncoding
 2 | 
 3 | 
class ParaformerPositinoalEncoding(WhisperPositionalEncoding):
    """ Sinusoids position encoding used in paraformer.encoder

    Thin subclass of WhisperPositionalEncoding: it builds the parent with
    ``depth`` and replaces ``xscale`` with ``sqrt(d_model)``.

    Args:
        depth: forwarded as the first positional argument of the parent
            constructor (presumably the encoding dimension; confirm against
            WhisperPositionalEncoding).
        d_model: only used to compute ``xscale``.
        dropout_rate: forwarded to the parent constructor.
        max_len: forwarded to the parent constructor.
    """

    def __init__(self,
                 depth: int,
                 d_model: int,
                 dropout_rate: float = 0.1,
                 max_len: int = 1500):
        super().__init__(depth, dropout_rate, max_len)
        # Set after the parent constructor so this value is the one kept.
        self.xscale = d_model**0.5
15 | 


--------------------------------------------------------------------------------
/wenet/models/squeezeformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/squeezeformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/ssl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ssl/__init__.py


--------------------------------------------------------------------------------
/wenet/models/ssl/bestrq/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ssl/bestrq/__init__.py


--------------------------------------------------------------------------------
/wenet/models/ssl/init_model.py:
--------------------------------------------------------------------------------
 1 | from wenet.models.ssl.bestrq.bestrq_model import BestRQModel
 2 | from wenet.models.ssl.w2vbert.w2vbert_model import W2VBERTModel
 3 | from wenet.models.ssl.wav2vec2.wav2vec2_model import Wav2vec2Model
 4 | 
# Registry mapping the value of configs['model'] to the SSL model class that
# init_model() instantiates.
WENET_SSL_MODEL_CLASS = {
    "w2vbert_model": W2VBERTModel,
    "wav2vec_model": Wav2vec2Model,
    "bestrq_model": BestRQModel
}
10 | 
11 | 
def init_model(configs, encoder):
    """Build the self-supervised model selected by ``configs['model']``.

    Args:
        configs: mapping with a ``'model'`` entry naming a key of
            WENET_SSL_MODEL_CLASS and a ``'model_conf'`` mapping whose items
            are passed to the model constructor as keyword arguments.
        encoder: passed to the model constructor as ``encoder``.

    Returns:
        The constructed model instance.
    """
    assert 'model' in configs
    model_name = configs['model']
    assert model_name in WENET_SSL_MODEL_CLASS
    model_cls = WENET_SSL_MODEL_CLASS[model_name]
    return model_cls(encoder=encoder, **configs['model_conf'])
20 | 


--------------------------------------------------------------------------------
/wenet/models/ssl/w2vbert/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ssl/w2vbert/__init__.py


--------------------------------------------------------------------------------
/wenet/models/ssl/wav2vec2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/ssl/wav2vec2/__init__.py


--------------------------------------------------------------------------------
/wenet/models/transducer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/transducer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/transducer/search/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/transducer/search/__init__.py


--------------------------------------------------------------------------------
/wenet/models/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/transformer/__init__.py


--------------------------------------------------------------------------------
/wenet/models/transformer/norm.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class RMSNorm(torch.nn.Module):
 5 |     """ https://arxiv.org/pdf/1910.07467.pdf
 6 |     """
 7 | 
 8 |     def __init__(
 9 |         self,
10 |         dim: int,
11 |         eps: float = 1e-6,
12 |         add_unit_offset: bool = True,
13 |     ):
14 |         super().__init__()
15 |         self.eps = eps
16 |         self.weight = torch.nn.Parameter(torch.ones(dim))
17 |         self.add_unit_offset = add_unit_offset
18 | 
19 |     def _norm(self, x):
20 |         return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
21 | 
22 |     def forward(self, x):
23 |         x = self._norm(x.float()).type_as(x)
24 |         if self.add_unit_offset:
25 |             return x * (1 + self.weight)
26 |         else:
27 |             return x * self.weight
28 | 


--------------------------------------------------------------------------------
/wenet/models/transformer/swish.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2020 Johns Hopkins University (Shinji Watanabe)
 2 | #               2020 Northwestern Polytechnical University (Pengcheng Guo)
 3 | #               2020 Mobvoi Inc (Binbin Zhang)
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #   http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | """Swish() activation function for Conformer."""
17 | 
18 | import torch
19 | 
20 | 
class Swish(torch.nn.Module):
    """Swish activation: the input multiplied by its own sigmoid."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute ``x * sigmoid(x)`` and return the resulting tensor."""
        gate = torch.sigmoid(x)
        return x * gate
27 | 


--------------------------------------------------------------------------------
/wenet/models/whisper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/models/whisper/__init__.py


--------------------------------------------------------------------------------
/wenet/text/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/text/__init__.py


--------------------------------------------------------------------------------
/wenet/text/base_tokenizer.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod, abstractproperty
 2 | from typing import Dict, List, Tuple, Union
 3 | 
 4 | T = Union[str, bytes]
 5 | 
 6 | 
 7 | class BaseTokenizer(ABC):
 8 | 
 9 |     def tokenize(self, line: str) -> Tuple[List[T], List[int]]:
10 |         tokens = self.text2tokens(line)
11 |         ids = self.tokens2ids(tokens)
12 |         return tokens, ids
13 | 
14 |     def detokenize(self, ids: List[int]) -> Tuple[str, List[T]]:
15 |         tokens = self.ids2tokens(ids)
16 |         text = self.tokens2text(tokens)
17 |         return text, tokens
18 | 
19 |     @abstractmethod
20 |     def text2tokens(self, line: str) -> List[T]:
21 |         raise NotImplementedError("abstract method")
22 | 
23 |     @abstractmethod
24 |     def tokens2text(self, tokens: List[T]) -> str:
25 |         raise NotImplementedError("abstract method")
26 | 
27 |     @abstractmethod
28 |     def tokens2ids(self, tokens: List[T]) -> List[int]:
29 |         raise NotImplementedError("abstract method")
30 | 
31 |     @abstractmethod
32 |     def ids2tokens(self, ids: List[int]) -> List[T]:
33 |         raise NotImplementedError("abstract method")
34 | 
35 |     @abstractmethod
36 |     def vocab_size(self) -> int:
37 |         raise NotImplementedError("abstract method")
38 | 
39 |     @abstractproperty
40 |     def symbol_table(self) -> Dict[T, int]:
41 |         raise NotImplementedError("abstract method")
42 | 


--------------------------------------------------------------------------------
/wenet/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenet-e2e/wenet/8423e175e4feffa017948a59084eba707f862973/wenet/utils/__init__.py


--------------------------------------------------------------------------------