├── VERSION ├── recipe ├── data │ └── local │ │ ├── lang │ │ ├── lex_ndisambig │ │ └── phone_map.txt │ │ └── dict │ │ ├── optional_silence.txt │ │ ├── silence_phones.txt │ │ ├── extra_questions.txt │ │ └── nonsilence_phones.txt ├── lm │ └── README ├── conf │ ├── mfcc.conf │ ├── online_cmvn.conf │ └── mfcc_hires.conf ├── path.sh ├── cmd.sh ├── local │ ├── score.sh │ └── nnet3 │ │ └── run_ivector_common.sh └── run.sh ├── requirements.txt ├── welcome.wav ├── .gitignore ├── RESULTS.txt ├── LICENSE ├── README.md └── transcribe.py /VERSION: -------------------------------------------------------------------------------- 1 | 1.0 2 | -------------------------------------------------------------------------------- /recipe/data/local/lang/lex_ndisambig: -------------------------------------------------------------------------------- 1 | 13 2 | -------------------------------------------------------------------------------- /recipe/lm/README: -------------------------------------------------------------------------------- 1 | Put lm.arpa.gz here 2 | -------------------------------------------------------------------------------- /recipe/data/local/dict/optional_silence.txt: -------------------------------------------------------------------------------- 1 | SIL 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | librosa~=0.8.0 2 | vosk~=0.3.0 3 | -------------------------------------------------------------------------------- /recipe/data/local/dict/silence_phones.txt: -------------------------------------------------------------------------------- 1 | NSN 2 | SIL 3 | SPN 4 | -------------------------------------------------------------------------------- /recipe/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --sample-frequency=16000 3 | -------------------------------------------------------------------------------- /welcome.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/fa_kaldi-rhasspy/HEAD/welcome.wav -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | *.log 4 | tmp/ 5 | 6 | model/ 7 | .venv/ 8 | *.gz 9 | -------------------------------------------------------------------------------- /recipe/data/local/dict/extra_questions.txt: -------------------------------------------------------------------------------- 1 | SIL SPN NSN 2 | æ ɒː e̞ iː o uː b p t d t͡ʃ d͡ʒ k g ʔ f v s z ʃ ʒ x ɢ h m n l ɾ j 3 | -------------------------------------------------------------------------------- /recipe/conf/online_cmvn.conf: -------------------------------------------------------------------------------- 1 | # configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh 2 | -------------------------------------------------------------------------------- /RESULTS.txt: -------------------------------------------------------------------------------- 1 | %WER 15.57 [ 17728 / 113894, 3384 ins, 1474 del, 12870 sub ] exp/nnet3_chain/tdnn_250/decode_test/wer_8_1.0 2 | %WER 13.58 [ 15472 / 113894, 2559 ins, 990 del, 11923 sub ] exp/nnet3_chain/tdnn_f/decode_test/wer_7_1.0 3 | 
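Each line above follows the format of Kaldi's compute-wer output: total errors over reference words, the insertion/deletion/substitution breakdown, and a decode path whose wer_8_1.0 / wer_7_1.0 suffix records the LM weight and word-insertion penalty selected by local/score.sh. A quick standalone check of the arithmetic (not part of the recipe itself):

``` python
# WER = (insertions + deletions + substitutions) / reference word count
for name, ins, dels, subs, ref_words in [
    ("tdnn_250 (small)", 3384, 1474, 12870, 113894),
    ("tdnn_f (large)", 2559, 990, 11923, 113894),
]:
    errors = ins + dels + subs
    print(f"{name}: {errors} errors, WER = {100 * errors / ref_words:.2f}%")
# tdnn_250 (small): 17728 errors, WER = 15.57%
# tdnn_f (large): 15472 errors, WER = 13.58%
```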
-------------------------------------------------------------------------------- /recipe/data/local/dict/nonsilence_phones.txt: -------------------------------------------------------------------------------- 1 | æ 2 | ɒː 3 | e̞ 4 | iː 5 | o 6 | uː 7 | b 8 | p 9 | t 10 | d 11 | t͡ʃ 12 | d͡ʒ 13 | k 14 | g 15 | ʔ 16 | f 17 | v 18 | s 19 | z 20 | ʃ 21 | ʒ 22 | x 23 | ɢ 24 | h 25 | m 26 | n 27 | l 28 | ɾ 29 | j 30 | -------------------------------------------------------------------------------- /recipe/path.sh: -------------------------------------------------------------------------------- 1 | if [ -d /opt/kaldi ]; then 2 | export KALDI_ROOT=/opt/kaldi 3 | else 4 | export KALDI_ROOT="$(realpath ${PWD}/../../..)" 5 | fi 6 | 7 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh 8 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH:$KALDI_ROOT/tools/sph2pipe_v2.5 9 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 10 | . $KALDI_ROOT/tools/config/common_path.sh 11 | 12 | # Add for mkgraph_lookahead.sh 13 | export LD_LIBRARY_PATH="${KALDI_ROOT}/tools/openfst/lib/fst:${LD_LIBRARY_PATH}" 14 | 15 | export LC_ALL=C 16 | -------------------------------------------------------------------------------- /recipe/conf/mfcc_hires.conf: -------------------------------------------------------------------------------- 1 | # config for high-resolution MFCC features, intended for neural network training 2 | # Note: we keep all cepstra, so it has the same info as filterbank features, 3 | # but MFCC is more easily compressible (because less correlated) which is why 4 | # we prefer this method. 5 | --use-energy=false # use average of log energy, not energy. 6 | --num-mel-bins=40 # similar to Google's setup. 7 | --num-ceps=40 # there is no dimensionality reduction. 8 | --low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so 9 | # there might be some information at the low end. 10 | --high-freq=-400 # high cutoff frequently, relative to Nyquist of 8000 (=7600) 11 | -------------------------------------------------------------------------------- /recipe/cmd.sh: -------------------------------------------------------------------------------- 1 | # "queue.pl" uses qsub. The options to it are 2 | # options to qsub. If you have GridEngine installed, 3 | # change this to a queue you have access to. 4 | # Otherwise, use "run.pl", which will run jobs locally 5 | # (make sure your --num-jobs options are no more than 6 | # the number of cpus on your machine. 
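# You can check that number with the "nproc" command.)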
7 | 8 | 9 | #activate this if you want to run the corpus with gridengine (http://gridengine.org/) 10 | #export train_cmd="queue.pl -l 'arch=*64*'" 11 | #export decode_cmd="queue.pl -l 'arch=*64*'" 12 | #export cuda_cmd="queue.pl -l gpu=1" 13 | 14 | export train_cmd="utils/run.pl" 15 | export decode_cmd="utils/run.pl" 16 | export cuda_cmd="utils/run.pl -l gpu=1" 17 | 18 | export nJobs=12 19 | export nDecodeJobs=12 20 | -------------------------------------------------------------------------------- /recipe/data/local/lang/phone_map.txt: -------------------------------------------------------------------------------- 1 | NSN NSN NSN_B NSN_E NSN_I NSN_S 2 | SIL SIL SIL_B SIL_E SIL_I SIL_S 3 | SPN SPN SPN_B SPN_E SPN_I SPN_S 4 | æ æ_B æ_E æ_I æ_S 5 | ɒː ɒː_B ɒː_E ɒː_I ɒː_S 6 | e̞ e̞_B e̞_E e̞_I e̞_S 7 | iː iː_B iː_E iː_I iː_S 8 | o o_B o_E o_I o_S 9 | uː uː_B uː_E uː_I uː_S 10 | b b_B b_E b_I b_S 11 | p p_B p_E p_I p_S 12 | t t_B t_E t_I t_S 13 | d d_B d_E d_I d_S 14 | t͡ʃ t͡ʃ_B t͡ʃ_E t͡ʃ_I t͡ʃ_S 15 | d͡ʒ d͡ʒ_B d͡ʒ_E d͡ʒ_I d͡ʒ_S 16 | k k_B k_E k_I k_S 17 | g g_B g_E g_I g_S 18 | ʔ ʔ_B ʔ_E ʔ_I ʔ_S 19 | f f_B f_E f_I f_S 20 | v v_B v_E v_I v_S 21 | s s_B s_E s_I s_S 22 | z z_B z_E z_I z_S 23 | ʃ ʃ_B ʃ_E ʃ_I ʃ_S 24 | ʒ ʒ_B ʒ_E ʒ_I ʒ_S 25 | x x_B x_E x_I x_S 26 | ɢ ɢ_B ɢ_E ɢ_I ɢ_S 27 | h h_B h_E h_I h_S 28 | m m_B m_E m_I m_S 29 | n n_B n_E n_I n_S 30 | l l_B l_E l_I l_S 31 | ɾ ɾ_B ɾ_E ɾ_I ɾ_S 32 | j j_B j_E j_I j_S 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Michael Hansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Persian Kaldi Profile 2 | 3 | A [Rhasspy](https://github.com/rhasspy/rhasspy) profile for Persian (`fa`). 4 | 5 | Trained from approximately 293 hours of audio from [Common Voice](https://commonvoice.mozilla.org/) (Persian 7.0 dataset, validated, 10% test). 
6 | 7 | Available [Vosk](https://alphacephei.com/vosk) models: 8 | 9 | * [Small nnet3](https://github.com/rhasspy/fa_kaldi-rhasspy/releases/download/v1.0/vosk-model-small-fa-rhasspy-0.15.zip) 10 | * WER: 15.57% 11 | * [Large nnet3](https://github.com/rhasspy/fa_kaldi-rhasspy/releases/download/v1.0/vosk-model-large-fa-rhasspy-0.15.zip) 12 | * WER: 13.58% 13 | 14 | ## Installation 15 | 16 | Get started by first installing [Vosk](https://alphacephei.com/vosk): 17 | 18 | ``` sh 19 | # Create virtual environment 20 | python3 -m venv .venv 21 | source .venv/bin/activate 22 | pip3 install --upgrade pip 23 | pip3 install --upgrade wheel setuptools 24 | 25 | # Install Vosk 26 | pip3 install vosk 27 | ``` 28 | 29 | Next, [download the model](https://github.com/rhasspy/fa_kaldi-rhasspy/releases/download/v1.0/vosk-model-small-fa-rhasspy-0.15.zip) and extract it: 30 | 31 | ``` sh 32 | wget 'https://github.com/rhasspy/fa_kaldi-rhasspy/releases/download/v1.0/vosk-model-small-fa-rhasspy-0.15.zip' 33 | unzip vosk-model-small-fa-rhasspy-0.15.zip 34 | ``` 35 | 36 | Finally, run the `transcribe.py` Python program with the model and an audio file: 37 | 38 | ``` sh 39 | python3 transcribe.py vosk-model-small-fa-rhasspy-0.15 welcome.wav 40 | 41 | {"result": [{"conf": 1.0, "end": 0.48, "start": 0.06, "word": "خوش"}, {"conf": 1.0, "end": 1.11, "start": 0.48, "word": "آمدید"}], "text": "خوش آمدید"} 42 | ``` 43 | 44 | For each audio file given to `transcribe.py`, a line of JSON will be printed in the output with the transcription details. 45 | -------------------------------------------------------------------------------- /transcribe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Transcribes audio files with Vosk (https://alphacephei.com/vosk)""" 3 | import argparse 4 | import json 5 | import sys 6 | 7 | import numpy as np 8 | import librosa 9 | from vosk import Model, KaldiRecognizer 10 | 11 | 12 | def main(): 13 | """Main entry point""" 14 | parser = argparse.ArgumentParser("vosk_example") 15 | parser.add_argument("model", help="Directory with speech to text model") 16 | parser.add_argument("audio", nargs="+", help="Audio file(s) to transcribe") 17 | parser.add_argument( 18 | "--sample-rate", default=16000, help="Sample rate of model in Hertz" 19 | ) 20 | args = parser.parse_args() 21 | 22 | model = Model(args.model) 23 | 24 | for audio_path in args.audio: 25 | # Load and re-sample audio if necessary 26 | audio, _sample_rate = librosa.load(audio_path, sr=args.sample_rate, mono=True) 27 | audio = audio_float_to_int16(audio).tobytes() 28 | 29 | rec = KaldiRecognizer(model, args.sample_rate) 30 | rec.SetWords(True) 31 | 32 | rec.AcceptWaveform(audio) 33 | 34 | # Parse JSON result and re-print so it's all on one line (JSONL) 35 | result = json.loads(rec.FinalResult()) 36 | json.dump(result, sys.stdout, ensure_ascii=False) 37 | print("") 38 | 39 | 40 | def audio_float_to_int16( 41 | audio: np.ndarray, max_wav_value: float = 32767.0 42 | ) -> np.ndarray: 43 | """Normalize audio and convert to int16 range""" 44 | audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio)))) 45 | audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value) 46 | audio_norm = audio_norm.astype("int16") 47 | return audio_norm 48 | 49 | 50 | # ----------------------------------------------------------------------------- 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- 
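transcribe.py hands each file to the recognizer in a single AcceptWaveform call, which is fine for short utterances but keeps the whole file in memory. For long recordings the same Vosk API can be fed in chunks instead; below is a minimal streaming sketch (the script name and the use of Python's built-in wave module are my own additions, and it assumes a 16-bit mono WAV already at the model's sample rate, e.g. welcome.wav):

``` python
#!/usr/bin/env python3
"""Chunked (streaming) decoding sketch with Vosk -- not part of this repository."""
import json
import sys
import wave

from vosk import Model, KaldiRecognizer

# Hypothetical usage: python3 stream_example.py vosk-model-small-fa-rhasspy-0.15 welcome.wav
model_dir, wav_path = sys.argv[1], sys.argv[2]

wav_file = wave.open(wav_path, "rb")
assert wav_file.getnchannels() == 1 and wav_file.getsampwidth() == 2, "expects 16-bit mono WAV"

rec = KaldiRecognizer(Model(model_dir), wav_file.getframerate())
rec.SetWords(True)

texts = []
while True:
    chunk = wav_file.readframes(4000)  # roughly 0.25 s of 16 kHz audio per chunk
    if not chunk:
        break
    if rec.AcceptWaveform(chunk):
        # The recognizer detected an endpoint; collect the finalized segment.
        texts.append(json.loads(rec.Result()).get("text", ""))

texts.append(json.loads(rec.FinalResult()).get("text", ""))
print(" ".join(t for t in texts if t))
```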
/recipe/local/score.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey, Yenda Trmal) 3 | # Apache 2.0 4 | 5 | [ -f ./path.sh ] && . ./path.sh 6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | stage=0 10 | decode_mbr=false 11 | reverse=false 12 | stats=true 13 | beam=6 14 | word_ins_penalty=0.0,0.5,1.0 15 | min_lmwt=7 16 | max_lmwt=17 17 | iter=final 18 | #end configuration section. 19 | 20 | echo "$0 $@" # Print the command line for logging 21 | [ -f ./path.sh ] && . ./path.sh 22 | . parse_options.sh || exit 1; 23 | 24 | if [ $# -ne 3 ]; then 25 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 26 | echo " Options:" 27 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 28 | echo " --stage (0|1|2) # start scoring script from part-way through." 29 | echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." 30 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 31 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 32 | echo " --reverse (true/false) # score with time reversed features " 33 | exit 1; 34 | fi 35 | 36 | data=$1 37 | lang_or_graph=$2 38 | dir=$3 39 | 40 | symtab=$lang_or_graph/words.txt 41 | 42 | for f in $symtab $dir/lat.1.gz $data/text; do 43 | [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; 44 | done 45 | 46 | 47 | ref_filtering_cmd="cat" 48 | [ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter" 49 | [ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter" 50 | hyp_filtering_cmd="cat" 51 | [ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter" 52 | [ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter" 53 | 54 | 55 | if $decode_mbr ; then 56 | echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty" 57 | else 58 | echo "$0: scoring with word insertion penalty=$word_ins_penalty" 59 | fi 60 | 61 | 62 | mkdir -p $dir/scoring_kaldi 63 | cat $data/text | $ref_filtering_cmd > $dir/scoring_kaldi/test_filt.txt || exit 1; 64 | 65 | if [ $stage -le 0 ]; then 66 | 67 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 68 | mkdir -p $dir/scoring_kaldi/penalty_$wip/log 69 | 70 | if $decode_mbr ; then 71 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ 72 | acwt=\`perl -e \"print 1.0/LMWT\"\`\; \ 73 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 74 | lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ 75 | lattice-prune --beam=$beam ark:- ark:- \| \ 76 | lattice-mbr-decode --word-symbol-table=$symtab \ 77 | ark:- ark,t:- \| \ 78 | utils/int2sym.pl -f 2- $symtab \| \ 79 | $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; 80 | 81 | else 82 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \ 83 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 84 | lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ 85 | lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \ 86 | utils/int2sym.pl -f 2- $symtab \| \ 87 | $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1; 88 | fi 89 | 90 | if $reverse; then # rarely-used option, ignore this. 
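# (the awk command below reverses the word order of each hypothesis; the utterance id in field 1 stays first)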
91 | for lmwt in `seq $min_lmwt $max_lmwt`; do 92 | mv $dir/scoring_kaldi/penalty_$wip/$lmwt.txt $dir/scoring_kaldi/penalty_$wip/$lmwt.txt.orig 93 | awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ 94 | <$dir/scoring_kaldi/penalty_$wip/$lmwt.txt.orig >$dir/scoring_kaldi/penalty_$wip/$lmwt.txt 95 | done 96 | fi 97 | 98 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \ 99 | cat $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \ 100 | compute-wer --text --mode=present \ 101 | ark:$dir/scoring_kaldi/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; 102 | 103 | done 104 | fi 105 | 106 | 107 | 108 | if [ $stage -le 1 ]; then 109 | 110 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 111 | for lmwt in $(seq $min_lmwt $max_lmwt); do 112 | # adding /dev/null to the command list below forces grep to output the filename 113 | grep WER $dir/wer_${lmwt}_${wip} /dev/null 114 | done 115 | done | utils/best_wer.sh >& $dir/scoring_kaldi/best_wer || exit 1 116 | 117 | best_wer_file=$(awk '{print $NF}' $dir/scoring_kaldi/best_wer) 118 | best_wip=$(echo $best_wer_file | awk -F_ '{print $NF}') 119 | best_lmwt=$(echo $best_wer_file | awk -F_ '{N=NF-1; print $N}') 120 | 121 | if [ -z "$best_lmwt" ]; then 122 | echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong." 123 | exit 1; 124 | fi 125 | 126 | if $stats; then 127 | mkdir -p $dir/scoring_kaldi/wer_details 128 | echo $best_lmwt > $dir/scoring_kaldi/wer_details/lmwt # record best language model weight 129 | echo $best_wip > $dir/scoring_kaldi/wer_details/wip # record best word insertion penalty 130 | 131 | $cmd $dir/scoring_kaldi/log/stats1.log \ 132 | cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \ 133 | align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt.txt ark:- ark,t:- \| \ 134 | utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \|\ 135 | utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1; 136 | 137 | $cmd $dir/scoring_kaldi/log/stats2.log \ 138 | cat $dir/scoring_kaldi/wer_details/per_utt \| \ 139 | utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \ 140 | sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1; 141 | 142 | $cmd $dir/scoring_kaldi/log/wer_bootci.log \ 143 | compute-wer-bootci --mode=present \ 144 | ark:$dir/scoring_kaldi/test_filt.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \ 145 | '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1; 146 | 147 | fi 148 | fi 149 | 150 | # If we got here, the scoring was successful. 151 | # As a small aid to prevent confusion, we remove all wer_{?,??} files; 152 | # these originate from the previous version of the scoring files 153 | rm $dir/wer_{?,??} 2>/dev/null 154 | 155 | exit 0; 156 | -------------------------------------------------------------------------------- /recipe/local/nnet3/run_ivector_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -o pipefail 4 | 5 | 6 | # This script is called from local/nnet3/run_tdnn.sh and local/chain/run_tdnn.sh (and may eventually 7 | # be called by more scripts). It contains the common feature preparation and iVector-related parts 8 | # of the script. See those scripts for examples of usage. 
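# In short: it speed- and volume-perturbs the training data, extracts 40-dimensional
# high-resolution MFCCs (conf/mfcc_hires.conf), combines short segments, and produces
# the online iVectors that the chain training stages in run.sh expect.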
9 | 10 | 11 | stage=0 12 | nj=30 13 | min_seg_len=1.55 # min length in seconds... we do this because chain training 14 | # will discard segments shorter than 1.5 seconds. Must remain in sync 15 | # with the same option given to prepare_lores_feats_and_alignments.sh 16 | train_set=train_cleaned # you might set this to e.g. train. 17 | gmm=tri3_cleaned # This specifies a GMM-dir from the features of the type you're training the system on; 18 | # it should contain alignments for 'train_set'. 19 | 20 | num_threads_ubm=32 21 | nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it 22 | # becomes exp/nnet3_cleaned or whatever. 23 | 24 | . ./cmd.sh 25 | . ./path.sh 26 | . utils/parse_options.sh 27 | 28 | gmm_dir=exp/${gmm} 29 | ali_dir=exp/${gmm}_ali_${train_set}_sp_comb 30 | 31 | for f in data/${train_set}/feats.scp ${gmm_dir}/final.mdl; do 32 | if [ ! -f $f ]; then 33 | echo "$0: expected file $f to exist" 34 | exit 1 35 | fi 36 | done 37 | 38 | 39 | 40 | if [ $stage -le 2 ] && [ -f data/${train_set}_sp_hires/feats.scp ]; then 41 | echo "$0: data/${train_set}_sp_hires/feats.scp already exists." 42 | echo " ... Please either remove it, or rerun this script with stage > 2." 43 | exit 1 44 | fi 45 | 46 | 47 | if [ $stage -le 1 ]; then 48 | echo "$0: preparing directory for speed-perturbed data" 49 | utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp 50 | fi 51 | 52 | if [ $stage -le 2 ]; then 53 | echo "$0: creating high-resolution MFCC features" 54 | 55 | # this shows how you can split across multiple file-systems. we'll split the 56 | # MFCC dir across multiple locations. You might want to be careful here, if you 57 | # have multiple copies of Kaldi checked out and run the same recipe, not to let 58 | # them overwrite each other. 59 | mfccdir=data/${train_set}_sp_hires/data 60 | if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then 61 | utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage 62 | fi 63 | 64 | for datadir in ${train_set}_sp test; do 65 | utils/copy_data_dir.sh data/$datadir data/${datadir}_hires 66 | done 67 | 68 | # do volume-perturbation on the training data prior to extracting hires 69 | # features; this helps make trained nnets more invariant to test data volume. 70 | utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires 71 | 72 | for datadir in ${train_set}_sp test; do 73 | steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ 74 | --cmd "$train_cmd" data/${datadir}_hires 75 | steps/compute_cmvn_stats.sh data/${datadir}_hires 76 | utils/fix_data_dir.sh data/${datadir}_hires 77 | done 78 | fi 79 | 80 | if [ $stage -le 3 ]; then 81 | echo "$0: combining short segments of speed-perturbed high-resolution MFCC training data" 82 | # we have to combine short segments or we won't be able to train chain models 83 | # on those segments. 84 | utils/data/combine_short_segments.sh \ 85 | data/${train_set}_sp_hires $min_seg_len data/${train_set}_sp_hires_comb 86 | 87 | # just copy over the CMVN to avoid having to recompute it. 88 | cp data/${train_set}_sp_hires/cmvn.scp data/${train_set}_sp_hires_comb/ 89 | utils/fix_data_dir.sh data/${train_set}_sp_hires_comb/ 90 | fi 91 | 92 | if [ $stage -le 4 ]; then 93 | echo "$0: selecting segments of hires training data that were also present in the" 94 | echo " ... original training data." 
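# (utils/data/subset_data_dir.sh --utt-list below keeps only the utterances listed
# in the original train set's feats.scp)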
95 | 96 | # note, these data-dirs are temporary; we put them in a sub-directory 97 | # of the place where we'll make the alignments. 98 | temp_data_root=exp/nnet3${nnet3_affix}/tri5 99 | mkdir -p $temp_data_root 100 | 101 | utils/data/subset_data_dir.sh --utt-list data/${train_set}/feats.scp \ 102 | data/${train_set}_sp_hires $temp_data_root/${train_set}_hires 103 | 104 | # note: essentially all the original segments should be in the hires data. 105 | n1=$(wc -l . 19 | # 20 | # adapted from kaldi's egs/tedlium/s5_r2/local/chain/run_tdnn.sh 21 | 22 | mfccdir=mfcc_chain 23 | 24 | stage=0 25 | min_seg_len=1.55 26 | train_set=train 27 | gmm=tri2b_chain # the gmm for the target data 28 | nnet3_affix=_chain # cleanup affix for nnet3 and chain dirs, e.g. _cleaned 29 | num_threads_ubm=12 30 | get_egs_stage=-10 31 | 32 | xent_regularize=0.1 33 | train_stage=-10 34 | common_egs_dir= # you can set this to use previously dumped egs. 35 | dropout_schedule='0,0@0.20,0.5@0.50,0' 36 | frames_per_eg=150,110,100 37 | 38 | # pre-flight checks 39 | 40 | if [ -f cmd.sh ]; then 41 | . cmd.sh; else 42 | echo "missing cmd.sh"; exit 1; 43 | fi 44 | 45 | # Path also sets LC_ALL=C for Kaldi, otherwise you will experience strange (and hard to debug!) bugs. It should be set here, after the python scripts and not at the beginning of this script 46 | if [ -f path.sh ]; then 47 | . path.sh; else 48 | echo "missing path.sh"; exit 1; 49 | 50 | fi 51 | 52 | # At this script level we don't support not running on GPU, as it would be painfully slow. 53 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 54 | # --num-threads 16 and --minibatch-size 128. 55 | 56 | if ! cuda-compiled; then 57 | cat < data/local/dict/lexicon.txt 93 | fi 94 | 95 | utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang 96 | 97 | fi 98 | 99 | # 100 | # adapt our LM for kaldi 101 | # 102 | 103 | if [ $stage -le 2 ]; then 104 | 105 | echo 106 | echo "adapt our LM for kaldi..." 107 | echo 108 | 109 | rm -rf data/lang_test 110 | cp -r data/lang data/lang_test 111 | 112 | echo 113 | echo "creating G.fst..." 
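# The pipeline below converts the ARPA language model into data/lang_test/G.fst:
# find_arpa_oovs.pl lists words missing from words.txt, the grep -v filters drop the
# illegal <s>/</s> n-grams that arpa2fst cannot handle, remove_oovs.pl strips arcs with
# out-of-vocabulary words, eps2disambig.pl/s2eps.pl replace backoff epsilons with #0 and
# the sentence-boundary symbols with epsilon, and fstcompile/fstrmepsilon build the final
# grammar FST.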
114 | 
115 | mkdir -p data/local/lm/
116 | zcat lm/lm.arpa.gz | utils/find_arpa_oovs.pl data/lang_test/words.txt > data/local/lm/oovs_lm.txt
117 | 
118 | zcat lm/lm.arpa.gz | \
119 | grep -v '<s> <s>' | \
120 | grep -v '</s> <s>' | \
121 | grep -v '</s> </s>' | \
122 | arpa2fst - | fstprint | \
123 | utils/remove_oovs.pl data/local/lm/oovs_lm.txt | \
124 | utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=data/lang_test/words.txt \
125 | --osymbols=data/lang_test/words.txt --keep_isymbols=false --keep_osymbols=false | \
126 | fstrmepsilon > data/lang_test/G.fst
127 | 
128 | fi
129 | 
130 | if [ $stage -le 3 ]; then
131 | echo
132 | echo make mfcc
133 | echo
134 | 
135 | rm -rf exp/
136 | 
137 | for datadir in train test; do
138 | utils/fix_data_dir.sh data/$datadir
139 | 
140 | mkdir -p data/$datadir/wav.scp exp/make_mfcc_chain/$datadir
141 | 
142 | for f in wav.scp utt2spk spk2utt; do
143 | cp data/$datadir/$f exp/make_mfcc_chain/$datadir/
144 | done
145 | 
146 | steps/make_mfcc.sh --cmd "$train_cmd" --nj $nJobs data/$datadir exp/make_mfcc_chain/$datadir $mfccdir || exit 1;
147 | utils/fix_data_dir.sh data/${datadir} # some files fail to get mfcc for many reasons
148 | steps/compute_cmvn_stats.sh data/${datadir} exp/make_mfcc_chain/$datadir $mfccdir || exit 1;
149 | utils/fix_data_dir.sh data/${datadir} # some files fail to get mfcc for many reasons
150 | done
151 | fi
152 | 
153 | if [ $stage -le 4 ]; then
154 | echo
155 | echo mono0a_chain
156 | echo
157 | 
158 | steps/train_mono.sh --nj $nJobs --cmd "$train_cmd" \
159 | data/train data/lang exp/mono0a_chain || exit 1;
160 | fi
161 | 
162 | if [ $stage -le 5 ]; then
163 | echo
164 | echo tri1_chain
165 | echo
166 | 
167 | steps/align_si.sh --nj $nJobs --cmd "$train_cmd" \
168 | data/train data/lang exp/mono0a_chain exp/mono0a_ali_chain || exit 1;
169 | 
170 | steps/train_deltas.sh --cmd "$train_cmd" 2000 10000 \
171 | data/train data/lang exp/mono0a_ali_chain exp/tri1_chain || exit 1;
172 | fi
173 | 
174 | if [ $stage -le 6 ]; then
175 | echo
176 | echo tri2b_chain
177 | echo
178 | 
179 | steps/align_si.sh --nj $nJobs --cmd "$train_cmd" \
180 | data/train data/lang exp/tri1_chain exp/tri1_ali_chain || exit 1;
181 | 
182 | steps/train_lda_mllt.sh --cmd "$train_cmd" \
183 | --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
184 | data/train data/lang exp/tri1_ali_chain exp/tri2b_chain || exit 1;
185 | 
186 | utils/mkgraph.sh data/lang_test \
187 | exp/tri2b_chain exp/tri2b_chain/graph || exit 1;
188 | fi
189 | 
190 | gmm_dir=exp/$gmm
191 | ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
192 | tree_dir=exp/nnet3${nnet3_affix}/tree_sp
193 | lang=data/lang_chain
194 | lat_dir=exp/nnet3${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
195 | dir=exp/nnet3${nnet3_affix}/tdnn_250
196 | train_data_dir=data/${train_set}_sp_hires_comb
197 | lores_train_data_dir=data/${train_set}_sp_comb
198 | train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
199 | 
200 | if [ $stage -le 7 ]; then
201 | echo
202 | echo run_ivector_common.sh
203 | echo
204 | 
205 | local/nnet3/run_ivector_common.sh --stage 0 \
206 | --nj $nJobs \
207 | --min-seg-len $min_seg_len \
208 | --train-set $train_set \
209 | --gmm $gmm \
210 | --num-threads-ubm $num_threads_ubm \
211 | --nnet3-affix "$nnet3_affix"
212 | 
213 | for f in $gmm_dir/final.mdl $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
214 | $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz; do
215 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
216 | done
217 | fi
218 | 
219 | if [ $stage -le 8 ]; then
220 | echo
221 | echo creating lang directory with one state per phone.
222 | echo
223 | 
224 | if [ -d data/lang_chain ]; then
225 | if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then
226 | echo "$0: data/lang_chain already exists, not overwriting it; continuing"
227 | else
228 | echo "$0: data/lang_chain already exists and seems to be older than data/lang..."
229 | echo " ... not sure what to do. Exiting."
230 | exit 1;
231 | fi
232 | else
233 | cp -r data/lang data/lang_chain
234 | silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
235 | nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
236 | # Use our special topology... note that later on may have to tune this
237 | # topology.
238 | steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
239 | fi
240 | fi
241 | 
242 | if [ $stage -le 9 ]; then
243 | echo
244 | echo 'Get the alignments as lattices (gives the chain training more freedom).'
245 | echo
246 | 
247 | steps/align_fmllr_lats.sh --nj $nJobs --cmd "$train_cmd" ${lores_train_data_dir} \
248 | data/lang $gmm_dir $lat_dir
249 | rm $lat_dir/fsts.*.gz # save space
250 | fi
251 | 
252 | if [ $stage -le 10 ]; then
253 | echo
254 | echo 'Build a tree using our new topology. We know we have alignments for the'
255 | echo 'speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use'
256 | echo 'those.'
257 | echo
258 | 
259 | if [ -f $tree_dir/final.mdl ]; then
260 | echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
261 | exit 1;
262 | fi
263 | steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
264 | --context-opts "--context-width=2 --central-position=1" \
265 | --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
266 | 
267 | fi
268 | 
269 | #
270 | # smaller model for embedded use
271 | #
272 | 
273 | if [ $stage -le 11 ]; then
274 | 
275 | mkdir -p $dir
276 | 
277 | echo
278 | echo "$0: creating neural net configs using the xconfig parser";
279 | echo
280 | 
281 | num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
282 | learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python3)
283 | 
284 | mkdir -p $dir/configs
285 | cat <<EOF > $dir/configs/network.xconfig
286 | input dim=100 name=ivector
287 | input dim=40 name=input
288 | 
289 | # please note that it is important to have input layer with the name=input
290 | # as the layer immediately preceding the fixed-affine-layer to enable
291 | # the use of short notation for the descriptor
292 | fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
293 | 
294 | # the first splicing is moved before the lda layer, so no splicing here
295 | relu-batchnorm-layer name=tdnn1 dim=250 self-repair-scale=1.0e-04
296 | relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=250
297 | relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=250
298 | relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=250
299 | relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=250
300 | relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=250
301 | 
302 | ## adding the layers for chain branch
303 | relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=250 target-rms=0.5
304 | output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
305 | 
306 | # adding the layers for xent branch
307 | #
This block prints the configs for a separate output that will be 308 | # trained with a cross-entropy objective in the 'chain' models... this 309 | # has the effect of regularizing the hidden parts of the model. we use 310 | # 0.5 / args.xent_regularize as the learning rate factor- the factor of 311 | # 0.5 / args.xent_regularize is suitable as it means the xent 312 | # final-layer learns at a rate independent of the regularization 313 | # constant; and the 0.5 was tuned so as to make the relative progress 314 | # similar in the xent and regular final layers. 315 | relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=250 target-rms=0.5 316 | output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 317 | 318 | EOF 319 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ 320 | 321 | echo 322 | echo train.py 323 | echo 324 | 325 | steps/nnet3/chain/train.py --stage $train_stage \ 326 | --cmd "$decode_cmd" \ 327 | --feat.online-ivector-dir $train_ivector_dir \ 328 | --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ 329 | --chain.xent-regularize 0.1 \ 330 | --chain.leaky-hmm-coefficient 0.1 \ 331 | --chain.l2-regularize 0.00005 \ 332 | --chain.apply-deriv-weights false \ 333 | --chain.lm-opts="--num-extra-lm-states=2000" \ 334 | --egs.dir "$common_egs_dir" \ 335 | --egs.opts "--frames-overlap-per-eg 0" \ 336 | --egs.chunk-width 150 \ 337 | --trainer.num-chunk-per-minibatch 512 \ 338 | --trainer.frames-per-iter 1500000 \ 339 | --trainer.num-epochs 4 \ 340 | --trainer.optimization.proportional-shrink 20 \ 341 | --trainer.optimization.num-jobs-initial 1 \ 342 | --trainer.optimization.num-jobs-final 1 \ 343 | --trainer.optimization.initial-effective-lrate 0.001 \ 344 | --trainer.optimization.final-effective-lrate 0.0001 \ 345 | --trainer.max-param-change 2.0 \ 346 | --use-gpu wait \ 347 | --cleanup.remove-egs true \ 348 | --feat-dir $train_data_dir \ 349 | --tree-dir $tree_dir \ 350 | --lat-dir $lat_dir \ 351 | --dir $dir 352 | 353 | echo 354 | echo mkgraph 355 | echo 356 | 357 | utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph 358 | 359 | if [[ -f utils/mkgraph_lookahead.sh ]]; then 360 | utils/mkgraph_lookahead.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph_lookahead 361 | fi 362 | fi 363 | 364 | if [ $stage -le 12 ]; then 365 | echo 366 | echo decode 367 | echo 368 | 369 | steps/nnet3/decode.sh --num-threads 1 --nj $nDecodeJobs --cmd "$decode_cmd" \ 370 | --acwt 1.0 --post-decode-acwt 10.0 \ 371 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ 372 | --scoring-opts "--min-lmwt 5 " \ 373 | $dir/graph data/test_hires $dir/decode_test || exit 1; 374 | 375 | grep WER $dir/decode_test/scoring_kaldi/best_wer >>RESULTS.txt 376 | fi 377 | 378 | # 379 | # larger tdnn_f model for higher end machines 380 | # 381 | # network config based on 382 | # egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh 383 | # 384 | 385 | dir=exp/nnet3${nnet3_affix}/tdnn_f 386 | 387 | num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') 388 | learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python3) 389 | affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true" 390 | tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" 391 | linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" 392 | prefinal_opts="l2-regularize=0.01" 393 | output_opts="l2-regularize=0.002" 394 | 
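# Note: dropout-proportion=0.0 in affine_opts/tdnnf_opts is only the starting value; the
# schedule actually used is $dropout_schedule ('0,0@0.20,0.5@0.50,0'), which holds dropout
# at 0 for the first 20% of training, ramps it linearly to 0.5 at the halfway point, and
# brings it back to 0 by the end. bypass-scale=0.66 scales the tdnnf-layer skip connections.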
395 | if [ $stage -le 13 ]; then
396 | 
397 | mkdir -p $dir
398 | 
399 | echo
400 | echo "$0: creating neural net configs using the xconfig parser";
401 | echo
402 | 
403 | mkdir -p $dir/configs
404 | cat <<EOF > $dir/configs/network.xconfig
405 | input dim=100 name=ivector
406 | input dim=40 name=input
407 | # please note that it is important to have input layer with the name=input
408 | # as the layer immediately preceding the fixed-affine-layer to enable
409 | # the use of short notation for the descriptor
410 | fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
411 | # the first splicing is moved before the lda layer, so no splicing here
412 | relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
413 | tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
414 | tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
415 | tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
416 | tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
417 | tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
418 | tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
419 | tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
420 | tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
421 | tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
422 | tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
423 | tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
424 | tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
425 | tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
426 | tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
427 | linear-component name=prefinal-l dim=256 $linear_opts
428 | prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
429 | output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
430 | prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
431 | output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
432 | EOF
433 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
434 | 
435 | fi
436 | 
437 | if [ $stage -le 14 ]; then
438 | 
439 | echo
440 | echo train.py
441 | echo
442 | 
443 | steps/nnet3/chain/train.py --stage $train_stage \
444 | --cmd "$decode_cmd" \
445 | --feat.online-ivector-dir $train_ivector_dir \
446 | --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
447 | --chain.xent-regularize $xent_regularize \
448 | --chain.leaky-hmm-coefficient 0.1 \
449 | --chain.l2-regularize 0.0 \
450 | --chain.apply-deriv-weights false \
451 | --chain.lm-opts="--num-extra-lm-states=2000" \
452 | --trainer.dropout-schedule $dropout_schedule \
453 | --trainer.add-option="--optimization.memory-compression-level=2" \
454 | --egs.dir "$common_egs_dir" \
455 | --egs.stage $get_egs_stage \
456 | --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
457 | --egs.chunk-width $frames_per_eg \
458 | --trainer.num-chunk-per-minibatch 288 \
459 | --trainer.frames-per-iter 1500000 \
460 | --trainer.num-epochs 6 \
461 | --trainer.optimization.num-jobs-initial 1 \
462 | 
--trainer.optimization.num-jobs-final 1 \ 463 | --trainer.optimization.initial-effective-lrate 0.00025 \ 464 | --trainer.optimization.final-effective-lrate 0.000025 \ 465 | --trainer.max-param-change 2.0 \ 466 | --use-gpu wait \ 467 | --cleanup.remove-egs true \ 468 | --feat-dir $train_data_dir \ 469 | --tree-dir $tree_dir \ 470 | --lat-dir $lat_dir \ 471 | --dir $dir || exit 1; 472 | 473 | echo 474 | echo mkgraph 475 | echo 476 | 477 | utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph 478 | 479 | if [[ -f utils/mkgraph_lookahead.sh ]]; then 480 | utils/mkgraph_lookahead.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph_lookahead 481 | fi 482 | 483 | echo 484 | echo decode 485 | echo 486 | 487 | steps/nnet3/decode.sh --num-threads 1 --nj $nDecodeJobs --cmd "$decode_cmd" \ 488 | --acwt 1.0 --post-decode-acwt 10.0 \ 489 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_test_hires \ 490 | --scoring-opts "--min-lmwt 5 " \ 491 | $dir/graph data/test_hires $dir/decode_test || exit 1; 492 | 493 | grep WER $dir/decode_test/scoring_kaldi/best_wer >>RESULTS.txt 494 | 495 | fi 496 | --------------------------------------------------------------------------------