├── log.log ├── proto ├── PASE.proto ├── channelAvg.proto ├── adam.proto ├── logMelFb.proto ├── sgd.proto ├── GRU_cudnn.proto ├── LSTM_cudnn.proto ├── RNN_cudnn.cfg ├── QLSTM.proto ├── RNN_cudnn.proto ├── rmsprop.proto ├── MLP.proto ├── decoding.proto ├── GRU.proto ├── RNN.proto ├── LSTM.proto ├── liGRU.proto ├── model.proto ├── CNN.proto ├── minimalGRU.proto ├── SincNet.proto ├── SRU.proto ├── global_chunk.proto ├── global_chunk_refac01.proto ├── global.proto └── global_refac01.proto ├── kaldi_decoding_scripts ├── conf │ ├── mfcc.conf │ ├── fbank.conf │ ├── test_spk.list │ ├── dev_spk.list │ └── phones.60-48-39.map ├── utils │ ├── filt.py │ ├── fix_ctm.sh │ ├── spk2utt_to_utt2spk.pl │ ├── s2eps.pl │ ├── eps2disambig.pl │ ├── build_const_arpa_lm.sh │ ├── summarize_warnings.pl │ ├── utt2spk_to_spk2utt.pl │ ├── shuffle_list.pl │ ├── analyze_segments.pl │ ├── show_lattice.sh │ ├── best_wer.sh │ ├── remove_oovs.pl │ ├── add_disambig.pl │ ├── remove_data_links.sh │ ├── nnet │ │ ├── gen_hamm_mat.py │ │ ├── gen_splice.py │ │ ├── gen_dct_mat.py │ │ ├── make_lstm_proto.py │ │ └── make_blstm_proto.py │ ├── ln.pl │ ├── make_unigram_grammar.pl │ ├── int2sym.pl │ ├── reduce_data_dir_by_reclist.sh │ ├── reduce_data_dir.sh │ ├── scoring │ │ └── wer_report.pl │ ├── create_split_dir.pl │ ├── find_arpa_oovs.pl │ ├── prepare_online_nnet_dist_build.sh │ ├── convert_slf_parallel.sh │ ├── combine_data.sh │ ├── apply_map.pl │ ├── format_lm.sh │ ├── filter_scp.pl │ ├── gen_topo.pl │ ├── subset_scp.pl │ ├── convert_ctm.pl │ ├── rnnlm_compute_scores.sh │ ├── perturb_data_dir_speed.sh │ ├── summarize_logs.pl │ ├── sym2int.pl │ ├── copy_data_dir.sh │ ├── parse_options.sh │ ├── reverse_lm.sh │ ├── subset_data_dir_tr_cv.sh │ ├── map_arpa_lm.pl │ ├── pinyin_map.pl │ ├── filter_scps.pl │ ├── create_data_link.pl │ ├── reverse_lm_test.sh │ └── format_lm_sri.sh ├── path.sh ├── local │ ├── nnet │ │ ├── run_autoencoder.sh │ │ └── run_dnn.sh │ ├── score_basic.sh │ ├── timit_format_data.sh │ ├── 
score_sclite.sh │ ├── score.sh │ ├── score_phrich.sh │ ├── timit_prepare_dict.sh │ ├── timit_norm_trans.pl │ └── score_wsj.sh ├── cmd.sh ├── decode_dnn.sh └── parse_options.sh ├── requirements.txt ├── env.sh ├── pytorch-kaldi_logo.png ├── .gitignore ├── check_res_dec.sh ├── RESULTS ├── plot_acc_and_loss.py ├── best_wer.sh ├── tune_hyperparameters.py ├── cfg ├── TIMIT_baselines │ ├── TIMIT_MLP_mfcc_basic.cfg │ ├── TIMIT_MLP_mfcc_basic_flex.cfg │ └── TIMIT_MLP_fbank_autoencoder.cfg └── Librispeech_baselines │ └── libri_MLP_fmllr.cfg └── save_raw_fea.py /log.log: -------------------------------------------------------------------------------- 1 | prov 2 | dopo 3 | prima 4 | -------------------------------------------------------------------------------- /proto/PASE.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | pase_cfg=str 3 | pase_model=path 4 | 5 | -------------------------------------------------------------------------------- /proto/channelAvg.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | chAvg_channelWeights=str 3 | 4 | 5 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false # only non-default option. 
2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=2.1.0 2 | scipy>=1.0.0 3 | numpy>=1.14.2 4 | blockdiag>=1.0 5 | -------------------------------------------------------------------------------- /env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export PYTORCH_KALDI_DIR=`pwd` 4 | export PYTORCH_EXP=`pwd`/exp -------------------------------------------------------------------------------- /pytorch-kaldi_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mravanelli/pytorch-kaldi/HEAD/pytorch-kaldi_logo.png -------------------------------------------------------------------------------- /kaldi_decoding_scripts/conf/fbank.conf: -------------------------------------------------------------------------------- 1 | --htk-compat=true 2 | --window-type=hamming 3 | --num-mel-bins=23 4 | 5 | -------------------------------------------------------------------------------- /proto/adam.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | opt_betas=float_list(0,inf) 3 | opt_eps=float 4 | opt_weight_decay=float(0,inf) 5 | opt_amsgrad=bool 6 | 7 | -------------------------------------------------------------------------------- /proto/logMelFb.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | logmelfb_nr_filt=int 3 | logmelfb_stft_window_size=int 4 | logmelfb_stft_window_shift=int 5 | 6 | 7 | -------------------------------------------------------------------------------- /proto/sgd.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | opt_momentum=float(0,inf) 3 | opt_weight_decay=float(0,inf) 4 | opt_dampening=float(0,inf) 5 | 
opt_nesterov=bool -------------------------------------------------------------------------------- /proto/GRU_cudnn.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | hidden_size=int 3 | num_layers=int 4 | bias=bool 5 | batch_first=bool 6 | dropout=float(0,1) 7 | bidirectional=bool -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore temporary python files / dirs 2 | *.pyc 3 | __pycache__ 4 | 5 | # Ignore user-generated exp directories 6 | exp 7 | .DS_Store 8 | -------------------------------------------------------------------------------- /proto/LSTM_cudnn.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | hidden_size=int 3 | num_layers=int 4 | bias=bool 5 | batch_first=bool 6 | dropout=float(0,1) 7 | bidirectional=bool -------------------------------------------------------------------------------- /proto/RNN_cudnn.cfg: -------------------------------------------------------------------------------- 1 | [proto] 2 | hidden_size=int 3 | num_layers=int 4 | nonlinearity=str 5 | bias=bool 6 | batch_first=bool 7 | dropout=float(0,1) 8 | bidirectional=bool -------------------------------------------------------------------------------- /proto/QLSTM.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | lstm_lay=str_list 3 | lstm_drop=float_list(0.0,1.0) 4 | lstm_bidir=bool 5 | lstm_act=str_list 6 | quaternion_init=str 7 | autograd=bool 8 | -------------------------------------------------------------------------------- /proto/RNN_cudnn.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | hidden_size=int 3 | num_layers=int 4 | nonlinearity=str 5 | bias=bool 6 | batch_first=bool 7 | dropout=float(0,1) 8 | bidirectional=bool 
-------------------------------------------------------------------------------- /proto/rmsprop.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | opt_momentum=float(0,inf) 3 | opt_alpha=float(0,inf) 4 | opt_eps=float 5 | opt_centered=bool 6 | opt_weight_decay=float(0,inf) 7 | 8 | 9 | -------------------------------------------------------------------------------- /proto/MLP.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | dnn_lay=str_list 3 | dnn_drop=str_list 4 | dnn_use_laynorm_inp=bool 5 | dnn_use_batchnorm_inp=bool 6 | dnn_use_batchnorm=bool_list 7 | dnn_use_laynorm=bool_list 8 | dnn_act=str_list 9 | -------------------------------------------------------------------------------- /proto/decoding.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | min_active=int(0,inf) 3 | max_active=int(0,inf) 4 | max_mem=int(0,inf) 5 | beam=float(0,inf) 6 | latbeam=float(0,inf) 7 | acwt=float(0,inf) 8 | max_arcs=int(-inf,inf) 9 | scoring_opts=str 10 | norm_vars=bool 11 | skip_scoring=bool 12 | -------------------------------------------------------------------------------- /proto/GRU.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | gru_lay=str_list 3 | gru_drop=str_list 4 | gru_use_laynorm_inp=bool 5 | gru_use_batchnorm_inp=bool 6 | gru_use_laynorm=bool_list 7 | gru_use_batchnorm=bool_list 8 | gru_bidir=bool 9 | gru_act=str_list 10 | gru_orthinit=bool 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /proto/RNN.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | rnn_lay=str_list 3 | rnn_drop=str_list 4 | rnn_use_laynorm_inp=bool 5 | rnn_use_batchnorm_inp=bool 6 | rnn_use_laynorm=bool_list 7 | rnn_use_batchnorm=bool_list 8 | rnn_bidir=bool 9 | rnn_act=str_list 10 | 
rnn_orthinit=bool 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /proto/LSTM.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | lstm_lay=str_list 3 | lstm_drop=str_list 4 | lstm_use_laynorm_inp=bool 5 | lstm_use_batchnorm_inp=bool 6 | lstm_use_laynorm=bool_list 7 | lstm_use_batchnorm=bool_list 8 | lstm_bidir=bool 9 | lstm_act=str_list 10 | lstm_orthinit=bool 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /proto/liGRU.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | ligru_lay=str_list 3 | ligru_drop=str_list 4 | ligru_use_laynorm_inp=bool 5 | ligru_use_batchnorm_inp=bool 6 | ligru_use_laynorm=bool_list 7 | ligru_use_batchnorm=bool_list 8 | ligru_bidir=bool 9 | ligru_act=str_list 10 | ligru_orthinit=bool 11 | 12 | 13 | -------------------------------------------------------------------------------- /proto/model.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | compute(architecture,input) 3 | concatenate(input,input) 4 | cost_nll(input,label) 5 | cost_err(input,label) 6 | mult(input,input) 7 | mult_constant(input,float) 8 | sum(input,input) 9 | sum_constant(input,float) 10 | avg(input,input) 11 | mse(input,input) 12 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/conf/test_spk.list: -------------------------------------------------------------------------------- 1 | mdab0 2 | mwbt0 3 | felc0 4 | mtas1 5 | mwew0 6 | fpas0 7 | mjmp0 8 | mlnt0 9 | fpkt0 10 | mlll0 11 | mtls0 12 | fjlm0 13 | mbpm0 14 | mklt0 15 | fnlp0 16 | mcmj0 17 | mjdh0 18 | fmgd0 19 | mgrt0 20 | mnjm0 21 | fdhc0 22 | mjln0 23 | mpam0 24 | fmld0 25 | -------------------------------------------------------------------------------- /proto/CNN.proto: 
-------------------------------------------------------------------------------- 1 | [proto] 2 | cnn_N_filt=int_list(1,inf) 3 | cnn_len_filt=int_list(1,inf) 4 | cnn_max_pool_len=int_list(1,inf) 5 | cnn_use_laynorm_inp=bool 6 | cnn_use_batchnorm_inp=bool 7 | cnn_use_laynorm=bool_list 8 | cnn_use_batchnorm=bool_list 9 | cnn_act=list_str 10 | cnn_drop=str_list 11 | -------------------------------------------------------------------------------- /check_res_dec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for x in $1; do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep WER $x/wer_* 2>/dev/null | ./best_wer.sh; done 3 | for x in $1; do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep Sum $x/*score_*/*.sys 2>/dev/null | ./best_wer.sh; done 4 | exit 0 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /proto/minimalGRU.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | minimalgru_lay=str_list 3 | minimalgru_drop=float_list(0.0,1.0) 4 | minimalgru_use_laynorm_inp=bool 5 | minimalgru_use_batchnorm_inp=bool 6 | minimalgru_use_laynorm=bool_list 7 | minimalgru_use_batchnorm=bool_list 8 | minimalgru_bidir=bool 9 | minimalgru_act=str_list 10 | minimalgru_orthinit=bool 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for x in $(find $1 -type d -name "decode_*"); do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep WER $x/wer_* 2>/dev/null | ./best_wer.sh; done 3 | for x in $(find $1 -type d -name "decode_*"); do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep Sum $x/*score_*/*.sys 2>/dev/null | ./best_wer.sh; done 4 | exit 0 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- 
/proto/SincNet.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | sinc_N_filt=int_list(1,inf) 3 | sinc_len_filt=int_list(1,inf) 4 | sinc_max_pool_len=int_list(1,inf) 5 | sinc_sample_rate=int 6 | sinc_min_low_hz=int 7 | sinc_min_band_hz=int 8 | sinc_use_laynorm_inp=bool 9 | sinc_use_batchnorm_inp=bool 10 | sinc_use_laynorm=bool_list 11 | sinc_use_batchnorm=bool_list 12 | sinc_act=list_str 13 | sinc_drop=str_list 14 | -------------------------------------------------------------------------------- /proto/SRU.proto: -------------------------------------------------------------------------------- 1 | [proto] 2 | sru_hidden_size=int 3 | sru_num_layers=int 4 | sru_dropout=float(0,1) 5 | sru_rnn_dropout=float(0,1) 6 | sru_use_tanh=bool 7 | sru_use_relu=bool 8 | sru_use_selu=bool 9 | sru_weight_norm=bool 10 | sru_layer_norm=bool 11 | sru_bidirectional=bool 12 | sru_is_input_normalized=bool 13 | sru_has_skip_term=bool 14 | sru_rescale=bool 15 | sru_highway_bias=float(-inf,0) 16 | sru_n_proj=int 17 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | from __future__ import print_function 5 | 6 | import sys 7 | 8 | vocab = set() 9 | with open(sys.argv[1]) as vocabfile: 10 | for line in vocabfile: 11 | vocab.add(line.strip()) 12 | 13 | with open(sys.argv[2]) as textfile: 14 | for line in textfile: 15 | print(" ".join(map(lambda word: word if word in vocab else "", line.strip().split()))) 16 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/path.sh: -------------------------------------------------------------------------------- 1 | #export KALDI_ROOT=~/kaldi-trunk/ 2 | [ -f $KALDI_ROOT/tools/env.sh ] && . 
$KALDI_ROOT/tools/env.sh 3 | export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH 4 | export LC_ALL=C 5 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/conf/dev_spk.list: -------------------------------------------------------------------------------- 1 | faks0 2 | fdac1 3 | fjem0 4 | mgwt0 5 | mjar0 6 | mmdb1 7 | mmdm2 8 | mpdf0 9 | fcmh0 10 | fkms0 11 | mbdg0 12 | mbwm0 13 | mcsh0 14 | fadg0 15 | fdms0 16 | fedw0 17 | mgjf0 18 | mglb0 19 | mrtk0 20 | mtaa0 21 | mtdt0 22 | mthc0 23 | mwjg0 24 | fnmr0 25 | frew0 26 | fsem0 27 | mbns0 28 | mmjr0 29 | mdls0 30 | mdlf0 31 | mdvc0 32 | mers0 33 | fmah0 34 | fdrw0 35 | mrcs0 36 | mrjm4 37 | fcal1 38 | mmwh0 39 | fjsj0 40 | majc0 41 | mjsw0 42 | mreb0 43 | fgjd0 44 | fjmg0 45 | mroa0 46 | mteb0 47 | mjfc0 48 | mrjr0 49 | fmml0 50 | mrws1 51 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/conf/phones.60-48-39.map: -------------------------------------------------------------------------------- 1 | aa aa aa 2 | ae ae ae 3 | ah ah ah 4 | ao ao aa 5 | aw aw aw 6 | ax ax ah 7 | ax-h ax ah 8 | axr er er 9 | ay ay ay 10 | b b b 11 | bcl vcl sil 12 | ch ch ch 13 | d d d 14 | dcl vcl sil 15 | dh dh dh 16 | dx dx dx 17 | eh eh eh 18 | el el l 19 | em m m 20 | en en n 21 | eng ng ng 22 | epi epi sil 23 | er er er 24 | ey ey ey 25 | f f f 26 | g g g 27 | gcl vcl sil 28 | h# sil sil 29 | hh hh hh 30 | hv hh hh 31 | ih ih ih 32 | ix ix ih 33 | iy iy iy 34 | jh jh jh 35 | k k k 36 | kcl cl sil 37 | l l l 38 | m m m 39 | n n n 40 | ng ng ng 41 | nx n n 42 | ow ow ow 43 | oy oy oy 44 | p p p 45 | pau 
sil sil 46 | pcl cl sil 47 | q 48 | r r r 49 | s s s 50 | sh sh sh 51 | t t t 52 | tcl cl sil 53 | th th th 54 | uh uh uh 55 | uw uw uw 56 | ux uw uw 57 | v v v 58 | w w w 59 | y y y 60 | z z z 61 | zh zh sh 62 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/nnet/run_autoencoder.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | # Train, 7 | dir=exp/autoencoder 8 | data_fmllr=data-fmllr-tri3 9 | labels="ark:feat-to-post scp:$data_fmllr/train/feats.scp ark:- |" 10 | $cuda_cmd $dir/log/train_nnet.log \ 11 | steps/nnet/train.sh --hid-layers 2 --hid-dim 200 --learn-rate 0.00001 \ 12 | --labels "$labels" --num-tgt 40 --train-tool "nnet-train-frmshuff --objective-function=mse" \ 13 | --proto-opts "--no-softmax --activation-type=<Tanh> --hid-bias-mean=0.0 --hid-bias-range=1.0 --param-stddev-factor=0.01" \ 14 | $data_fmllr/train_tr90 $data_fmllr/train_cv10 dummy-dir dummy-dir dummy-dir $dir || exit 1; 15 | 16 | # Forward the data, 17 | output_dir=data-autoencoded/test 18 | steps/nnet/make_bn_feats.sh --nj 1 --cmd "$train_cmd" --remove-last-components 0 \ 19 | $output_dir $data_fmllr/test $dir $output_dir/{log,data} || exit 1 20 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /proto/global_chunk.proto: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto=path 3 | cfg_proto_chunk=path 4 | 5 | [exp] 6 | cmd=str 7 | run_nn_script=str 8 | to_do={train,valid,forward} 9 | seed=int(-inf,inf) 10 | use_cuda=bool 11 | multi_gpu=bool 12 | save_gpumem=bool 13 | out_info=str 14 | N_epochs_tr=int(1,inf) 15 | 16 | 17 | [data_chunk] 18 | fea=str 19 | lab=str 20 | 21 | 22 | [batches] 23 | batch_size_train=int(0,inf) 24 | max_seq_length_train=int(20,inf) 25 | batch_size_valid=int(1,inf) 26 | max_seq_length_valid=int(20,inf) 27 | 28 | 29 | [architecture] 30 | arch_name=str 31 | arch_proto=path 32 | arch_library=str 33 | arch_class=str 34 | arch_pretrain_file=str 35 | arch_freeze=bool 36 | arch_seq_model=bool 37 | arch_lr=float(0,inf) 38 | arch_halving_factor=float(0,inf) 39 | arch_improvement_threshold=float(0,inf) 40 | arch_opt={sgd,rmsprop,adam} 41 | 42 | [model] 43 | model_proto=path 44 | model=str 45 | 46 | 47 | [forward] 48 | forward_out=str 49 | normalize_posteriors=bool_list 50 | normalize_with_counts_from=str 51 | save_out_file=bool_list 52 | require_decoding=bool_list 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /proto/global_chunk_refac01.proto: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto=path 3 | cfg_proto_chunk=path 4 | 5 | [exp] 6 | cmd=str 7 | run_nn_script=str 8 | to_do={train,valid,forward} 9 | seed=int(-inf,inf) 10 | use_cuda=bool 11 | multi_gpu=bool 12 | save_gpumem=bool 13 | out_info=str 14 | N_epochs_tr=int(1,inf) 15 | 16 | 17 | [data_chunk] 18 | fea=str 19 | lab=str 20 | 21 | 22 
| [batches] 23 | batch_size_train=int(0,inf) 24 | max_seq_length_train=list_str 25 | batch_size_valid=int(1,inf) 26 | max_seq_length_valid=list_str 27 | 28 | 29 | [architecture] 30 | arch_name=str 31 | arch_proto=path 32 | arch_library=str 33 | arch_class=str 34 | arch_pretrain_file=str 35 | arch_freeze=bool 36 | arch_seq_model=bool 37 | arch_lr=float(0,inf) 38 | arch_halving_factor=float(0,inf) 39 | arch_improvement_threshold=float(0,inf) 40 | arch_opt={sgd,rmsprop,adam,none} 41 | 42 | [model] 43 | model_proto=path 44 | model=str 45 | 46 | 47 | [forward] 48 | forward_out=str 49 | normalize_posteriors=bool_list 50 | normalize_with_counts_from=str 51 | save_out_file=bool_list 52 | require_decoding=bool_list 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces <s> and </s> with <eps> (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; } 24 | if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; } 25 | } 26 | print join("\t", @A) . 
"\n"; 27 | } 28 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . 
utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>" 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | mkdir -p $new_lang 34 | cp -r $old_lang/* $new_lang 35 | 36 | 37 | unk=`cat $new_lang/oov.int` 38 | bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'` 39 | eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'` 40 | if [[ -z $bos || -z $eos ]]; then 41 | echo "$0: <s> and </s> symbols are not in $new_lang/words.txt" 42 | exit 1 43 | fi 44 | 45 | 46 | arpa-to-const-arpa --bos-symbol=$bos \ 47 | --eos-symbol=$eos --unk-symbol=$unk \ 48 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 49 | 50 | exit 0; 51 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/summarize_warnings.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1; 6 | 7 | $dir = $ARGV[0]; 8 | 9 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1; 10 | 11 | $dir =~ s:/$::; # Remove trailing slash. 12 | 13 | 14 | # Group the files into categories where all have the same base-name. 15 | foreach $f (glob ("$dir/*.log")) { 16 | $f_category = $f; 17 | # do next expression twice; s///g doesn't work as they overlap. 18 | $f_category =~ s:\.\d+\.:.*.:; 19 | $f_category =~ s:\.\d+\.:.*.:; 20 | $fmap{$f_category} .= " $f"; 21 | } 22 | 23 | sub split_hundreds { # split list of filenames into groups of 100. 
24 | my $names = shift @_; 25 | my @A = split(" ", $names); 26 | my @ans = (); 27 | while (@A > 0) { 28 | my $group = ""; 29 | for ($x = 0; $x < 100 && @A>0; $x++) { 30 | $fname = pop @A; 31 | $group .= "$fname "; 32 | } 33 | push @ans, $group; 34 | } 35 | return @ans; 36 | } 37 | 38 | foreach $c (keys %fmap) { 39 | $n = 0; 40 | foreach $fgroup (split_hundreds($fmap{$c})) { 41 | $n += `grep -w WARNING $fgroup | wc -l`; 42 | } 43 | if ($n != 0) { 44 | print "$n warnings in $c\n" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /plot_acc_and_loss.py: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | # pytorch-kaldi v.0.1 3 | # Mirco Ravanelli, Titouan Parcollet 4 | # Mila, University of Montreal 5 | # October 2018 6 | ########################################################## 7 | 8 | import sys 9 | import configparser 10 | import os 11 | from utils import create_curves 12 | 13 | # Checking arguments 14 | if len(sys.argv) != 2: 15 | print("ERROR: Please provide only the path of the cfg_file as : python plot_acc_and_loss.py cfg/TIMIT_MLP_mfcc.cfg") 16 | 17 | # Checking if the cfg_file exists and loading it 18 | cfg_file = sys.argv[1] 19 | if not (os.path.exists(cfg_file)): 20 | sys.stderr.write("ERROR: The config file %s does not exist !\n" % (cfg_file)) 21 | sys.exit(0) 22 | else: 23 | config = configparser.ConfigParser() 24 | config.read(cfg_file) 25 | 26 | # Getting the parameters 27 | valid_data_lst = config["data_use"]["valid_with"].split(",") 28 | out_folder = config["exp"]["out_folder"] 29 | N_ep = int(config["exp"]["N_epochs_tr"]) 30 | 31 | # Handling call without running run_exp.py before 32 | if not (os.path.exists(out_folder + "res.res")): 33 | sys.stderr.write("ERROR: Please run the experiment in order to get results to plot first !\n") 34 | sys.exit(0) 35 | 36 | # Creating files and curves 37 | 
create_curves(out_folder, N_ep, valid_data_lst) 38 | -------------------------------------------------------------------------------- /proto/global.proto: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto=path 3 | cfg_proto_chunk=path 4 | 5 | [exp] 6 | cmd=str 7 | run_nn_script=str 8 | out_folder=str 9 | seed=int(-inf,inf) 10 | use_cuda=bool 11 | multi_gpu=bool 12 | save_gpumem=bool 13 | N_epochs_tr=int(1,inf) 14 | 15 | [dataset] 16 | data_name=str 17 | fea=str 18 | lab=str 19 | N_chunks=int(1,inf) 20 | 21 | [data_use] 22 | train_with=list_str 23 | valid_with=list_str 24 | forward_with=list_str 25 | 26 | 27 | [batches] 28 | batch_size_train=list_str 29 | max_seq_length_train=list_str 30 | increase_seq_length_train=Bool 31 | start_seq_len_train=int(20,inf) 32 | multply_factor_seq_len_train=int(0,inf) 33 | batch_size_valid=int(1,inf) 34 | max_seq_length_valid=int(20,inf) 35 | 36 | [architecture] 37 | arch_name=str 38 | arch_proto=path 39 | arch_library=str 40 | arch_class=str 41 | arch_pretrain_file=str 42 | arch_freeze=bool 43 | arch_seq_model=bool 44 | arch_lr=list_str 45 | arch_halving_factor=float(0,inf) 46 | arch_improvement_threshold=float(0,inf) 47 | arch_opt={sgd,rmsprop,adam} 48 | 49 | 50 | [model] 51 | model_proto=path 52 | model=str 53 | 54 | [forward] 55 | forward_out=str 56 | normalize_posteriors=bool_list 57 | normalize_with_counts_from=str 58 | save_out_file=bool_list 59 | require_decoding=bool_list 60 | 61 | [decoding] 62 | decoding_script_folder=path 63 | decoding_script=str 64 | decoding_proto=path 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /proto/global_refac01.proto: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto=path 3 | cfg_proto_chunk=path 4 | 5 | [exp] 6 | cmd=str 7 | run_nn_script=str 8 | out_folder=str 9 | seed=int(-inf,inf) 10 | use_cuda=bool 11 | 
multi_gpu=bool 12 | save_gpumem=bool 13 | N_epochs_tr=int(1,inf) 14 | 15 | [dataset] 16 | data_name=str 17 | fea=str 18 | lab=str 19 | N_chunks=int(1,inf) 20 | 21 | [data_use] 22 | train_with=list_str 23 | valid_with=list_str 24 | forward_with=list_str 25 | 26 | 27 | [batches] 28 | batch_size_train=list_str 29 | max_seq_length_train=list_str 30 | increase_seq_length_train=Bool 31 | start_seq_len_train=list_str 32 | multply_factor_seq_len_train=int(0,inf) 33 | batch_size_valid=int(1,inf) 34 | max_seq_length_valid=list_str 35 | 36 | [architecture] 37 | arch_name=str 38 | arch_proto=path 39 | arch_library=str 40 | arch_class=str 41 | arch_pretrain_file=str 42 | arch_freeze=bool 43 | arch_seq_model=bool 44 | arch_lr=list_str 45 | arch_halving_factor=float(0,inf) 46 | arch_improvement_threshold=float(0,inf) 47 | arch_opt={sgd,rmsprop,adam,none} 48 | 49 | 50 | [model] 51 | model_proto=path 52 | model=str 53 | 54 | [forward] 55 | forward_out=str 56 | normalize_posteriors=bool_list 57 | normalize_with_counts_from=str 58 | save_out_file=bool_list 59 | require_decoding=bool_list 60 | 61 | [decoding] 62 | decoding_script_folder=path 63 | decoding_script=str 64 | decoding_proto=path 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines; 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] cmp $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 
18 |
19 | # Accumulators: total duration, longest/shortest segment seen so far and
20 | # the number of input lines processed.
21 | $dur = $total = 0;
22 | $maxDur = 0;
23 | $minDur = 9999999999;
24 | $n = 0;
25 | while(<>){
26 |   chomp;
27 |   @t = split(/\s+/);
28 |   # Field 0 is used as the segment id; fields 2 and 3 as start and end time.
29 |   $dur = $t[3] - $t[2];
30 |   $total += $dur;
31 |   if ($dur > $maxDur) {
32 |     $maxSegId = $t[0];
33 |     $maxDur = $dur;
34 |   }
35 |   if ($dur < $minDur) {
36 |     $minSegId = $t[0];
37 |     $minDur = $dur;
38 |   }
39 |   $n++;
40 | }
41 | # With no input lines $n is 0 and the average below would abort with
42 | # "Illegal division by zero"; report the actual problem instead.
43 | $n > 0 || die "analyze_segments.pl: no segments found in input\n";
44 | $avg=$total/$n;
45 | $hrs = $total/3600;
46 | print "Total $hrs hours of data\n";
47 | print "Average segment length $avg seconds\n";
48 | print "Segment $maxSegId has length of $maxDur seconds\n";
49 | print "Segment $minSegId has length of $minDur seconds\n";
50 |
--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/show_lattice.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | format=pdf # pdf svg
4 | mode=save # display save
5 | lm_scale=0.0
6 | acoustic_scale=0.0
7 | #end of config
8 |
9 | . utils/parse_options.sh
10 |
11 | if [ $# != 3 ]; then
12 |   echo "usage: $0 [--mode display|save] [--format pdf|svg] <utt-id> <lattice-ark> <word-list>"
13 |   echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
14 |   exit 1;
15 | fi
16 |
17 | . path.sh
18 |
19 | uttid=$1
20 | lat=$2
21 | words=$3
22 |
23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup
24 |
25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1;
26 | !
[ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1; 28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Can not automaticaly open file on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite. 25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite. 
25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 
20 |
21 | # One or two arguments are accepted: the unknown-symbol list and an optional
22 | # printed FST (stdin is read when the second one is absent). Note the "||":
23 | # the argument count is invalid when it is below 1 or above 2.
24 | if ( @ARGV < 1 || @ARGV > 2) {
25 |   die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
26 | }
27 |
28 | $unklist = shift @ARGV;
29 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
30 | # Read the list from the handle opened above; exactly one symbol per line.
31 | while(<S>){
32 |   @A = split(" ", $_);
33 |   @A == 1 || die "Bad line in unknown-symbol list: $_";
34 |   $unk{$A[0]} = 1;
35 | }
36 | close(S);
37 |
38 | # Copy the remaining input to stdout, dropping every line whose third or
39 | # fourth field is one of the listed symbols.
40 | $num_removed = 0;
41 | while(<>){
42 |   @A = split(" ", $_);
43 |   if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
44 |     $num_removed++;
45 |   } else {
46 |     print;
47 |   }
48 | }
49 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
50 |
51 |
--------------------------------------------------------------------------------
/kaldi_decoding_scripts/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine.
7 | 8 | #a) JHU cluster options 9 | #export train_cmd="queue.pl -l arch=*64" 10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" 11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" 12 | #export cuda_cmd=run.pl 13 | 14 | 15 | #if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then 16 | # export train_cmd="queue.pl -l arch=*64*" 17 | # export decode_cmd="queue.pl -l arch=*64* --mem 3G" 18 | # export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G" 19 | # export cuda_cmd="queue.pl -l gpu=1" 20 | #elif [[ $(hostname -f) == *.fit.vutbr.cz ]]; then 21 | # #b) BUT cluster options 22 | # queue="all.q@@blade,all.q@@speech,all.q@dellgpu*,all.q@supergpu*" 23 | # export train_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,matylda5=0.5" 24 | # export decode_cmd="queue.pl -q $queue -l ram_free=3000M,mem_free=3000M,matylda5=0.1" 25 | # export mkgraph_cmd="queue.pl -q $queue -l ram_free=4G,mem_free=4G,matylda5=3" 26 | # export cuda_cmd="queue.pl -q long.q@pcspeech-gpu,long.q@dellgpu1,long.q@pcgpu*,long.q@supergpu1 -l gpu=1" 27 | #else 28 | # echo "$0: you need to define options for your cluster." 29 | # exit 1; 30 | #fi 31 | 32 | #c) run locally... 33 | export train_cmd=utils/run.pl 34 | export decode_cmd=utils/run.pl 35 | export cuda_cmd=utils/run.pl 36 | export mkgraph_cmd=utils/run.pl 37 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | # Adds some specified number of disambig symbols to a symbol table.
19 | # Adds these as #1, #2, etc.
20 | # If the --include-zero option is specified, includes an extra one
21 | # #0.
22 |
23 | $include_zero = 0;
24 | if(@ARGV > 0 && $ARGV[0] eq "--include-zero") {
25 |   $include_zero = 1;
26 |   shift @ARGV;
27 | }
28 |
29 | if(@ARGV != 2) {
30 |   die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
31 | }
32 |
33 |
34 | $input = $ARGV[0];
35 | $nsyms = $ARGV[1];
36 |
37 | open(F, "<$input") || die "Opening file $input";
38 | # Echo the existing table and remember the id found on its last line.
39 | while(<F>) {
40 |   @A = split(" ", $_);
41 |   @A == 2 || die "Bad line $_";
42 |   $lastsym = $A[1];
43 |   print;
44 | }
45 | close(F);
46 | if(!defined($lastsym)){
47 |   die "Empty symbol file?";
48 | }
49 | # The optional #0 takes the next free id; #1..#N then follow it.
50 | if($include_zero) {
51 |   $lastsym++;
52 |   print "#0 $lastsym\n";
53 | }
54 |
55 | for($n = 1; $n <= $nsyms; $n++) {
56 |   $y = $n + $lastsym;
57 |   print "#$n $y\n";
58 | }
59 |
--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/remove_data_links.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This program searches within a directory for soft links that
4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory,
5 | # and it removes both the soft links and the things they point to.
6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] " 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | fi 29 | 30 | for dir in $*; do 31 | if [ ! -d $dir ]; then 32 | echo "$0: not a directory: $dir" 33 | ret=1 34 | else 35 | for subdir in $(find $dir -type d); do 36 | if [ -d $subdir/storage ]; then 37 | for x in $(ls $subdir); do 38 | f=$subdir/$x 39 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 40 | target=$subdir/$(readlink $f) 41 | if $dry_run; then 42 | echo rm $f $target 43 | else 44 | rm $f $target 45 | fi 46 | fi 47 | done 48 | fi 49 | done 50 | fi 51 | done 52 | 53 | exit $ret 54 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_hamm_mat.py 19 | # script generates diagonal matrix with hamming window values 20 | from __future__ import print_function 21 | 22 | from math import * 23 | import sys 24 | 25 | 26 | from optparse import OptionParser 27 | 28 | parser = OptionParser() 29 | parser.add_option("--fea-dim", dest="dim", help="feature dimension") 30 | parser.add_option("--splice", dest="splice", help="applied splice value") 31 | (options, args) = parser.parse_args() 32 | 33 | if options.dim == None: 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | dim = int(options.dim) 38 | splice = int(options.splice) 39 | 40 | 41 | # generate the diagonal matrix with hammings 42 | M_2PI = 6.283185307179586476925286766559005 43 | 44 | dim_mat = (2 * splice + 1) * dim 45 | timeContext = 2 * splice + 1 46 | print("[") 47 | for row in range(dim_mat): 48 | for col in range(dim_mat): 49 | if col != row: 50 | print("0", end=" ") 51 | else: 52 | i = int(row / dim) 53 | print(str(0.54 - 0.46 * cos((M_2PI * i) / (timeContext - 1))), end=" ") 54 | print() 55 | 56 | print("]") 57 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this 
file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_splice.py 19 | # generates Component 20 | from __future__ import print_function 21 | 22 | from math import * 23 | import sys 24 | 25 | 26 | from optparse import OptionParser 27 | 28 | parser = OptionParser() 29 | parser.add_option("--fea-dim", dest="dim_in", help="feature dimension") 30 | parser.add_option("--splice", dest="splice", help="number of frames to concatenate with the central frame") 31 | parser.add_option( 32 | "--splice-step", 33 | dest="splice_step", 34 | help="splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)", 35 | default="1", 36 | ) 37 | (options, args) = parser.parse_args() 38 | 39 | if options.dim_in == None: 40 | parser.print_help() 41 | sys.exit(1) 42 | 43 | dim_in = int(options.dim_in) 44 | splice = int(options.splice) 45 | splice_step = int(options.splice_step) 46 | 47 | dim_out = (2 * splice + 1) * dim_in 48 | 49 | print("", dim_out, dim_in) 50 | print("[", end=" ") 51 | 52 | splice_vec = range(-splice * splice_step, splice * splice_step + 1, splice_step) 53 | for idx in range(len(splice_vec)): 54 | print(splice_vec[idx], end=" ") 55 | 56 | print("]") 57 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/ln.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use File::Spec; 3 | 4 | if ( 
@ARGV < 2 ) { 5 | print STDERR "usage: ln.pl input1 input2 dest-dir\n" . 6 | "This script does a soft link of input1, input2, etc." . 7 | "to dest-dir, using relative links where possible\n" . 8 | "Note: input-n and dest-dir may both be absolute pathnames,\n" . 9 | "or relative pathnames, relative to the current directlory.\n"; 10 | exit(1); 11 | } 12 | 13 | $dir = pop @ARGV; 14 | if ( ! -d $dir ) { 15 | print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n"; 16 | exit(1); 17 | } 18 | 19 | $ans = 1; # true. 20 | 21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path. 22 | defined $absdir || die "No such directory $dir"; 23 | foreach $file (@ARGV) { 24 | $absfile = File::Spec->rel2abs($file); # Get $file as abs path. 25 | defined $absfile || die "No such file or directory: $file"; 26 | @absdir_split = split("/", $absdir); 27 | @absfile_split = split("/", $absfile); 28 | 29 | $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this 30 | # as the destination in the link command. 31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 
0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/make_unigram_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script is used in discriminative training. 18 | # This script makes a simple unigram-loop version of G.fst 19 | # using a unigram grammar estimated from some training transcripts. 20 | # This is for MMI training. 21 | # We don't have any silences in G.fst; these are supplied by the 22 | # optional silences in the lexicon. 23 | 24 | # Note: the symbols in the transcripts become the input and output 25 | # symbols of G.txt; these can be numeric or not. 26 | 27 | if(@ARGV != 0) { 28 | die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 
43 | }
44 |
45 | foreach $a (keys %count) {
46 |   $prob = $count{$a} / $totcount;
47 |   $cost = -log($prob); # Negated natural-log probs.
48 |   print "0\t0\t$a\t$a\t$cost\n";
49 | }
50 | # Zero final-cost.
51 | $final_prob = $nl / $totcount;
52 | $final_cost = -log($final_prob);
53 | print "0\t$final_cost\n";
54 |
55 |
--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/int2sym.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 | # Apache 2.0.
4 |
5 | undef $field_begin;
6 | undef $field_end;
7 |
8 |
9 | if ($ARGV[0] eq "-f") {
10 |   shift @ARGV;
11 |   $field_spec = shift @ARGV;
12 |   if ($field_spec =~ m/^\d+$/) {
13 |     $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
14 |   }
15 |   if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
16 |     if ($1 ne "") {
17 |       $field_begin = $1 - 1; # Change to zero-based indexing.
18 |     }
19 |     if ($2 ne "") {
20 |       $field_end = $2 - 1; # Change to zero-based indexing.
21 |     }
22 |   }
23 |   if (!defined $field_begin && !defined $field_end) {
24 |     die "Bad argument to -f option: $field_spec";
25 |   }
26 | }
27 | $symtab = shift @ARGV;
28 | if(!defined $symtab) {
29 |   print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
30 |     "options: [-f (<field>|<field_start>-<field-end>)]\n" .
31 |     "e.g.: -f 2, or -f 3-4\n";
32 |   exit(1);
33 | }
34 |
35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
36 | while(<F>) { # each symbol-table line is "<symbol> <integer>"
37 |   @A = split(" ", $_);
38 |   @A == 2 || die "bad line in symbol table file: $_";
39 |   $int2sym{$A[1]} = $A[0];
40 | }
41 | close(F);
42 | # Map one integer token to its symbol; $pos is only used in error messages.
43 | sub int2sym {
44 |   my $a = shift @_;
45 |   my $pos = shift @_;
46 |   if($a !~ m:^\d+$:) { # not all digits..
47 |     $pos1 = $pos+1; # make it one-based.
48 |     die "int2sym.pl: found noninteger token $a [in position $pos1]\n";
49 |   }
50 |   $s = $int2sym{$a};
51 |   if(!defined ($s)) {
52 |     die "int2sym.pl: integer $a not in symbol table $symtab.";
53 |   }
54 |   return $s;
55 | }
56 |
57 | $error = 0;
58 | while (<>) {
59 |   @A = split(" ", $_);
60 |   for ($pos = 0; $pos <= $#A; $pos++) {
61 |     $a = $A[$pos];
62 |     # Translate only the fields inside the (optional) -f range.
63 |     if ( (!defined $field_begin || $pos >= $field_begin)
64 |          && (!defined $field_end || $pos <= $field_end)) {
65 |       $a = int2sym($a, $pos);
66 |     }
67 |     print $a . " ";
68 |   }
69 |   print "\n";
70 | }
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/reduce_data_dir_by_reclist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # koried, 10/29/2012
4 |
5 | # Reduce a data set based on a list of recordings
6 |
7 | if [ $# != 3 ]; then
8 |   echo "usage: $0 srcdir reclist destdir"
9 |   exit 1;
10 | fi
11 |
12 | srcdir=$1
13 | reclist=$2
14 | destdir=$3
15 |
16 | if [ ! -f $srcdir/utt2spk ]; then
17 |   echo "$0: no such file $srcdir/utt2spk"
18 |   exit 1;
19 | fi
20 |
21 | function do_filtering {
22 |   # assumes the utt2spk and spk2utt files already exist.
23 |   [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
24 |   [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
25 |   [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
26 |   [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
27 |   [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
28 |   if [ -f $srcdir/segments ]; then
29 |     utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
30 |     awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
31 | # The next line would override the command above for wav.scp, which would be incorrect. 32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 36 | rm $destdir/reco 37 | fi 38 | srcutts=`cat $srcdir/utt2spk | wc -l` 39 | destutts=`cat $destdir/utt2spk | wc -l` 40 | echo "Reduced #utt from $srcutts to $destutts" 41 | } 42 | 43 | mkdir -p $destdir 44 | 45 | # filter the utt2spk based on the set of recordings 46 | rm -f $destdir/utt2spk 47 | for i in `cat $reclist`; do 48 | cat $srcdir/utt2spk | grep ^$i >> $destdir/utt2spk 49 | done 50 | 51 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 52 | do_filtering; 53 | 54 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/reduce_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # koried, 10/29/2012 4 | 5 | # Reduce a data set based on a list of turn-ids 6 | 7 | if [ $# != 3 ]; then 8 | echo "usage: $0 srcdir turnlist destdir" 9 | exit 1; 10 | fi 11 | 12 | srcdir=$1 13 | reclist=$2 14 | destdir=$3 15 | 16 | if [ ! -f $srcdir/utt2spk ]; then 17 | echo "$0: no such file $srcdir/utt2spk" 18 | exit 1; 19 | fi 20 | 21 | function do_filtering { 22 | # assumes the utt2spk and spk2utt files already exist. 
23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp 24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp 25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text 26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender 27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp 28 | if [ -f $srcdir/segments ]; then 29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments 30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings. 31 | # The next line would override the command above for wav.scp, which would be incorrect. 32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | 36 | # Filter the STM file for proper sclite scoring (this will also remove the comments lines) 37 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 38 | rm $destdir/reco 39 | fi 40 | srcutts=`cat $srcdir/utt2spk | wc -l` 41 | destutts=`cat $destdir/utt2spk | wc -l` 42 | echo "Reduced #utt from $srcutts to $destutts" 43 | } 44 | 45 | mkdir -p $destdir 46 | 47 | # filter the utt2spk based on the set of recordings 48 | utils/filter_scp.pl $reclist < $srcdir/utt2spk > $destdir/utt2spk 49 | 50 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 51 | do_filtering; 52 | 53 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan 
# Trmal )

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This script reads per-utt table generated for example during scoring
# and outputs the WER similar to the format the compute-wer utility
# or the utils/best_wer.pl produces
# i.e. from table containing lines in this format
# SUM raw 23344 243230 176178 46771 9975 20281 77027 16463
# produces something output like this
# %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ]
# NB: if the STDIN stream will contain more of the SUM raw entries,
# the best one will be found and printed
# ("best" = the entry with the lowest raw error count in column 9).
#
# If the script is called with parameters, it uses them to provide
# a description of the output
# i.e.
# cat per-spk-report | utils/scoring/wer_report.pl Full set
# the following output will be produced
# %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set


# Read the report from standard input.  (The file handle had been lost, and a
# bare "while ()" never terminates in Perl.)
while (<STDIN>) {
  if ( m:SUM\s+raw:) {
    # Columns: SUM raw #snt #words corr sub ins del err ...
    @F = split;
    if ((!defined $wer) || ($wer > $F[8])) {
      $corr=$F[4];
      $sub=$F[5];
      $ins=$F[6];
      $del=$F[7];
      $wer=$F[8];
      $words=$F[3];
    }
  }
}

if (defined $wer) {
  $wer_str = sprintf("%.2f", (100.0 * $wer) / $words);
  print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]";
  print " " . join(" ", @ARGV) if @ARGV > 0;
  print "\n";
}

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/create_split_dir.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl

# Copyright 2013  Guoguo Chen
# Apache 2.0.
#
# This script creates storage directories on different file systems, and creates
# symbolic links to those directories. For example, a command
#
# utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
#
# will mkdir -p all of those directories, and will create links
#
# egs/storage/1 -> /export/gpu-03/egs/storage
# egs/storage/2 -> /export/gpu-04/egs/storage
# ...
#
use strict;
use warnings;
use File::Spec;
use Getopt::Long;

# NOTE(review): the opening lines of this here-document were lost when the
# file was extracted; they are reconstructed here -- confirm against upstream.
my $Usage = <<EOU;
create_split_dir.pl:
This script creates storage directories on different file systems, and creates
symbolic links to those directories.

Usage: utils/create_split_dir.pl <actual_storage_dirs> <pseudo_storage_dir>
 e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage

Allowed options:
  --suffix    : Common suffix to <actual_storage_dirs>    (string,  default = "")

See also create_data_link.pl, which is intended to work with the resulting
directory structure, and remove_data_links.sh
EOU

my $suffix="";
GetOptions('suffix=s' => \$suffix);

if (@ARGV < 2) {
  die $Usage;
}

# Overall status: stays true only while every symlink() call succeeds.
my $ans = 1;

# The last argument is the directory that will hold the numbered links.
my $dir = pop(@ARGV);
system("mkdir -p $dir 2>/dev/null");
my $index = 1;
foreach my $file (@ARGV) {
  $file = $file . "/" . $suffix;
  my $actual_storage = File::Spec->rel2abs($file);
  my $pseudo_storage = "$dir/$index";

  # If the symbolic link already exists, keep it and move on to the next slot.
  # The index must still advance, otherwise every remaining directory would be
  # compared against the same existing link and skipped too.
  if (-l $pseudo_storage) {
    print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n";
    $index++;
    next;
  }

  # Create the destination directory and make the link.
  system("mkdir -p $actual_storage 2>/dev/null");
  my $ret = symlink($actual_storage, $pseudo_storage);

  # Process the returned values
  $ans = $ans && $ret;
  if (! $ret) {
    print STDERR "Error linking $actual_storage to $pseudo_storage\n";
  }

  $index++;
}

exit($ans == 1 ? 0 : 1);

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_basic.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Scores a decoding directory with compute-wer after mapping both reference
# and hypothesis from 48 to 39 phone classes.

# begin configuration section.
cmd=run.pl
stage=0
min_lmwt=1
max_lmwt=10
#end configuration section.

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  # The <...> placeholders were lost in extraction; restored from the
  # positional arguments read below.
  echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
  echo "    --min_lmwt <int>                # minumum LM-weight for lattice rescoring "
  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

phonemap="conf/phones.60-48-39.map"
nj=$(cat $dir/num_jobs)

symtab=$lang_or_graph/words.txt

for f in $symtab $dir/lat.1.gz $data/text; do
  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
done

mkdir -p $dir/scoring/log

# Map reference to 39 phone classes:
cat $data/text | local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 > $dir/scoring/test_filt.txt

# Get the phone-sequence on the best-path:
for LMWT in $(seq $min_lmwt $max_lmwt); do
  $cmd JOB=1:$nj $dir/scoring/log/best_path_basic.$LMWT.JOB.log \
    lattice-best-path --lm-scale=$LMWT --word-symbol-table=$symtab --verbose=2 \
      "ark:gunzip -c $dir/lat.JOB.gz|" ark,t:$dir/scoring/$LMWT.JOB.tra || exit 1;
  # Merge the per-job transcripts into one sorted file per LM weight.
  cat $dir/scoring/$LMWT.*.tra | sort > $dir/scoring/$LMWT.tra
  rm $dir/scoring/$LMWT.*.tra
done

# Map hypothesis to 39 phone classes:
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score_basic.LMWT.log \
  cat $dir/scoring/LMWT.tra \| \
  utils/int2sym.pl -f 2- $symtab \| \
  local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 \| \
  compute-wer --text --mode=all \
    ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;

exit 0;

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/find_arpa_oovs.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Exactly one or two arguments are accepted.  (The original test used "&&",
# which can never be true, so bad invocations were never rejected.)
if ( @ARGV < 1 || @ARGV > 2) {
  die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
  # This program finds words in the arpa file that are not symbols
  # in the OpenFst-format symbol table words.txt.  It prints them
  # on the standard output, one per line.
}

$symtab = shift @ARGV;
open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
# Each symbol-table line is "<symbol> <integer-id>"; remember the symbols.
while(<S>){
  @A = split(" ", $_);
  @A == 2 || die "Bad line in symbol table file: $_";
  $seen{$A[0]} = 1;
}

# The ARPA LM is read from the remaining argument, or from stdin if none.
$curgram=0;
while(<>) { # Find the \data\ marker.
  if(m:^\\data\\$:) { last; }
}
while(<>) {
  if(m/^\\(\d+)\-grams:\s*$/) {
    $curgram = $1;
    if($curgram > 1) {
      last; # This is an optimization as we can get the vocab from the 1-grams
    }
  } elsif($curgram > 0) {
    @A = split(" ", $_);
    if(@A > 1) {
      shift @A;  # drop the log-probability; the n-gram words follow.
      for($n=0;$n<$curgram;$n++) {
        $word = $A[$n];
        if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
        $in_arpa{$word} = 1;
      }
    } else {
      if(@A > 0 && $A[0] !~ m:\\end\\:) {
        print STDERR "Unusual line $_ (line $.) in arpa file\n";
      }
    }
  }
}

# Print every ARPA word missing from the symbol table, except the
# sentence-boundary symbols.
# NOTE(review): the two literals had been reduced to "" by extraction;
# "<s>" and "</s>" are restored here -- confirm against upstream.
foreach $w (keys %in_arpa) {
  if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
    print "$w\n";
  }
}

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/gen_dct_mat.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2012  Brno University of Technology (author: Karel Vesely)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# ./gen_dct_mat.py
# script generates matrix with DCT transform, which is sparse
# and takes into account that data-layout is along frequency axis,
# while DCT is done along temporal axis.
from __future__ import print_function

import sys
from math import cos, pi, sqrt
from optparse import OptionParser


def dct_matrix_text(dim, splice, dct_basis):
    """Return the sparse DCT matrix as the text this script prints.

    dim       -- feature dimension (number of entries per frame)
    splice    -- splice value; the time context is 2 * splice + 1 frames
    dct_basis -- number of DCT basis functions

    The text starts with "[" and ends with "]".  There is one line per
    (basis k, feature m) pair, and a blank line after each basis.  On each
    line the DCT coefficient for frame n sits at column n * dim + m and
    every other column is "0".
    """
    time_context = 2 * splice + 1
    scale = sqrt(2.0 / time_context)

    pieces = ["[\n"]
    for k in range(dct_basis):
        for m in range(dim):
            for n in range(time_context):
                # Zeros before the coefficient: m of them in the first frame,
                # then dim - 1 between consecutive coefficients.
                leading_zeros = m if n == 0 else dim - 1
                pieces.append(leading_zeros * "0 " + " ")
                pieces.append(str(scale * cos(pi / time_context * k * (n + 0.5))) + " ")
            # Pad the last frame with zeros up to dim and end the row.
            pieces.append((dim - m - 1) * "0 " + " \n")
        pieces.append("\n")
    pieces.append("]\n")
    return "".join(pieces)


def main(argv=None):
    """Parse the command line and write the matrix to standard output.

    Prints the help text and exits with status 1 when any of --fea-dim,
    --splice or --dct-basis is missing.  Previously only --fea-dim was
    checked, so a missing --splice or --dct-basis crashed in int(None).
    """
    parser = OptionParser()
    parser.add_option("--fea-dim", dest="dim", help="feature dimension")
    parser.add_option("--splice", dest="splice", help="applied splice value")
    parser.add_option("--dct-basis", dest="dct_basis", help="number of DCT basis")
    (options, args) = parser.parse_args(argv)

    if options.dim is None or options.splice is None or options.dct_basis is None:
        parser.print_help()
        sys.exit(1)

    sys.stdout.write(
        dct_matrix_text(int(options.dim), int(options.splice), int(options.dct_basis))
    )


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/prepare_online_nnet_dist_build.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015  Johns Hopkins University (Author: Vijayaditya Peddinti)
#                 Guoguo Chen
# Apache 2.0
# Script to prepare the distribution from the online-nnet build

other_files= #other files to be included in the build
other_dirs=
conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf"
ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts"

echo "$0 $@"  # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
   # The <...> placeholders were lost in extraction; restored from the
   # positional arguments read below.
   echo "Usage: $0 <lang-dir> <model-dir> <output-tgz>"
   echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz"
   exit 1;
fi

lang=$1
modeldir=$2
tgzfile=$3

for f in $lang/phones.txt $other_files; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

build_files=
for d in $modeldir/conf $modeldir/ivector_extractor; do
  [ ! -d $d ] && echo "$0: no such directory $d" && exit 1;
done

for f in $ivec_extractor_files; do
  f=$modeldir/ivector_extractor/$f
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
  build_files="$build_files $f"
done

# Makes a copy of the original config files, as we will change the absolute path
# to relative.
rm -rf $modeldir/conf_abs_path
mkdir -p $modeldir/conf_abs_path
cp -r $modeldir/conf/* $modeldir/conf_abs_path

for f in $conf_files; do
  [ ! -f $modeldir/conf/$f ] && \
    echo "$0: no such file $modeldir/conf/$f" && exit 1;
  # Changes absolute path to relative path. The path entries in the config file
  # are generated by scripts and it is safe to assume that they have structure:
  # variable=path
  # (The inline Perl reads the config from stdin; its <STDIN> handle had been
  # lost, which turned the loop into one that never reads anything.)
  cat $modeldir/conf_abs_path/$f | perl -e '
    use File::Spec;
    while(<STDIN>) {
      chomp;
      @col = split("=", $_);
      if (@col == 2 && (-f $col[1])) {
        $col[1] = File::Spec->abs2rel($col[1]);
        print "$col[0]=$col[1]\n";
      } else {
        print "$_\n";
      }
    }
  ' > $modeldir/conf/$f
  build_files="$build_files $modeldir/conf/$f"
done

tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \
  $modeldir/final.mdl $modeldir/tree >/dev/null

# Changes back to absolute path.
rm -rf $modeldir/conf
mv $modeldir/conf_abs_path $modeldir/conf

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/convert_slf_parallel.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0.

# This script converts lattices to HTK format compatible with other toolkits.
# We can choose to put words to nodes or arcs, as both is valid in the SLF format.

# begin configuration section.
cmd=run.pl
dirname=lats-in-htk-slf
parallel_opts="-tc 50" # We should limit disk stress
word_to_node=false # Words in arcs or nodes? [default:arcs]
#end configuration section.

echo "$0 $@"

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --word-to-node (true|false)     # put word symbols on nodes instead of links."
  echo "    --parallel-opts STR             # parallelization options (def.: '-tc 50')."
  echo "e.g.:"
  echo "$0 data/dev data/lang exp/tri4a/decode_dev"
  exit 1;
fi

data=$1
lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
dir=$3

model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir.

# Check the files that are actually read below; the alignment step uses
# phones/align_lexicon.int (the original checked word_boundary.int instead).
for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do
  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done

[ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname/log

echo "$0: Converting lattices into '$dir/$dirname'"

# Words in arcs or nodes? [default:arcs]
# Initialise the variable that is really passed to convert_slf.pl, so a value
# inherited from the environment cannot leak in.
word_to_node_arg=
$word_to_node && word_to_node_arg="--word-to-node"

nj=$(cat $dir/num_jobs)

# convert the lattices (individually, gzipped)
$cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \
  mkdir -p $dir/$dirname/JOB/ '&&' \
  lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \
  utils/int2sym.pl -f 3 $lang/words.txt \| \
  utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1

# make list of lattices
# (the pattern is quoted so the shell cannot expand it against files in $PWD)
find -L $PWD/$dir/$dirname -name '*.lat.gz' > $dir/$dirname/lat_htk.scp || exit 1

# check number of lattices:
nseg=$(cat $data/segments | wc -l)
nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l)
echo "segments $nseg, saved-lattices $nlat_out"
#
[ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \
  && exit 1

echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp"
exit 0

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/combine_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
#           2014  David Snyder

# This script operates on a data directory, such as in data/train/.
# See http://kaldi.sourceforge.net/data_prep.html#data_prep_data
# for what these directories contain.

# Begin configuration section.
extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..."
skip_fix=false # skip the fix_data_dir.sh in the end
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -lt 2 ]; then
  echo "Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
  echo "Note, files that don't appear in first source dir will not be added even if they appear in later ones."
  exit 1
fi

dest=$1;
shift;

first_src=$1;

export LC_ALL=C

# Validate every source directory before touching the destination, so a
# typo in a source path no longer wipes an existing $dest.
for dir in $*; do
  if [ ! -f $dir/utt2spk ]; then
    echo "$0: no such file $dir/utt2spk"
    exit 1;
  fi
done

rm -r $dest 2>/dev/null
mkdir -p $dest;

# W.r.t. utt2uniq file the script has different behavior compared to other files
# it is not compulsory for it to exist in src directories, but if it exists in
# even one it should exist in all.
# We will create the files where necessary
has_utt2uniq=false
for in_dir in $*; do
  if [ -f $in_dir/utt2uniq ]; then
    has_utt2uniq=true
    break
  fi
done

if $has_utt2uniq; then
  # we are going to create an utt2uniq file in the destdir
  for in_dir in $*; do
    if [ ! -f $in_dir/utt2uniq ]; then
      # we assume that utt2uniq is a one to one mapping
      # (each utterance id is mapped to itself)
      cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}'
    else
      cat $in_dir/utt2uniq
    fi
  done | sort -k1 > $dest/utt2uniq
  echo "$0: combined utt2uniq"
fi
# some of the old scripts might provide utt2uniq as an extrafile, so just remove it
extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g")

# Concatenate each known file across all source dirs and sort it by key.
# Whether a file is combined at all is decided by the first source dir only.
for file in utt2spk utt2lang feats.scp text cmvn.scp segments reco2file_and_channel wav.scp spk2gender $extra_files; do
  if [ -f $first_src/$file ]; then
    ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1;
    echo "$0: combined $file"
  else
    echo "$0 [info]: not combining $file as it does not exist"
  fi
done

# spk2utt is derived from the combined utt2spk rather than concatenated.
utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt

if ! $skip_fix ; then
  utils/fix_data_dir.sh $dest || exit 1;
fi

exit 0

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/apply_map.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
use warnings; #sed replacement for -w perl parameter
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.

# This program is a bit like ./sym2int.pl in that it applies a map
# to things in a file, but it's a bit more general in that it doesn't
# assume the things being mapped to are single tokens, they could
# be sequences of tokens.  See the usage message.


# Optional "-f <field-range>": restrict the mapping to some input fields.
if (@ARGV > 0 && $ARGV[0] eq "-f") {
  shift @ARGV;
  $field_spec = shift @ARGV;
  if ($field_spec =~ m/^\d+$/) {
    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
  }
  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
    if ($1 ne "") {
      $field_begin = $1 - 1;  # Change to zero-based indexing.
    }
    if ($2 ne "") {
      $field_end = $2 - 1;  # Change to zero-based indexing.
    }
  }
  if (!defined $field_begin && !defined $field_end) {
    die "Bad argument to -f option: $field_spec";
  }
}

# Mapping is obligatory
$permissive = 0;
if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
  shift @ARGV;
  # Mapping is optional (missing key is printed to output)
  $permissive = 1;
}

if(@ARGV != 1) {
  # NOTE(review): the <...> placeholders in this message were lost in
  # extraction and are reconstructed here -- confirm against upstream.
  print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
  print STDERR "Usage: apply_map.pl [options] map <input >output\n" .
    "options: [-f <field-range> ]\n" .
    "Applies the map 'map' to all input text, where each line of the map\n" .
    "is interpreted as a map from the first field to the list of the other fields\n" .
    "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
    "range in the input to apply the map to.\n" .
    "e.g.: echo A B | apply_map.pl a.txt\n" .
    "where a.txt is:\n" .
    "A a1 a2\n" .
    "B b\n" .
    "will produce:\n" .
    "a1 a2 b\n";
  exit(1);
}

($map) = @ARGV;
open(M, "<$map") || die "Error opening map file $map: $!";

# Load the map: first field is the key, the remaining fields are the value.
# (The file handle had been lost; a bare "while ()" never terminates.)
while (<M>) {
  @A = split(" ", $_);
  @A >= 1 || die "apply_map.pl: empty line.";
  $i = shift @A;
  $o = join(" ", @A);
  $map{$i} = $o;
}

# Apply the map to the selected fields of every line on standard input.
while(<STDIN>) {
  @A = split(" ", $_);
  for ($x = 0; $x < @A; $x++) {
    if ( (!defined $field_begin || $x >= $field_begin)
         && (!defined $field_end || $x <= $field_end)) {
      $a = $A[$x];
      if (!defined $map{$a}) {
        if (!$permissive) {
          die "apply_map.pl: undefined key $a\n";
        } else {
          print STDERR "apply_map.pl: warning! missing key $a\n";
        }
      } else {
        $A[$x] = $map{$a};
      }
    }
  }
  print join(" ", @A) . "\n";
}

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/timit_format_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2013  (Author: Daniel Povey)
# Apache 2.0

# This script takes data prepared in a corpus-dependent way
# in data/local/, and converts it into the "canonical" form,
# in various subdirectories of data/, e.g. data/lang, data/train, etc.

. ./path.sh || exit 1;

echo "Preparing train, dev and test data"
srcdir=data/local/data
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp
lexicon=data/local/dict/lexicon.txt
mkdir -p $tmpdir

for x in train dev test; do
  mkdir -p data/$x
  cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
  cp $srcdir/$x.text data/$x/text || exit 1;
  cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
  cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
  utils/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
  # stm/glm are copied without an exit-on-failure check.
  cp $srcdir/${x}.stm data/$x/stm
  cp $srcdir/${x}.glm data/$x/glm
  utils/validate_data_dir.sh --no-feats data/$x || exit 1
done

# Next, for each type of language model, create the corresponding FST
# and the corresponding lang_test_* directory.

echo Preparing language models for test

for lm_suffix in bg; do
  test=data/lang_test_${lm_suffix}
  mkdir -p $test
  cp -r data/lang/* $test

  # NOTE(review): the egrep pattern had been reduced to ' | | ' by extraction,
  # which would drop nearly every ARPA line.  The sentence-boundary bigram
  # filter is reconstructed here -- confirm against upstream.
  gunzip -c $lmdir/lm_phone_${lm_suffix}.arpa.gz | \
    egrep -v '<s> <s>|</s> <s>|</s> </s>' | \
    arpa2fst - | fstprint | \
    utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
     --osymbols=$test/words.txt  --keep_isymbols=false --keep_osymbols=false | \
    fstrmepsilon | fstarcsort --sort_type=ilabel > $test/G.fst
  fstisstochastic $test/G.fst
  # The output is like:
  # 9.14233e-05 -0.259833
  # we do expect the first of these 2 numbers to be close to zero (the second is
  # nonzero because the backoff weights make the states sum to >1).
  # Because of the <s> fiasco for these particular LMs, the first number is not
  # as close to zero as it could be.

  # Everything below is only for diagnostic.
  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
  # this might cause determinization failure of CLG.
  # #0 is treated as an empty word.
  mkdir -p $tmpdir/g
  awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
    < "$lexicon"  >$tmpdir/g/select_empty.fst.txt
  fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \
   fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst
  fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' &&
    echo "Language model has cycles with empty words" && exit 1
  rm -r $tmpdir/g
done

utils/validate_lang.pl data/lang_test_bg || exit 1

echo "Succeeded in formatting data."
rm -r $tmpdir

--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_sclite.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
#           2014  Brno University of Technology (Author: Karel Vesely)
# Apache 2.0

# begin configuration section.
cmd=run.pl
stage=0
min_lmwt=1
max_lmwt=10
mbr_scale=1.0
#end configuration section.

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  # The <...> placeholders were lost in extraction; restored from the
  # positional arguments read below.
  echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
  echo "    --min_lmwt <int>                # minumum LM-weight for lattice rescoring "
  echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

model=$dir/../final.mdl # assume model one level up from decoding dir.

hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
hubdir=`dirname $hubscr`

phonemap="conf/phones.60-48-39.map"
nj=$(cat $dir/num_jobs)

symtab=$lang_or_graph/words.txt

# $data/stm and $data/glm are read below, so they are checked up front too.
for f in $symtab $dir/lat.1.gz $data/text $data/stm $data/glm; do
  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
done

mkdir -p $dir/scoring/log

# Map reference to 39 phone classes, the silence is optional (.):
local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 >$dir/scoring/stm_39phn
cp $data/glm $dir/scoring/glm_39phn

if [ $stage -le 0 ]; then
  # Get the phone-sequence on the best-path:
  for LMWT in $(seq $min_lmwt $max_lmwt); do
    $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \
      lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \
      lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1;
    cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm
    rm $dir/scoring/$LMWT.*.ctm
  done
fi

if [ $stage -le 1 ]; then
  # Map ctm to 39 phone classes:
  # (mkdir -p so that re-running the stage does not log a spurious error)
  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \
    mkdir -p $dir/score_LMWT ';' \
    cat $dir/scoring/LMWT.ctm \| \
    utils/int2sym.pl -f 5 $symtab \| \
    local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '>' \
    $dir/scoring/LMWT.ctm_39phn || exit 1
fi

# Score the set...
74 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 75 | cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \ 76 | $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1; 77 | 78 | exit 0; 79 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/score.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 3 | # 2014 Brno University of Technology (Author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | # begin configuration section. 7 | cmd=run.pl 8 | stage=0 9 | min_lmwt=1 10 | max_lmwt=10 11 | mbr_scale=1.0 12 | #end configuration section. 13 | : 14 | [ -f ./path.sh ] && . ./path.sh 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 19 | echo " Options:" 20 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 21 | echo " --stage (0|1|2) # start scoring script from part-way through." 22 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 23 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 24 | exit 1; 25 | fi 26 | 27 | data=$1 28 | lang_or_graph=$2 29 | dir=$3 30 | 31 | model=$dir/../final.mdl # assume model one level up from decoding dir. 32 | 33 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 34 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; 35 | hubdir=`dirname $hubscr` 36 | 37 | phonemap="conf/phones.60-48-39.map" 38 | nj=$(cat $dir/num_jobs) 39 | 40 | symtab=$lang_or_graph/words.txt 41 | 42 | for f in $symtab $dir/lat.1.gz $data/text; do 43 | [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; 44 | done 45 | 46 | mkdir -p $dir/scoring/log 47 | 48 | # Map reference to 39 phone classes, the silence is optional (.): 49 | local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 | \ 50 | sed 's: sil: (sil):g' > $dir/scoring/stm_39phn 51 | cp $data/glm $dir/scoring/glm_39phn 52 | 53 | if [ $stage -le 0 ]; then 54 | # Get the phone-sequence on the best-path: 55 | for LMWT in $(seq $min_lmwt $max_lmwt); do 56 | $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \ 57 | lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ 58 | lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1; 59 | cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm 60 | rm $dir/scoring/$LMWT.*.ctm 61 | done 62 | fi 63 | 64 | if [ $stage -le 1 ]; then 65 | # Map ctm to 39 phone classes: 66 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \ 67 | mkdir $dir/score_LMWT ';' \ 68 | cat $dir/scoring/LMWT.ctm \| \ 69 | utils/int2sym.pl -f 5 $symtab \| \ 70 | local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '>' \ 71 | $dir/scoring/LMWT.ctm_39phn || exit 1 72 | fi 73 | 74 | # Score the set... 
75 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 76 | cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \ 77 | $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1; 78 | 79 | exit 0; 80 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/score_phrich.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 3 | # 2014 Brno University of Technology (Author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | # begin configuration section. 7 | cmd=run.pl 8 | stage=0 9 | min_lmwt=1 10 | max_lmwt=15 11 | mbr_scale=1.0 12 | #end configuration section. 13 | 14 | [ -f ./path.sh ] && . ./path.sh 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 19 | echo " Options:" 20 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 21 | echo " --stage (0|1|2) # start scoring script from part-way through." 22 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 23 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 24 | exit 1; 25 | fi 26 | 27 | data=$1 28 | lang_or_graph=$2 29 | dir=$3 30 | 31 | model=$dir/../final.mdl # assume model one level up from decoding dir. 32 | 33 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 34 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; 35 | hubdir=`dirname $hubscr` 36 | 37 | phonemap="conf/phones.60-48-39.map" 38 | nj=$(cat $dir/num_jobs) 39 | 40 | symtab=$lang_or_graph/words.txt 41 | 42 | for f in $symtab $dir/lat.1.gz $data/text; do 43 | [ ! 
-f $f ] && echo "score.sh: no such file $f" && exit 1; 44 | done 45 | 46 | mkdir -p $dir/scoring/log 47 | 48 | # Map reference to 39 phone classes, the silence is deleted (.): 49 | local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 | \ 50 | sed 's: sil::g' > $dir/scoring/stm_39phn 51 | cp $data/glm $dir/scoring/glm_39phn 52 | 53 | 54 | 55 | if [ $stage -le 0 ]; then 56 | # Get the phone-sequence on the best-path: 57 | for LMWT in $(seq $min_lmwt $max_lmwt); do 58 | $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \ 59 | lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \ 60 | lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1; 61 | cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm 62 | rm $dir/scoring/$LMWT.*.ctm 63 | done 64 | fi 65 | 66 | if [ $stage -le 1 ]; then 67 | # Map ctm to 39 phone classes: 68 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \ 69 | mkdir $dir/score_LMWT ';' \ 70 | cat $dir/scoring/LMWT.ctm \| \ 71 | utils/int2sym.pl -f 5 $symtab \| \ 72 | local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '|' grep -v 'sil' '>' \ 73 | $dir/scoring/LMWT.ctm_39phn || exit 1 74 | fi 75 | 76 | 77 | 78 | # Score the set... 
79 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 80 | cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \ 81 | $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1; 82 | 83 | exit 0; 84 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/format_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -u 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | set -o errexit 20 | 21 | if [ $# -ne 4 ]; then 22 | printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0` 23 | echo " Convert ARPA-format language models to FSTs."; 24 | exit 1; 25 | fi 26 | 27 | lang_dir=$1 28 | lm=$2 29 | lexicon=$3 30 | out_dir=$4 31 | mkdir -p $out_dir 32 | 33 | [ -f ./path.sh ] && . 
./path.sh 34 | 35 | echo "Converting '$lm' to FST" 36 | 37 | for f in phones.txt words.txt L.fst L_disambig.fst phones/; do 38 | cp -r $lang_dir/$f $out_dir 39 | done 40 | 41 | lm_base=$(basename $lm '.gz') 42 | gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \ 43 | > $out_dir/oovs_${lm_base}.txt 44 | 45 | # Removing all "illegal" combinations of and , which are supposed to 46 | # occur only at being/end of utt. These can cause determinization failures 47 | # of CLG [ends up being epsilon cycles]. 48 | gunzip -c $lm \ 49 | | egrep -v ' | | ' \ 50 | | arpa2fst - | fstprint \ 51 | | utils/remove_oovs.pl $out_dir/oovs_${lm_base}.txt \ 52 | | utils/eps2disambig.pl | utils/s2eps.pl \ 53 | | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 54 | --keep_isymbols=false --keep_osymbols=false \ 55 | | fstrmepsilon | fstarcsort --sort_type=ilabel > $out_dir/G.fst 56 | set +e 57 | fstisstochastic $out_dir/G.fst 58 | set -e 59 | # The output is like: 60 | # 9.14233e-05 -0.259833 61 | # we do expect the first of these 2 numbers to be close to zero (the second is 62 | # nonzero because the backoff weights make the states sum to >1). 63 | 64 | # Everything below is only for diagnostic. 65 | # Checking that G has no cycles with empty words on them (e.g. , ); 66 | # this might cause determinization failure of CLG. 67 | # #0 is treated as an empty word. 
68 | mkdir -p $out_dir/tmpdir.g 69 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 70 | END{print "0 0 #0 #0"; print "0";}' \ 71 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt 72 | 73 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 74 | $out_dir/tmpdir.g/select_empty.fst.txt \ 75 | | fstarcsort --sort_type=olabel \ 76 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst 77 | 78 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \ 79 | && echo "Language model has cycles with empty words" && exit 1 80 | 81 | rm -r $out_dir/tmpdir.g 82 | 83 | 84 | echo "Succeeded in formatting LM: '$lm'" 85 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/filter_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script takes a list of utterance-ids or any file whose first field 20 | # of each line is an utterance-id, and filters an scp 21 | # file (or any file whose "n-th" field is an utterance id), printing 22 | # out only those lines whose "n-th" field is in id_list. 
The index of 23 | # the "n-th" field is 1, by default, but can be changed by using 24 | # the -f switch 25 | 26 | $exclude = 0; 27 | $field = 1; 28 | $shifted = 0; 29 | 30 | do { 31 | $shifted=0; 32 | if ($ARGV[0] eq "--exclude") { 33 | $exclude = 1; 34 | shift @ARGV; 35 | $shifted=1; 36 | } 37 | if ($ARGV[0] eq "-f") { 38 | $field = $ARGV[1]; 39 | shift @ARGV; shift @ARGV; 40 | $shifted=1 41 | } 42 | } while ($shifted); 43 | 44 | if(@ARGV < 1 || @ARGV > 2) { 45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . 46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . 47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . 48 | "only the lines that were *not* in id_list.\n" . 49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . 50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" . 51 | "-f option, add 1 to the argument.\n" . 52 | "See also: utils/filter_scp.pl .\n"; 53 | } 54 | 55 | 56 | $idlist = shift @ARGV; 57 | open(F, "<$idlist") || die "Could not open id-list file $idlist"; 58 | while() { 59 | @A = split; 60 | @A>=1 || die "Invalid id-list file line $_"; 61 | $seen{$A[0]} = 1; 62 | } 63 | 64 | if ($field == 1) { # Treat this as special case, since it is common. 65 | while(<>) { 66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 67 | # $1 is what we filter on. 
68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { 69 | print $_; 70 | } 71 | } 72 | } else { 73 | while(<>) { 74 | @A = split; 75 | @A > 0 || die "Invalid scp file line $_"; 76 | @A >= $field || die "Invalid scp file line $_"; 77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { 78 | print $_; 79 | } 80 | } 81 | } 82 | 83 | # tests: 84 | # the following should print "foo 1" 85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) 86 | # the following should print "bar 2". 87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) 88 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/gen_topo.pl: --------------------------------------------------------------------------------
#!/usr/bin/env perl

# Copyright 2012  Johns Hopkins University (author: Daniel Povey)

# Generate a topology file.  This allows control of the number of states in the
# non-silence HMMs, and in the silence HMMs.
#
# Arguments (all four are required):
#   num-nonsilence-states  emitting states per non-silence phone (1..100)
#   num-silence-states     emitting states per silence phone (1, or 3..100)
#   nonsilence-phones      colon-separated integer phone ids
#   silence-phones         colon-separated integer phone ids
# The topology is written to standard output.

if (@ARGV != 4) {
  print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
  print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n";
  exit (1);
}

($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV;

( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) ||
  die "Unexpected number of nonsilence-model states $num_nonsil_states\n";
# Two silence states are rejected: the silence topology below needs either a
# single state or at least one "central" state between the first and the last.
(( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) ||
  die "Unexpected number of silence-model states $num_sil_states\n";

# Phone lists arrive colon-separated; the topology file wants them
# space-separated.
$nonsil_phones =~ s/:/ /g;
$sil_phones =~ s/:/ /g;
$nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
$sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";

print "<Topology>\n";

# Non-silence phones: plain left-to-right chain, each state with a self-loop
# (0.75) and a transition to the next state (0.25).
print "<TopologyEntry>\n";
print "<ForPhones>\n";
print "$nonsil_phones\n";
print "</ForPhones>\n";
for ($state = 0; $state < $num_nonsil_states; $state++) {
  $statep1 = $state+1;
  print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n";
}
print "<State> $num_nonsil_states </State>\n"; # non-emitting final state.
print "</TopologyEntry>\n";

# Now silence phones.  They have a different topology-- apart from the first and
# last states, it's fully connected, as long as you have >= 3 states.

if ($num_sil_states > 1) {
  # Uniform probability over the ($num_sil_states-1) outgoing arcs.
  $transp = 1.0 / ($num_sil_states-1);
  print "<TopologyEntry>\n";
  print "<ForPhones>\n";
  print "$sil_phones\n";
  print "</ForPhones>\n";
  print "<State> 0 <PdfClass> 0 ";
  for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last
    # emitting state.
    print "<Transition> $nextstate $transp ";
  }
  print "</State>\n";
  for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to
    # themselves and to the last emitting state.
    print "<State> $state <PdfClass> $state ";
    for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) {
      print "<Transition> $nextstate $transp ";
    }
    print "</State>\n";
  }
  # Final emitting state (non-skippable).
  $state = $num_sil_states-1;
  print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n";
  # Final nonemitting state:
  print "<State> $num_sil_states </State>\n";
  print "</TopologyEntry>\n";
} else {
  # Single emitting silence state: self-loop plus exit to state 1.
  print "<TopologyEntry>\n";
  print "<ForPhones>\n";
  print "$sil_phones\n";
  print "</ForPhones>\n";
  print "<State> 0 <PdfClass> 0 ";
  print "<Transition> 0 0.75 ";
  print "<Transition> 1 0.25 ";
  print "</State>\n";
  # The final state must be the one the transition above points to, i.e.
  # $num_sil_states (== 1 here), not the non-silence state count.
  print "<State> $num_sil_states </State>\n"; # non-emitting final state.
  print "</TopologyEntry>\n";
}

print "</Topology>\n";
-------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/subset_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This program selects a subset of N elements in the scp. 19 | 20 | # By default, it selects them evenly from throughout the scp, in order to avoid 21 | # selecting too many from the same speaker. It prints them on the standard 22 | # output.
23 | # With the option --first, it just selects the N first utterances. 24 | # With the option --last, it just selects the N last utterances. 25 | 26 | # Last modified by JHU & HKUST @2013 27 | 28 | 29 | $quiet = 0; 30 | $first = 0; 31 | $last = 0; 32 | 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") { 34 | shift; 35 | $quiet = 1; 36 | } 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") { 38 | shift; 39 | $first = 1; 40 | } 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") { 42 | shift; 43 | $last = 1; 44 | } 45 | 46 | if(@ARGV < 2 ) { 47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . 48 | " --quiet causes it to not die if N < num lines in scp.\n" . 49 | " --first and --last make it equivalent to head or tail.\n" . 50 | "See also: filter_scp.pl\n"; 51 | } 52 | 53 | $N = shift @ARGV; 54 | if($N == 0) { 55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; 56 | } 57 | $inscp = shift @ARGV; 58 | open(I, "<$inscp") || die "Opening input scp file $inscp"; 59 | 60 | @F = (); 61 | while() { 62 | push @F, $_; 63 | } 64 | $numlines = @F; 65 | if($N > $numlines) { 66 | if ($quiet) { 67 | $N = $numlines; 68 | } else { 69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines"; 70 | } 71 | } 72 | 73 | sub select_n { 74 | my ($start,$end,$num_needed) = @_; 75 | my $diff = $end - $start; 76 | if ($num_needed > $diff) { 77 | die "select_n: code error"; 78 | } 79 | if ($diff == 1 ) { 80 | if ($num_needed > 0) { 81 | print $F[$start]; 82 | } 83 | } else { 84 | my $halfdiff = int($diff/2); 85 | my $halfneeded = int($num_needed/2); 86 | select_n($start, $start+$halfdiff, $halfneeded); 87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded); 88 | } 89 | } 90 | 91 | if ( ! $first && ! $last) { 92 | if ($N > 0) { 93 | select_n(0, $numlines, $N); 94 | } 95 | } else { 96 | if ($first) { # --first option: same as head. 
97 | for ($n = 0; $n < $N; $n++) { 98 | print $F[$n]; 99 | } 100 | } else { # --last option: same as tail. 101 | for ($n = @F - $N; $n < @F; $n++) { 102 | print $F[$n]; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/convert_ctm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # This takes as standard input a ctm file that's "relative to the utterance", 6 | # i.e. times are measured relative to the beginning of the segments, and it 7 | # uses a "segments" file (format: 8 | # utterance-id recording-id start-time end-time 9 | # ) and a "reco2file_and_channel" file (format: 10 | # recording-id basename-of-file 11 | 12 | $skip_unknown=undef; 13 | if ( $ARGV[0] eq "--skip-unknown" ) { 14 | $skip_unknown=1; 15 | shift @ARGV; 16 | } 17 | 18 | if (@ARGV < 2 || @ARGV > 3) { 19 | print STDERR "Usage: convert_ctm.pl [] > real-ctm\n"; 20 | exit(1); 21 | } 22 | 23 | $segments = shift @ARGV; 24 | $reco2file_and_channel = shift @ARGV; 25 | 26 | open(S, "<$segments") || die "opening segments file $segments"; 27 | while() { 28 | @A = split(" ", $_); 29 | @A == 4 || die "Bad line in segments file: $_"; 30 | ($utt, $recording_id, $begin_time, $end_time) = @A; 31 | $utt2reco{$utt} = $recording_id; 32 | $begin{$utt} = $begin_time; 33 | $end{$utt} = $end_time; 34 | } 35 | close(S); 36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel"; 37 | while() { 38 | @A = split(" ", $_); 39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_"; 40 | ($recording_id, $file, $channel) = @A; 41 | $reco2file{$recording_id} = $file; 42 | $reco2channel{$recording_id} = $channel; 43 | } 44 | 45 | 46 | # Now process the ctm file, which is either the standard input or the third 47 | # command-line argument. 
48 | $num_done = 0; 49 | while(<>) { 50 | @A= split(" ", $_); 51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_"; 52 | # lines look like: 53 | # 1 [ confidence ] 54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A; 55 | $reco = $utt2reco{$utt}; 56 | if (!defined $reco) { 57 | next if defined $skip_unknown; 58 | die "Utterance-id $utt not defined in segments file $segments"; 59 | } 60 | $file = $reco2file{$reco}; 61 | $channel = $reco2channel{$reco}; 62 | if (!defined $file || !defined $channel) { 63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 64 | } 65 | $b = $begin{$utt}; 66 | $e = $end{$utt}; 67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording. 68 | $wbegin_r = sprintf("%.2f", $wbegin_r); 69 | $wlen = sprintf("%.2f", $wlen); 70 | if (defined $conf) { 71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 72 | } else { 73 | $line = "$file $channel $wbegin_r $wlen $w\n"; 74 | } 75 | if ($wbegin_r + $wlen > $e + 0.01) { 76 | print STDERR "Warning: word appears to be past end of recording; line is $line"; 77 | } 78 | print $line; # goes to stdout. 
79 | $num_done++; 80 | } 81 | 82 | if ($num_done == 0) { exit 1; } else { exit 0; } 83 | 84 | __END__ 85 | 86 | # Test example [also test it without the 0.5's] 87 | echo utt reco 10.0 20.0 > segments 88 | echo reco file A > reco2file_and_channel 89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in 90 | echo file A 18.00 1.00 word 0.5 > ctm_out 91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error 92 | rm segments reco2file_and_channel ctm_in ctm_out 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /tune_hyperparameters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ########################################################## 3 | # pytorch-kaldi v.0.1 4 | # Mirco Ravanelli, Titouan Parcollet 5 | # Mila, University of Montreal 6 | # October 2018 7 | # 8 | # Description: 9 | # This scripts generates config files with the random hyperparamters specified by the user. 
10 | # python tune_hyperparameters.py cfg_file out_folder N_exp hyperparameters_spec 11 | # e.g., python tune_hyperparameters.py cfg/TIMIT_MLP_mfcc.cfg exp/TIMIT_MLP_mfcc_tuning 10 arch_lr=randfloat(0.001,0.01) batch_size_train=randint(32,256) dnn_act=choose_str{relu,relu,relu,relu,softmax|tanh,tanh,tanh,tanh,softmax} 12 | ########################################################## 13 | 14 | 15 | import random 16 | import re 17 | import os 18 | import sys 19 | from random import randint 20 | 21 | if __name__ == "__main__": 22 | cfg_file = sys.argv[1] 23 | output_folder = sys.argv[2] 24 | N_exp = int(sys.argv[3]) 25 | hyperparam_list = sys.argv[4:] 26 | seed = 1234 27 | 28 | print("Generating config file for hyperparameter tuning...") 29 | 30 | if not os.path.exists(output_folder): 31 | os.makedirs(output_folder) 32 | 33 | random.seed(seed) 34 | 35 | for i in range(N_exp): 36 | 37 | cfg_file_out = output_folder + "/exp" + str(i) + ".cfg" 38 | 39 | with open(cfg_file_out, "wt") as cfg_out, open(cfg_file, "rt") as cfg_in: 40 | for line in cfg_in: 41 | 42 | key = line.split("=")[0] 43 | 44 | if key == "out_folder": 45 | line = "out_folder=" + output_folder + "/exp" + str(i) + "\n" 46 | 47 | hyper_found = False 48 | for hyperparam in hyperparam_list: 49 | 50 | key_hyper = hyperparam.split("=")[0] 51 | 52 | if key == key_hyper: 53 | 54 | if "randint" in hyperparam: 55 | lower, higher = re.search("randint\((.+?)\)", hyperparam).group(1).split(",") 56 | value_hyper = randint(int(lower), int(higher)) 57 | hyper_found = True 58 | 59 | if "randfloat" in hyperparam: 60 | lower, higher = re.search("randfloat\((.+?)\)", hyperparam).group(1).split(",") 61 | value_hyper = random.uniform(float(lower), float(higher)) 62 | hyper_found = True 63 | 64 | if "choose_str" in hyperparam: 65 | value_hyper = random.choice(re.search("\{(.+?)\}", hyperparam).group(1).split("|")) 66 | hyper_found = True 67 | 68 | if "choose_int" in hyperparam: 69 | value_hyper = 
int(random.choice(re.search("\{(.+?)\}", hyperparam).group(1).split("|"))) 70 | hyper_found = True 71 | 72 | if "choose_float" in hyperparam: 73 | value_hyper = float(random.choice(re.search("\{(.+?)\}", hyperparam).group(1).split("|"))) 74 | hyper_found = True 75 | 76 | line_out = key + "=" + str(value_hyper) + "\n" 77 | 78 | if not hyper_found: 79 | line_out = line 80 | 81 | cfg_out.write(line_out) 82 | 83 | print("Done %s" % cfg_file_out) 84 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/rnnlm_compute_scores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute scores from RNNLM. This script takes a directory 4 | # $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ), 5 | # where it expects the files: 6 | # rnnlm wordlist.rnn unk.probs, 7 | # and also an input file location where it can get the sentences to score, and 8 | # an output file location to put the scores (negated logprobs) for each 9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and 10 | # output files will have a first field that corresponds to some kind of 11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the 12 | # N-best list. 13 | # 14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary, 15 | # that the RNN was trained on (note, it won't include or ), 16 | # plus which is a kind of class where we put low-frequency 17 | # words; unk.probs gives the probs for words given this class, and it 18 | # has, on each line, "word prob". 19 | 20 | rnnlm_ver=rnnlm-0.3e 21 | 22 | . ./path.sh || exit 1; 23 | . utils/parse_options.sh 24 | 25 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm 26 | 27 | [ ! 
-f $rnnlm ] && echo No such program $rnnlm && exit 1; 28 | 29 | if [ $# != 4 ]; then 30 | echo "Usage: rnnlm_compute_scores.sh " 31 | exit 1; 32 | fi 33 | 34 | dir=$1 35 | tempdir=$2 36 | text_in=$3 37 | scores_out=$4 38 | 39 | for x in rnnlm wordlist.rnn unk.probs; do 40 | if [ ! -f $dir/$x ]; then 41 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist." 42 | exit 1; 43 | fi 44 | done 45 | 46 | mkdir -p $tempdir 47 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text 48 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids. 49 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \ 50 | -v logprobs=$tempdir/loglikes.oov \ 51 | 'BEGIN{ while((getline0) { invoc[$1]=1; } while ((getline0){ unkprob[$1]=$2;} } 52 | { logprob=0; 53 | if (NF==0) { printf ""; logprob = log(1.0e-07); 54 | print "Warning: empty sequence." | "cat 1>&2"; } 55 | for (x=1;x<=NF;x++) { w=$x; 56 | if (invoc[w]) { printf("%s ",w); } else { 57 | printf(" "); 58 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 59 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 60 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk 61 | 62 | # OK, now we compute the scores on the text with OOVs replaced 63 | # with 64 | 65 | if [ $rnnlm_ver == "faster-rnnlm" ]; then 66 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \ 67 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 68 | else 69 | # add the utterance_id as required by Mikolove's rnnlm 70 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk 71 | 72 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \ 73 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 74 | fi 75 | 76 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \ 77 | echo "rnnlm rescoring failed" && exit 1; 78 | 79 | paste 
$tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores 80 | 81 | # scores out, with utterance-ids. 82 | paste $tempdir/ids $tempdir/scores > $scores_out 83 | 84 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/decode_dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # Copyright 2013 Yajie Miao Carnegie Mellon University 5 | # Apache 2.0 6 | 7 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in 8 | # build_nnet_pfile.sh. Also, the DNN model has been trained and put in srcdir. 9 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 10 | 11 | # Modified 2018 Mirco Ravanelli Univeristé de Montréal - Mila 12 | 13 | 14 | cfg_file=$1 15 | out_folder=$2 16 | 17 | 18 | 19 | # Reading the options in the cfg file 20 | source <(grep = $cfg_file | sed 's/ *= */=/g') 21 | 22 | cd $decoding_script_folder 23 | 24 | ./path.sh 25 | ./cmd.sh 26 | 27 | 28 | ## Begin configuration section 29 | num_threads=1 30 | stage=0 31 | cmd=utils/run.pl 32 | 33 | 34 | echo "$0 $@" # Print the command line for logging 35 | 36 | ./parse_options.sh || exit 1; 37 | 38 | if [ $# != 3 ]; then 39 | echo "Wrong #arguments ($#, expected 5)" 40 | echo "Usage: steps/decode_dnn.sh [options] " 41 | echo " e.g.: steps/decode_dnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode" 42 | echo "main options (for others, see top of script file)" 43 | echo " --stage # starts from which stage" 44 | echo " --nj # number of parallel jobs" 45 | echo " --cmd # command to run in parallel with" 46 | echo " --acwt # default 0.1 ... used to get posteriors" 47 | echo " --num-threads # number of threads to use, default 4." 48 | echo " --parallel-opts # e.g. 
'-pe smp 4' if you supply --num-threads 4" 49 | echo " --scoring-opts # options to local/score.sh" 50 | exit 1; 51 | fi 52 | 53 | 54 | 55 | dir=`echo $out_folder | sed 's:/$::g'` # remove any trailing slash. 56 | featstring=$3 57 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 58 | sdata=$data/split$nj; 59 | 60 | thread_string= 61 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 62 | 63 | 64 | mkdir -p $dir/log 65 | 66 | arr_ck=($(ls $featstring)) 67 | 68 | nj=${#arr_ck[@]} 69 | 70 | echo $nj > $dir/num_jobs 71 | 72 | # Some checks. Note: we don't need $srcdir/tree but we expect 73 | # it should exist, given the current structure of the scripts. 74 | for f in $graphdir/HCLG.fst $data/feats.scp; do 75 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 76 | done 77 | 78 | 79 | JOB=1 80 | for ck_data in "${arr_ck[@]}" 81 | do 82 | 83 | finalfeats="ark,s,cs: cat $ck_data |" 84 | latgen-faster-mapped$thread_string --min-active=$min_active --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.$JOB.gz" &> $dir/log/decode.$JOB.log & 85 | JOB=$((JOB+1)) 86 | done 87 | wait 88 | 89 | 90 | 91 | # Copy the source model in order for scoring 92 | cp $alidir/final.mdl $srcdir 93 | 94 | 95 | if ! $skip_scoring ; then 96 | [ ! -x $scoring_script ] && \ 97 | echo "$0: not scoring because local/score.sh does not exist or not executable." 
&& exit 1; 98 | $scoring_script $scoring_opts $data $graphdir $dir 99 | fi 100 | 101 | exit 0; 102 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/perturb_data_dir_speed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | # 2014 Tom Ko 5 | # Apache 2.0 6 | 7 | # This script operates on a directory, such as in data/train/, 8 | # that contains some subset of the following files: 9 | # wav.scp 10 | # spk2utt 11 | # utt2spk 12 | # text 13 | # 14 | # It generates the files which are used for perturbing the speed of the original data. 15 | 16 | . utils/parse_options.sh 17 | 18 | if [ $# != 3 ]; then 19 | echo "Usage: perturb_data_dir_speed.sh " 20 | echo "e.g.:" 21 | echo " $0 0.9 data/train_si284 data/train_si284p" 22 | exit 1 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | factor=$1 28 | srcdir=$2 29 | destdir=$3 30 | label="sp" 31 | spk_prefix=$label$factor"-" 32 | utt_prefix=$label$factor"-" 33 | 34 | #check is sox on the path 35 | which sox &>/dev/null 36 | ! [ $? -eq 0 ] && echo "sox: command not found" && exit 1; 37 | 38 | if [ ! 
-f $srcdir/utt2spk ]; then 39 | echo "$0: no such file $srcdir/utt2spk" 40 | exit 1; 41 | fi 42 | 43 | set -e; 44 | set -o pipefail 45 | 46 | mkdir -p $destdir 47 | 48 | cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map 49 | cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map 50 | cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq 51 | 52 | cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ 53 | utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk 54 | 55 | utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt 56 | 57 | if [ -f $srcdir/segments ]; then 58 | # also apply the spk_prefix to the recording-ids. 59 | cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map 60 | 61 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \ 62 | utils/apply_map.pl -f 2 $destdir/reco_map | \ 63 | awk -v factor=$factor \ 64 | '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments 65 | 66 | utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \ 67 | awk -v factor=$factor \ 68 | '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} 69 | else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp 70 | if [ -f $srcdir/reco2file_and_channel ]; then 71 | utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 72 | fi 73 | 74 | rm $destdir/reco_map 2>/dev/null 75 | else # no segments->wav indexed by utterance. 
76 | if [ -f $srcdir/wav.scp ]; then 77 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \ 78 | awk -v factor=$factor \ 79 | '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"} 80 | else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp 81 | fi 82 | fi 83 | 84 | if [ -f $srcdir/text ]; then 85 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text 86 | fi 87 | if [ -f $srcdir/spk2gender ]; then 88 | utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender 89 | fi 90 | 91 | 92 | rm $destdir/spk_map $destdir/utt_map 2>/dev/null 93 | echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir" 94 | utils/validate_data_dir.sh --no-feats $destdir 95 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/timit_prepare_dict.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 (Authors: Daniel Povey, Bagher BabaAli) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Call this script from one level above, e.g. from the s3/ directory. It puts 19 | # its output in data/local/. 
20 | 21 | # The parts of the output of this that will be needed are 22 | # [in data/local/dict/ ] 23 | # lexicon.txt 24 | # extra_questions.txt 25 | # nonsilence_phones.txt 26 | # optional_silence.txt 27 | # silence_phones.txt 28 | 29 | # run this from ../ 30 | srcdir=data/local/data 31 | dir=data/local/dict 32 | lmdir=data/local/nist_lm 33 | tmpdir=data/local/lm_tmp 34 | 35 | mkdir -p $dir $lmdir $tmpdir 36 | 37 | [ -f path.sh ] && . ./path.sh 38 | 39 | #(1) Dictionary preparation: 40 | 41 | # Make phones symbol-table (adding in silence and verbal and non-verbal noises at this point). 42 | # We are adding suffixes _B, _E, _S for beginning, ending, and singleton phones. 43 | 44 | # silence phones, one per line. 45 | echo sil > $dir/silence_phones.txt 46 | echo sil > $dir/optional_silence.txt 47 | 48 | # nonsilence phones; on each line is a list of phones that correspond 49 | # really to the same base phone. 50 | 51 | # Create the lexicon, which is just an identity mapping 52 | cut -d' ' -f2- $srcdir/train.text | tr ' ' '\n' | sort -u > $dir/phones.txt 53 | paste $dir/phones.txt $dir/phones.txt > $dir/lexicon.txt || exit 1; 54 | grep -v -F -f $dir/silence_phones.txt $dir/phones.txt > $dir/nonsilence_phones.txt 55 | 56 | # A few extra questions that will be added to those obtained by automatically clustering 57 | # the "real" phones. These ask about stress; there's also one for silence. 58 | cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1; 59 | cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) { 60 | $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \ 61 | >> $dir/extra_questions.txt || exit 1; 62 | 63 | # (2) Create the phone bigram LM 64 | if [ -z $IRSTLM ] ; then 65 | export IRSTLM=$KALDI_ROOT/tools/irstlm/ 66 | fi 67 | export PATH=${PATH}:$IRSTLM/bin 68 | if ! 
command -v prune-lm >/dev/null 2>&1 ; then 69 | echo "$0: Error: the IRSTLM is not available or compiled" >&2 70 | echo "$0: Error: We used to install it by default, but." >&2 71 | echo "$0: Error: this is no longer the case." >&2 72 | echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2 73 | echo "$0: Error: and run extras/install_irstlm.sh" >&2 74 | exit 1 75 | fi 76 | 77 | cut -d' ' -f2- $srcdir/train.text | sed -e 's:^: :' -e 's:$: :' \ 78 | > $srcdir/lm_train.text 79 | 80 | build-lm.sh -i $srcdir/lm_train.text -n 2 \ 81 | -o $tmpdir/lm_phone_bg.ilm.gz 82 | 83 | compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \ 84 | grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz 85 | 86 | echo "Dictionary & language model preparation succeeded" 87 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/summarize_logs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | sub split_hundreds { # split list of filenames into groups of 100. 
8 | my $names = shift @_; 9 | my @A = split(" ", $names); 10 | my @ans = (); 11 | while (@A > 0) { 12 | my $group = ""; 13 | for ($x = 0; $x < 100 && @A>0; $x++) { 14 | $fname = pop @A; 15 | $group .= "$fname "; 16 | } 17 | push @ans, $group; 18 | } 19 | return @ans; 20 | } 21 | 22 | sub parse_accounting_entry { 23 | $entry= shift @_; 24 | 25 | @elems = split " ", $entry; 26 | 27 | $time=undef; 28 | $threads=undef; 29 | foreach $elem (@elems) { 30 | if ( $elem=~ m/time=(\d+)/ ) { 31 | $elem =~ s/time=(\d+)/$1/; 32 | $time = $elem; 33 | } elsif ( $elem=~ m/threads=(\d+)/ ) { 34 | $elem =~ s/threads=(\d+)/$1/g; 35 | $threads = $elem; 36 | } else { 37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n"; 38 | } 39 | } 40 | 41 | if (defined($time) and defined($threads) ) { 42 | return ($time, $threads); 43 | } else { 44 | die "The accounting entry \"$entry\" did not contain all necessary attributes"; 45 | } 46 | } 47 | 48 | foreach $dir (@ARGV) { 49 | 50 | #$dir = $ARGV[0]; 51 | print $dir 52 | 53 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ; 54 | 55 | $dir =~ s:/$::; # Remove trailing slash. 56 | 57 | 58 | # Group the files into categories where all have the same base-name. 59 | foreach $f (glob ("$dir/*.log")) { 60 | $f_category = $f; 61 | # do next expression twice; s///g doesn't work as they overlap. 
62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:; 63 | #$f_category =~ s:\.\d+\.:.*.:; 64 | $fmap{$f_category} .= " $f"; 65 | } 66 | } 67 | 68 | foreach $c (sort (keys %fmap) ) { 69 | $n = 0; 70 | foreach $fgroup (split_hundreds($fmap{$c})) { 71 | $n += `grep -w WARNING $fgroup | wc -l`; 72 | } 73 | if ($n != 0) { 74 | print "$n warnings in $c\n" 75 | } 76 | } 77 | foreach $c (sort (keys %fmap)) { 78 | $n = 0; 79 | foreach $fgroup (split_hundreds($fmap{$c})) { 80 | $n += `grep -w ERROR $fgroup | wc -l`; 81 | } 82 | if ($n != 0) { 83 | print "$n errors in $c\n" 84 | } 85 | } 86 | 87 | $supertotal_cpu_time=0.0; 88 | $supertotal_clock_time=0.0; 89 | $supertotal_threads=0.0; 90 | 91 | foreach $c (sort (keys %fmap)) { 92 | $n = 0; 93 | 94 | $total_cpu_time=0.0; 95 | $total_clock_time=0.0; 96 | $total_threads=0.0; 97 | foreach $fgroup (split_hundreds($fmap{$c})) { 98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`; 99 | 100 | #print $lines ."\n"; 101 | 102 | @entries = split "\n", $lines; 103 | 104 | foreach $line (@entries) { 105 | $time, $threads = parse_accounting_entry($line); 106 | 107 | $total_cpu_time += $time * $threads; 108 | $total_threads += $threads; 109 | if ( $time > $total_clock_time ) { 110 | $total_clock_time = $time; 111 | } 112 | } 113 | } 114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n"; 115 | 116 | $supertotal_cpu_time += $total_cpu_time; 117 | $supertotal_clock_time += $total_clock_time; 118 | $supertotal_threads += $total_threads; 119 | } 120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n"; 121 | 122 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/timit_norm_trans.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | 5 | # Licensed 
under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script normalizes the TIMIT phonetic transcripts that have been 20 | # extracted in a format where each line contains an utterance ID followed by 21 | # the transcript, e.g.: 22 | # fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h# 23 | 24 | my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n 25 | Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a 26 | smaller set defined by the -m option. This script assumes that the mapping is 27 | done in the \"standard\" fashion, i.e. to 48 or 39 phones. The input is 28 | assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can 29 | be changed using the -from option. 
The input format is assumed to be utterance 30 | ID followed by transcript on the same line.\n"; 31 | 32 | use strict; 33 | use Getopt::Long; 34 | die "$usage" unless(@ARGV >= 1); 35 | my ($in_trans, $phone_map, $num_phones_out); 36 | my $num_phones_in = 60; 37 | GetOptions ("i=s" => \$in_trans, # Input transcription 38 | "m=s" => \$phone_map, # File containing phone mappings 39 | "from=i" => \$num_phones_in, # Input #phones: must be 60 or 48 40 | "to=i" => \$num_phones_out ); # Output #phones: must be 48 or 39 41 | 42 | die $usage unless(defined($in_trans) && defined($phone_map) && 43 | defined($num_phones_out)); 44 | if ($num_phones_in != 60 && $num_phones_in != 48) { 45 | die "Can only used 60 or 48 for -from (used $num_phones_in)." 46 | } 47 | if ($num_phones_out != 48 && $num_phones_out != 39) { 48 | die "Can only used 48 or 39 for -to (used $num_phones_out)." 49 | } 50 | unless ($num_phones_out < $num_phones_in) { 51 | die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)." 52 | } 53 | 54 | 55 | open(M, "<$phone_map") or die "Cannot open mappings file '$phone_map': $!"; 56 | my (%phonemap, %seen_phones); 57 | my $num_seen_phones = 0; 58 | while () { 59 | chomp; 60 | next if ($_ =~ /^q\s*.*$/); # Ignore glottal stops. 61 | m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $_"; 62 | my $mapped_from = ($num_phones_in == 60)? $1 : $2; 63 | my $mapped_to = ($num_phones_out == 48)? 
$2 : $3; 64 | if (!defined($seen_phones{$mapped_to})) { 65 | $seen_phones{$mapped_to} = 1; 66 | $num_seen_phones += 1; 67 | } 68 | $phonemap{$mapped_from} = $mapped_to; 69 | } 70 | if ($num_seen_phones != $num_phones_out) { 71 | die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones"; 72 | } 73 | 74 | open(T, "<$in_trans") or die "Cannot open transcription file '$in_trans': $!"; 75 | while () { 76 | chomp; 77 | $_ =~ m:^(\S+)\s+(.+): or die "Bad line: $_"; 78 | my $utt_id = $1; 79 | my $trans = $2; 80 | 81 | $trans =~ s/q//g; # Remove glottal stops. 82 | $trans =~ s/^\s*//; $trans =~ s/\s*$//; # Normalize spaces 83 | 84 | print $utt_id; 85 | for my $phone (split(/\s+/, $trans)) { 86 | if(exists $phonemap{$phone}) { print " $phonemap{$phone}"; } 87 | if(not exists $phonemap{$phone}) { print " $phone"; } 88 | } 89 | print "\n"; 90 | } 91 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/score_wsj.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0 4 | 5 | [ -f ./path.sh ] && . ./path.sh 6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | stage=0 10 | decode_mbr=true 11 | reverse=false 12 | word_ins_penalty=0.0 13 | min_lmwt=5 14 | max_lmwt=20 15 | #end configuration section. 16 | 17 | [ -f ./path.sh ] && . ./path.sh 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# -ne 3 ]; then 21 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 22 | echo " Options:" 23 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 24 | echo " --stage (0|1|2) # start scoring script from part-way through." 25 | echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." 
26 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 27 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 28 | echo " --reverse (true/false) # score with time reversed features " 29 | exit 1; 30 | fi 31 | 32 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 33 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; 34 | hubdir=`dirname $hubscr` 35 | 36 | data=$1 37 | lang_or_graph=$2 38 | dir=$3 39 | 40 | symtab=$lang_or_graph/words.txt 41 | 42 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 43 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1; 44 | hubdir=`dirname $hubscr` 45 | 46 | 47 | for f in $symtab $dir/lat.1.gz $data/text; do 48 | [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; 49 | done 50 | 51 | mkdir -p $dir/scoring/log 52 | 53 | cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt 54 | 55 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ 56 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 57 | lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ 58 | lattice-best-path --word-symbol-table=$symtab \ 59 | ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; 60 | 61 | if $reverse; then 62 | for lmwt in `seq $min_lmwt $max_lmwt`; do 63 | mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig 64 | awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \ 65 | <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra 66 | done 67 | fi 68 | 69 | # Note: the double level of quoting for the sed command 70 | #$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 71 | # cat $dir/scoring/LMWT.tra \| \ 72 | # utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ 73 | # compute-wer --text --mode=present \ 74 | # ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; 75 | 76 | 77 | # glm file 78 | echo ";; empty.glm" > $dir/scoring/glm 79 | echo " [FAKE] => 
%HESITATION / [ ] __ [ ] ;; hesitation token" >> $dir/scoring/glm 80 | echo "" >> $dir/scoring/glm 81 | 82 | 83 | # Create scoring folders 84 | for lmwt in `seq $min_lmwt $max_lmwt`; do 85 | mkdir -p $dir/score_$lmwt/ 86 | done 87 | 88 | 89 | # ctm file (for sclite) 90 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 91 | cat $dir/scoring/LMWT.tra \| \ 92 | utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' "|" awk '{for (i = 2; i <= NF; i++) {printf "%s 1 0.000 0.000 %s\n",$1,$i}}' "|" \ 93 | tr -d . ">&" $dir/score_LMWT/ctm || exit 1 94 | 95 | 96 | # Score the set... 97 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 98 | $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm -r $data/stm $dir/score_LMWT/ctm || exit 1; 99 | 100 | 101 | exit 0; 102 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/sym2int.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License.
16 | 17 | 18 | $ignore_oov = 0; 19 | 20 | for($x = 0; $x < 2; $x++) { 21 | if ($ARGV[0] eq "--map-oov") { 22 | shift @ARGV; 23 | $map_oov = shift @ARGV; 24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { 25 | # disallow '-f', the empty string and anything ending in words.txt as the 26 | # OOV symbol because these are likely command-line errors. 27 | die "the --map-oov option requires an argument"; 28 | } 29 | } 30 | if ($ARGV[0] eq "-f") { 31 | shift @ARGV; 32 | $field_spec = shift @ARGV; 33 | if ($field_spec =~ m/^\d+$/) { 34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 35 | } 36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 37 | if ($1 ne "") { 38 | $field_begin = $1 - 1; # Change to zero-based indexing. 39 | } 40 | if ($2 ne "") { 41 | $field_end = $2 - 1; # Change to zero-based indexing. 42 | } 43 | } 44 | if (!defined $field_begin && !defined $field_end) { 45 | die "Bad argument to -f option: $field_spec"; 46 | } 47 | } 48 | } 49 | 50 | $symtab = shift @ARGV; 51 | if (!defined $symtab) { 52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . 53 | "options: [--map-oov ] [-f ]\n" .
54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | } 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 57 | while() { 58 | @A = split(" ", $_); 59 | @A == 2 || die "bad line in symbol table file: $_"; 60 | $sym2int{$A[0]} = $A[1] + 0; 61 | } 62 | 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up 64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } 65 | $map_oov = $sym2int{$map_oov}; 66 | } 67 | 68 | $num_warning = 0; 69 | $max_warning = 20; 70 | 71 | while (<>) { 72 | @A = split(" ", $_); 73 | @B = (); 74 | for ($n = 0; $n < @A; $n++) { 75 | $a = $A[$n]; 76 | if ( (!defined $field_begin || $n >= $field_begin) 77 | && (!defined $field_end || $n <= $field_end)) { 78 | $i = $sym2int{$a}; 79 | if (!defined ($i)) { 80 | if (defined $map_oov) { 81 | if ($num_warning++ < $max_warning) { 82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n"; 83 | if ($num_warning == $max_warning) { 84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n"; 85 | } 86 | } 87 | $i = $map_oov; 88 | } else { 89 | $pos = $n+1; 90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n"; 91 | } 92 | } 93 | $a = $i; 94 | } 95 | push @B, $a; 96 | } 97 | print join(" ", @B); 98 | print "\n"; 99 | } 100 | if ($num_warning > 0) { 101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 102 | } 103 | 104 | exit(0); 105 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a directory, such as in data/train/, 7 | # that contains some subset of the following files: 8 | # feats.scp 9 | # wav.scp 10 | # spk2utt 11 | # utt2spk 12 | # text 13 | # 14 | # It copies to another 
directory, possibly adding a specified prefix or a suffix 15 | # to the utterance and/or speaker names. Note, the recording-ids stay the same. 16 | # 17 | 18 | 19 | # begin configuration section 20 | spk_prefix= 21 | utt_prefix= 22 | spk_suffix= 23 | utt_suffix= 24 | validate_opts= # should rarely be needed. 25 | # end configuration section 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# != 2 ]; then 30 | echo "Usage: " 31 | echo " $0 [options] " 32 | echo "e.g.:" 33 | echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" 34 | echo "Options" 35 | echo " --spk-prefix= # Prefix for speaker ids, default empty" 36 | echo " --utt-prefix= # Prefix for utterance ids, default empty" 37 | echo " --spk-suffix= # Suffix for speaker ids, default empty" 38 | echo " --utt-suffix= # Suffix for utterance ids, default empty" 39 | exit 1; 40 | fi 41 | 42 | 43 | export LC_ALL=C 44 | 45 | srcdir=$1 46 | destdir=$2 47 | 48 | if [ ! -f $srcdir/utt2spk ]; then 49 | echo "copy_data_dir.sh: no such file $srcdir/utt2spk" 50 | exit 1; 51 | fi 52 | 53 | set -e; 54 | 55 | mkdir -p $destdir 56 | 57 | cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map 58 | cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map 59 | 60 | if [ ! -f $srcdir/utt2uniq ]; then 61 | if [[ ! -z $utt_prefix || ! 
-z $utt_suffix ]]; then 62 | cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq 63 | fi 64 | else 65 | cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq 66 | fi 67 | 68 | cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \ 69 | utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk 70 | 71 | utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt 72 | 73 | if [ -f $srcdir/feats.scp ]; then 74 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp 75 | fi 76 | 77 | 78 | if [ -f $srcdir/segments ]; then 79 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments 80 | cp $srcdir/wav.scp $destdir 81 | if [ -f $srcdir/reco2file_and_channel ]; then 82 | cp $srcdir/reco2file_and_channel $destdir/ 83 | fi 84 | else # no segments->wav indexed by utt. 85 | if [ -f $srcdir/wav.scp ]; then 86 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp 87 | fi 88 | fi 89 | 90 | if [ -f $srcdir/text ]; then 91 | utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text 92 | fi 93 | if [ -f $srcdir/spk2gender ]; then 94 | utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender 95 | fi 96 | if [ -f $srcdir/cmvn.scp ]; then 97 | utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp 98 | fi 99 | for f in stm glm ctm; do 100 | if [ -f $srcdir/$f ]; then 101 | cp $srcdir/$f $destdir 102 | fi 103 | done 104 | 105 | rm $destdir/spk_map $destdir/utt_map 106 | 107 | echo "$0: copied data from $srcdir to $destdir" 108 | 109 | [ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats" 110 | [ ! 
-f $srcdir/text ] && validate_opts="$validate_opts --no-text" 111 | 112 | utils/validate_data_dir.sh $validate_opts $destdir 113 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/parse_options.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey); 4 | # Arnab Ghoshal, Karel Vesely 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # Parse command-line options. 21 | # To be sourced by another script (as in ". parse_options.sh"). 22 | # Option format is: --option-name arg 23 | # and shell variable "option_name" gets set to value "arg." 24 | # The exception is --help, which takes no arguments, but prints the 25 | # $help_message variable (if defined). 26 | 27 | 28 | ### 29 | ### The --config file options have lower priority to command line 30 | ### options, so we need to import them first... 31 | ### 32 | 33 | # Now import all the configs specified by command-line, in left-to-right order 34 | for ((argpos=1; argpos<$#; argpos++)); do 35 | if [ "${!argpos}" == "--config" ]; then 36 | argpos_plus1=$((argpos+1)) 37 | config=${!argpos_plus1} 38 | [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 39 | . $config # source the config file. 
40 | fi 41 | done 42 | 43 | 44 | ### 45 | ### Now we process the command line options 46 | ### 47 | while true; do 48 | [ -z "${1:-}" ] && break; # break if there are no arguments 49 | case "$1" in 50 | # If the enclosing script is called with --help option, print the help 51 | # message and exit. Scripts should put help messages in $help_message 52 | --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; 53 | else printf "$help_message\n" 1>&2 ; fi; 54 | exit 0 ;; 55 | --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" 56 | exit 1 ;; 57 | # If the first command-line argument begins with "--" (e.g. --foo-bar), 58 | # then work out the variable name as $name, which will equal "foo_bar". 59 | --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 60 | # Next we test whether the variable in question is undefined-- if so it's 61 | # an invalid option and we die. Note: $0 evaluates to the name of the 62 | # enclosing script. 63 | # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar 64 | # is undefined. We then have to wrap this test inside "eval" because 65 | # foo_bar is itself inside a variable ($name). 66 | eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; 67 | 68 | oldval="`eval echo \\$$name`"; 69 | # Work out whether we seem to be expecting a Boolean argument. 70 | if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 71 | was_bool=true; 72 | else 73 | was_bool=false; 74 | fi 75 | 76 | # Set the variable to the right value-- the escaped quotes make it work if 77 | # the option had spaces, like --cmd "queue.pl -sync y" 78 | eval $name=\"$2\"; 79 | 80 | # Check that Boolean-valued arguments are really Boolean.
81 | if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then 82 | echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 83 | exit 1; 84 | fi 85 | shift 2; 86 | ;; 87 | *) break; 88 | esac 89 | done 90 | 91 | 92 | # Check for an empty argument to the --cmd option, which can easily occur as a 93 | # result of scripting errors. 94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; 95 | 96 | 97 | true; # so this script returns exit code 0. 98 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/parse_options.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey); 4 | # Arnab Ghoshal, Karel Vesely 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # Parse command-line options. 21 | # To be sourced by another script (as in ". parse_options.sh"). 22 | # Option format is: --option-name arg 23 | # and shell variable "option_name" gets set to value "arg." 24 | # The exception is --help, which takes no arguments, but prints the 25 | # $help_message variable (if defined). 26 | 27 | 28 | ### 29 | ### The --config file options have lower priority to command line 30 | ### options, so we need to import them first... 
31 | ### 32 | 33 | # Now import all the configs specified by command-line, in left-to-right order 34 | for ((argpos=1; argpos<$#; argpos++)); do 35 | if [ "${!argpos}" == "--config" ]; then 36 | argpos_plus1=$((argpos+1)) 37 | config=${!argpos_plus1} 38 | [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1 39 | . $config # source the config file. 40 | fi 41 | done 42 | 43 | 44 | ### 45 | ### Now we process the command line options 46 | ### 47 | while true; do 48 | [ -z "${1:-}" ] && break; # break if there are no arguments 49 | case "$1" in 50 | # If the enclosing script is called with --help option, print the help 51 | # message and exit. Scripts should put help messages in $help_message 52 | --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2; 53 | else printf "$help_message\n" 1>&2 ; fi; 54 | exit 0 ;; 55 | --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'" 56 | exit 1 ;; 57 | # If the first command-line argument begins with "--" (e.g. --foo-bar), 58 | # then work out the variable name as $name, which will equal "foo_bar". 59 | --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`; 60 | # Next we test whether the variable in question is undefined-- if so it's 61 | # an invalid option and we die. Note: $0 evaluates to the name of the 62 | # enclosing script. 63 | # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar 64 | # is undefined. We then have to wrap this test inside "eval" because 65 | # foo_bar is itself inside a variable ($name). 66 | eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1; 67 | 68 | oldval="`eval echo \\$$name`"; 69 | # Work out whether we seem to be expecting a Boolean argument.
70 | if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then 71 | was_bool=true; 72 | else 73 | was_bool=false; 74 | fi 75 | 76 | # Set the variable to the right value-- the escaped quotes make it work if 77 | # the option had spaces, like --cmd "queue.pl -sync y" 78 | eval $name=\"$2\"; 79 | 80 | # Check that Boolean-valued arguments are really Boolean. 81 | if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then 82 | echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2 83 | exit 1; 84 | fi 85 | shift 2; 86 | ;; 87 | *) break; 88 | esac 89 | done 90 | 91 | 92 | # Check for an empty argument to the --cmd option, which can easily occur as a 93 | # result of scripting errors. 94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1; 95 | 96 | 97 | true; # so this script returns exit code 0. 98 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/reverse_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Brno University of Technology (Author: Mirko Hannemann) 4 | # JHU (Author: Dan Povey) 5 | # Apache 2.0 6 | 7 | # configuration section 8 | tmpdir=data/local/lm_tmp # only for OOVs and checks 9 | lexicon=data/local/lang_tmp.reverse/lexicon.txt # only for checks 10 | # end config section 11 | 12 | mkdir -p $tmpdir 13 | 14 | echo "$0 $@" # Print the command line for logging 15 | 16 | [ -f ./path.sh ] && . ./path.sh; # source the path. 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# != 3 ]; then 20 | echo "Usage: utils/reverse_lm.sh [options] " 21 | echo "e.g.: utils/reverse_lm.sh data/local/nist_lm/lm_tgpr_5k.arpa.gz data/lang.reverse data/lang_test_tgpr_5k.reverse" 22 | echo "... 
where files from are copied into " 23 | echo "options:" 24 | echo " --lexicon reversed lexicon (only for checks)" 25 | exit 1; 26 | fi 27 | 28 | lm=$1 # gzipped arpa file 29 | langdir=$2 # source lang directory; phones.txt, words.txt, L.fst, L_disambig.fst and phones/ are copied from here 30 | outdir=$3 # output directory 31 | 32 | # create the corresponding FST for the language model 33 | # and the corresponding lang_test_* directory. 34 | 35 | echo Preparing reverse language model from $lm into $outdir 36 | echo "Finding OOVs and strange silences" 37 | mkdir -p $outdir 38 | for f in phones.txt words.txt L.fst L_disambig.fst phones/; do 39 | cp -r $langdir/$f $outdir 40 | done 41 | gunzip -c $lm | utils/find_arpa_oovs.pl $outdir/words.txt > $tmpdir/oovs.txt 42 | 43 | # grep -v '<s> <s>' because the LM seems to have some strange and useless 44 | # stuff in it with multiple <s>'s in the history. Encountered some other similar 45 | # things in a LM from Geoff. Removing all "illegal" combinations of <s> and </s>, 46 | # which are supposed to occur only at beginning/end of utt. These can cause 47 | # determinization failures of CLG [ends up being epsilon cycles]. 
48 | gunzip -c $lm | \ 49 | grep -v '<s> <s>' | \ 50 | grep -v '</s> <s>' | \ 51 | grep -v '</s> </s>' > $outdir/forward.arpa 52 | echo "Mapping ARPA to reverse ARPA" 53 | python utils/reverse_arpa.py $outdir/forward.arpa > $outdir/reverse.arpa 54 | arpa2fst $outdir/reverse.arpa | fstprint | \ 55 | grep -v "230258.5" | \ 56 | utils/remove_oovs.pl $tmpdir/oovs.txt | \ 57 | utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$outdir/words.txt \ 58 | --osymbols=$outdir/words.txt --keep_isymbols=false --keep_osymbols=false \ 59 | | fstrmepsilon > $outdir/G_org.fst 60 | #--arc_type=log 61 | 62 | echo "Push weights to make it stochastic (log semi-ring)" 63 | # delta must be very small otherwise weight pushing won't succeed 64 | #fstpush --push_weights=true --push_labels=true --delta=1E-7 $outdir/G_log.fst >$outdir/G_log_pushed.fst 65 | fstpushspecial --delta=1E-5 $outdir/G_org.fst |\ 66 | fstarcsort --sort_type=ilabel >$outdir/G.fst 67 | 68 | fstisstochastic $outdir/G.fst 69 | # The output is like: 70 | # 9.14233e-05 -0.259833 71 | # we do expect the first of these 2 numbers to be close to zero (the second is 72 | # nonzero because the backoff weights make the states sum to >1). 73 | # Because of the fiasco for these particular LMs, the first number is not 74 | # as close to zero as it could be. 75 | 76 | # Everything below is only for diagnostic. 77 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>); 78 | # this might cause determinization failure of CLG. 79 | # #0 is treated as an empty word. 
80 | 81 | if [ -f $lexicon ]; then 82 | mkdir -p $tmpdir/g 83 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \ 84 | < "$lexicon" >$tmpdir/g/select_empty.fst.txt 85 | fstcompile --isymbols=$outdir/words.txt --osymbols=$outdir/words.txt $tmpdir/g/select_empty.fst.txt | \ 86 | fstarcsort --sort_type=olabel | fstcompose - $outdir/G.fst > $tmpdir/g/empty_words.fst 87 | fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && 88 | echo "Language model has cycles with empty words" && exit 1 89 | rm -r $tmpdir/g 90 | fi 91 | echo "Succeeded in creating reversed language model." 92 | rm -r $tmpdir 93 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/nnet/make_lstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 
19 | from __future__ import print_function 20 | 21 | import sys 22 | 23 | ### 24 | ### Parse options 25 | ### 26 | from optparse import OptionParser 27 | 28 | usage = "%prog [options] >nnet-proto-file" 29 | parser = OptionParser(usage) 30 | # 31 | parser.add_option( 32 | "--num-cells", dest="num_cells", type="int", default=800, help="Number of LSTM cells [default: %default]" 33 | ) 34 | parser.add_option( 35 | "--num-recurrent", 36 | dest="num_recurrent", 37 | type="int", 38 | default=512, 39 | help="Number of LSTM recurrent units [default: %default]", 40 | ) 41 | parser.add_option( 42 | "--num-layers", dest="num_layers", type="int", default=2, help="Number of LSTM layers [default: %default]" 43 | ) 44 | parser.add_option( 45 | "--lstm-stddev-factor", 46 | dest="lstm_stddev_factor", 47 | type="float", 48 | default=0.01, 49 | help="Standard deviation of initialization [default: %default]", 50 | ) 51 | parser.add_option( 52 | "--param-stddev-factor", 53 | dest="param_stddev_factor", 54 | type="float", 55 | default=0.04, 56 | help="Standard deviation in output layer [default: %default]", 57 | ) 58 | parser.add_option( 59 | "--clip-gradient", 60 | dest="clip_gradient", 61 | type="float", 62 | default=5.0, 63 | help="Clipping constant applied to gradients [default: %default]", 64 | ) 65 | # 66 | (o, args) = parser.parse_args() 67 | if len(args) != 2: 68 | parser.print_help() 69 | sys.exit(1) 70 | 71 | (feat_dim, num_leaves) = list(map(int, args)) 72 | 73 | # Original prototype from Jiayu, 74 | # 75 | # 40 40 76 | # 40 512 800 0.01 4 77 | # 512 8000 0.000000 0.000000 0.04 78 | # 8000 8000 79 | # 80 | 81 | print("") 82 | # normally we won't use more than 2 layers of LSTM 83 | if o.num_layers == 1: 84 | print( 85 | " %d %d %s %f %f" 86 | % (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) 87 | ) 88 | elif o.num_layers == 2: 89 | print( 90 | " %d %d %s %f %f" 91 | % (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) 
92 | ) 93 | print( 94 | " %d %d %s %f %f" 95 | % (o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) 96 | ) 97 | else: 98 | sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n") 99 | sys.exit(1) 100 | print( 101 | " %d %d 0.0 0.0 %f" 102 | % (o.num_recurrent, num_leaves, o.param_stddev_factor) 103 | ) 104 | print(" %d %d" % (num_leaves, num_leaves)) 105 | print("") 106 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/subset_data_dir_tr_cv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin); 3 | # Brno University of Technology (Author: Karel Vesely); 4 | # Johns Hopkins University (Author: Daniel Povey); 5 | # Apache 2.0 6 | 7 | # This script splits dataset to two parts : 8 | # training set from (100-P)% of speakers/utterances and 9 | # held-out set (or cross-validation) from P% of remaining speakers/remaining utterances, 10 | # which will be later on used for neural network training 11 | # 12 | # There are two options for choosing held-out (or cross-validation) set, either by 13 | # --cv-spk-percent P , which will give you CV set based on random chosen P% of speakers, or 14 | # --cv-utt-percent P , which will give you CV set based on last P% utterances in the dataset 15 | # 16 | # If you don't apply the above two options, by default the script will use --cv-utt-percent option, 17 | # and the default cross validation percentage portion is equal to 10% (i.e. P=10) 18 | # 19 | # The --cv-spk-percent option is useful if you would like to have subset chosen from random speakers order, 20 | # especially for the cases where dataset contains multiple different corpora, 21 | # where type of speakers or recording channels may be quite different 22 | 23 | # Begin configuration. 
24 | cv_spk_percent= # % of speakers is parsed by option 25 | cv_utt_percent=10 # default 10% of total utterances 26 | seed=777 # use seed for speaker shuffling 27 | # End configuration. 28 | 29 | echo "$0 $@" # Print the command line for logging 30 | 31 | uttbase=true; # by default, we choose last 10% utterances for CV 32 | 33 | if [ "$1" == "--cv-spk-percent" ]; then 34 | uttbase=false; 35 | spkbase=true; 36 | fi 37 | 38 | [ -f path.sh ] && . ./path.sh; 39 | 40 | . parse_options.sh || exit 1; 41 | 42 | if [ $# != 3 ]; then 43 | echo "Usage: $0 [--cv-spk-percent P|--cv-utt-percent P] " 44 | echo " --cv-spk-percent P Cross Validation portion of the total speakers, recommend value is 10% (i.e. P=10)" 45 | echo " --cv-utt-percent P Cross Validation portion of the total utterances, default is 10% (i.e. P=10)" 46 | echo " " 47 | exit 1; 48 | fi 49 | 50 | srcdir=$1 51 | trndir=$2 52 | cvdir=$3 53 | 54 | ## use simple last P% utterance for CV 55 | if $uttbase; then 56 | if [ ! -f $srcdir/utt2spk ]; then 57 | echo "$0: no such file $srcdir/utt2spk" 58 | exit 1; 59 | fi 60 | 61 | #total number of lines 62 | N=$(cat $srcdir/utt2spk | wc -l) 63 | #get line number where (100-P)% of the data lies 64 | P_utt=$((N * cv_utt_percent / 100)) 65 | N_head=$((N -P_utt)) 66 | #move the boundary so it is located on speaker change 67 | N_head=$(cat $srcdir/utt2spk | uniq -f1 -c | awk '{ if(n+$1<='$N_head') { n += $1 } else { nextfile } } END{ print n }') 68 | #the rest of the data will be that big 69 | N_tail=$((N-N_head)) 70 | 71 | #now call the subset_data_dir.sh and fix the directories 72 | subset_data_dir.sh --first $srcdir $N_head $trndir 73 | subset_data_dir.sh --last $srcdir $N_tail $cvdir 74 | 75 | exit 0; 76 | fi 77 | 78 | ## use random chosen P% speakers for CV 79 | if [ ! 
-f $srcdir/spk2utt ]; then 80 | echo "$0: no such file $srcdir/spk2utt" 81 | exit 1; 82 | fi 83 | 84 | #total, cv, train number of speakers 85 | N=$(cat $srcdir/spk2utt | wc -l) 86 | N_spk_cv=$((N * cv_spk_percent / 100)) 87 | N_spk_trn=$((N - N_spk_cv)) 88 | 89 | mkdir -p $cvdir $trndir 90 | 91 | #shuffle the speaker list 92 | awk '{print $1}' $srcdir/spk2utt | shuffle_list.pl --srand $seed > $trndir/_tmpf_randspk 93 | 94 | #split the train/cv 95 | head -n $N_spk_cv $trndir/_tmpf_randspk > $cvdir/_tmpf_cvspk 96 | tail -n $N_spk_trn $trndir/_tmpf_randspk > $trndir/_tmpf_trainspk 97 | 98 | #now call the subset_data_dir.sh 99 | subset_data_dir.sh --spk-list $trndir/_tmpf_trainspk $srcdir $trndir 100 | subset_data_dir.sh --spk-list $cvdir/_tmpf_cvspk $srcdir $cvdir 101 | 102 | #clean-up 103 | rm -f $trndir/_tmpf_randspk $trndir/_tmpf_trainspk $cvdir/_tmpf_cvspk 104 | 105 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/map_arpa_lm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | use strict; 8 | use warnings; 9 | use Getopt::Long; 10 | 11 | my $Usage = < < input-arpa >output-arpa 22 | e.g.: utils/map_arpa_lm.pl words.txt arpa_lm.int 23 | 24 | Allowed options: 25 | --sym2int : If true, maps words to integers, other wise maps integers to 26 | words. (boolean, default = true) 27 | 28 | EOU 29 | 30 | my $sym2int = "true"; 31 | GetOptions('sym2int=s' => \$sym2int); 32 | 33 | ($sym2int eq "true" || $sym2int eq "false") || 34 | die "$0: Bad value for option --sym2int\n"; 35 | 36 | if (@ARGV != 1) { 37 | die $Usage; 38 | } 39 | 40 | # Gets parameters. 41 | my $symtab = shift @ARGV; 42 | my $arpa_in = shift @ARGV; 43 | my $arpa_out = shift @ARGV; 44 | 45 | # Opens files. 
46 | open(M, "<$symtab") || die "$0: Fail to open $symtab\n"; 47 | 48 | # Reads in the mapper. 49 | my %mapper; 50 | while (<M>) { # one two-column line of the symbol table per iteration 51 | chomp; 52 | my @col = split(/[\s]+/, $_); 53 | @col == 2 || die "$0: Bad line in mapper file \"$_\"\n"; 54 | if ($sym2int eq "true") { 55 | if (defined($mapper{$col[0]})) { 56 | die "$0: Duplicate entry \"$col[0]\"\n"; 57 | } 58 | $mapper{$col[0]} = $col[1]; 59 | } else { 60 | if (defined($mapper{$col[1]})) { 61 | die "$0: Duplicate entry \"$col[1]\"\n"; 62 | } 63 | $mapper{$col[1]} = $col[0]; 64 | } 65 | } 66 | 67 | my $num_oov_lines = 0; 68 | my $max_oov_warn = 20; 69 | 70 | # Parses Arpa n-gram language model. 71 | my $arpa = ""; 72 | my $current_order = -1; 73 | my %head_ngram_count; 74 | my %actual_ngram_count; 75 | while (<STDIN>) { # the ARPA LM arrives on standard input; the mapped LM goes to standard output 76 | chomp; 77 | my @col = split(" ", $_); 78 | 79 | if ($current_order == -1 and ! m/^\\data\\$/) { 80 | next; 81 | } 82 | 83 | if (m/^\\data\\$/) { 84 | print STDERR "$0: Processing \"\\data\\\"\n"; 85 | print "$_\n"; 86 | $current_order = 0; 87 | } elsif (m/^\\[0-9]*-grams:$/) { 88 | $current_order = $_; 89 | $current_order =~ s/-grams:$//g; 90 | $current_order =~ s/^\\//g; 91 | print "$_\n"; 92 | print STDERR "$0: Processing \"\\$current_order-grams:\\\"\n"; 93 | } elsif (m/^\\end\\/) { 94 | print "$_\n"; 95 | } elsif ($_ eq "") { 96 | if ($current_order >= 1) { 97 | print "\n"; 98 | } 99 | } else { 100 | if ($current_order == 0) { 101 | # echo head section. 102 | print "$_\n"; 103 | } else { 104 | # Parses n-gram section. 
105 | if (@col > 2 + $current_order || @col < 1 + $current_order) { 106 | die "$0: Bad line in arpa lm \"$_\"\n"; 107 | } 108 | my $prob = shift @col; 109 | my $is_oov = 0; 110 | for (my $i = 0; $i < $current_order; $i++) { 111 | my $temp = $mapper{$col[$i]}; 112 | if (!defined($temp)) { 113 | $is_oov = 1; 114 | $num_oov_lines++; 115 | last; 116 | } else { 117 | $col[$i] = $temp; 118 | } 119 | } 120 | if (!$is_oov) { 121 | my $rest_of_line = join(" ", @col); 122 | print "$prob\t$rest_of_line\n"; 123 | } else { 124 | if ($num_oov_lines < $max_oov_warn) { 125 | print STDERR "$0: Warning: OOV line $_\n"; 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | if ($num_oov_lines > 0) { 133 | print STDERR "$0: $num_oov_lines lines of the Arpa file contained OOVs and "; 134 | print STDERR "were not printed.\n"; 135 | } 136 | 137 | close(M); 138 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/nnet/make_blstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 
19 | from __future__ import print_function 20 | 21 | import sys 22 | 23 | ### 24 | ### Parse options 25 | ### 26 | from optparse import OptionParser 27 | 28 | usage = "%prog [options] >nnet-proto-file" 29 | parser = OptionParser(usage) 30 | # 31 | parser.add_option( 32 | "--num-cells", dest="num_cells", type="int", default=800, help="Number of LSTM cells [default: %default]" 33 | ) 34 | parser.add_option( 35 | "--num-recurrent", 36 | dest="num_recurrent", 37 | type="int", 38 | default=512, 39 | help="Number of LSTM recurrent units [default: %default]", 40 | ) 41 | parser.add_option( 42 | "--num-layers", dest="num_layers", type="int", default=2, help="Number of LSTM layers [default: %default]" 43 | ) 44 | parser.add_option( 45 | "--lstm-stddev-factor", 46 | dest="lstm_stddev_factor", 47 | type="float", 48 | default=0.01, 49 | help="Standard deviation of initialization [default: %default]", 50 | ) 51 | parser.add_option( 52 | "--param-stddev-factor", 53 | dest="param_stddev_factor", 54 | type="float", 55 | default=0.04, 56 | help="Standard deviation in output layer [default: %default]", 57 | ) 58 | parser.add_option( 59 | "--clip-gradient", 60 | dest="clip_gradient", 61 | type="float", 62 | default=5.0, 63 | help="Clipping constant applied to gradients [default: %default]", 64 | ) 65 | # 66 | (o, args) = parser.parse_args() 67 | if len(args) != 2: 68 | parser.print_help() 69 | sys.exit(1) 70 | 71 | (feat_dim, num_leaves) = list(map(int, args)) 72 | 73 | # Original prototype from Jiayu, 74 | # 75 | # 40 40 76 | # 40 512 800 0.01 4 77 | # 512 8000 0.000000 0.000000 0.04 78 | # 8000 8000 79 | # 80 | 81 | print("") 82 | # normally we won't use more than 2 layers of LSTM 83 | if o.num_layers == 1: 84 | print( 85 | " %d %d %s %f %f" 86 | % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) 87 | ) 88 | elif o.num_layers == 2: 89 | print( 90 | " %d %d %s %f %f" 91 | % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, 
o.clip_gradient) 92 | ) 93 | print( 94 | " %d %d %s %f %f" 95 | % (2 * o.num_recurrent, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) 96 | ) 97 | else: 98 | sys.stderr.write("make_blstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n") 99 | sys.exit(1) 100 | print( 101 | " %d %d 0.0 0.0 %f" 102 | % (2 * o.num_recurrent, num_leaves, o.param_stddev_factor) 103 | ) 104 | print(" %d %d" % (num_leaves, num_leaves)) 105 | print("") 106 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/pinyin_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | $num_args = $#ARGV + 1; 5 | if ($num_args != 1) { 6 | print "\nUsage: pinyin2phone.pl pinyin2phone\n"; 7 | exit; 8 | } 9 | 10 | open(MAPS, $ARGV[0]) or die("Could not open pinyin map file."); 11 | my %py2ph; foreach $line (<MAPS>) { @A = split(" ", $line); # first field is the key, the remaining fields are stored as its expansion 12 | $py = shift(@A); 13 | $py2ph{$py} = [@A]; 14 | } 15 | 16 | #foreach $word ( keys %py2ph ) { 17 | #foreach $i ( 0 .. 
$#{ $py2ph{$word} } ) { 18 | # print " $word = $py2ph{$word}[$i]"; 19 | #} 20 | #print " $#{ $py2ph{$word} }"; 21 | #print "\n"; 22 | #} 23 | 24 | my @entry; 25 | 26 | while (<STDIN>) { # entries to convert come from standard input; $ARGV[0] is the map file read above 27 | @A = split(" ", $_); 28 | @entry = (); 29 | $W = shift(@A); 30 | push(@entry, $W); 31 | for($i = 0; $i < @A; $i++) { 32 | $initial= $A[$i]; $final = $A[$i]; 33 | #print $initial, " ", $final, "\n"; 34 | if ($A[$i] =~ /^CH[A-Z0-9]+$/) {$initial =~ s:(CH)[A-Z0-9]+:$1:; $final =~ s:CH([A-Z0-9]+):$1:;} 35 | elsif ($A[$i] =~ /^SH[A-Z0-9]+$/) {$initial =~ s:(SH)[A-Z0-9]+:$1:; $final =~ s:SH([A-Z0-9]+):$1:;} 36 | elsif ($A[$i] =~ /^ZH[A-Z0-9]+$/) {$initial =~ s:(ZH)[A-Z0-9]+:$1:; $final =~ s:ZH([A-Z0-9]+):$1:;} 37 | elsif ($A[$i] =~ /^B[A-Z0-9]+$/) {$initial =~ s:(B)[A-Z0-9]+:$1:; $final =~ s:B([A-Z0-9]+):$1:;} 38 | elsif ($A[$i] =~ /^C[A-Z0-9]+$/) {$initial =~ s:(C)[A-Z0-9]+:$1:; $final =~ s:C([A-Z0-9]+):$1:;} 39 | elsif ($A[$i] =~ /^D[A-Z0-9]+$/) {$initial =~ s:(D)[A-Z0-9]+:$1:; $final =~ s:D([A-Z0-9]+):$1:;} 40 | elsif ($A[$i] =~ /^F[A-Z0-9]+$/) {$initial =~ s:(F)[A-Z0-9]+:$1:; $final =~ s:F([A-Z0-9]+):$1:;} 41 | elsif ($A[$i] =~ /^G[A-Z0-9]+$/) {$initial =~ s:(G)[A-Z0-9]+:$1:; $final =~ s:G([A-Z0-9]+):$1:;} 42 | elsif ($A[$i] =~ /^H[A-Z0-9]+$/) {$initial =~ s:(H)[A-Z0-9]+:$1:; $final =~ s:H([A-Z0-9]+):$1:;} 43 | elsif ($A[$i] =~ /^J[A-Z0-9]+$/) {$initial =~ s:(J)[A-Z0-9]+:$1:; $final =~ s:J([A-Z0-9]+):$1:;} 44 | elsif ($A[$i] =~ /^K[A-Z0-9]+$/) {$initial =~ s:(K)[A-Z0-9]+:$1:; $final =~ s:K([A-Z0-9]+):$1:;} 45 | elsif ($A[$i] =~ /^L[A-Z0-9]+$/) {$initial =~ s:(L)[A-Z0-9]+:$1:; $final =~ s:L([A-Z0-9]+):$1:;} 46 | elsif ($A[$i] =~ /^M[A-Z0-9]+$/) {$initial =~ s:(M)[A-Z0-9]+:$1:; $final =~ s:M([A-Z0-9]+):$1:;} 47 | elsif ($A[$i] =~ /^N[A-Z0-9]+$/) {$initial =~ s:(N)[A-Z0-9]+:$1:; $final =~ s:N([A-Z0-9]+):$1:;} 48 | elsif ($A[$i] =~ /^P[A-Z0-9]+$/) {$initial =~ s:(P)[A-Z0-9]+:$1:; $final =~ s:P([A-Z0-9]+):$1:;} 49 | elsif ($A[$i] =~ /^Q[A-Z0-9]+$/) {$initial =~ s:(Q)[A-Z0-9]+:$1:; $final =~ 
s:Q([A-Z0-9]+):$1:;} 50 | elsif ($A[$i] =~ /^R[A-Z0-9]+$/) {$initial =~ s:(R)[A-Z0-9]+:$1:; $final =~ s:R([A-Z0-9]+):$1:;} 51 | elsif ($A[$i] =~ /^S[A-Z0-9]+$/) {$initial =~ s:(S)[A-Z0-9]+:$1:; $final =~ s:S([A-Z0-9]+):$1:;} 52 | elsif ($A[$i] =~ /^T[A-Z0-9]+$/) {$initial =~ s:(T)[A-Z0-9]+:$1:; $final =~ s:T([A-Z0-9]+):$1:;} 53 | elsif ($A[$i] =~ /^W[A-Z0-9]+$/) {$initial =~ s:(W)[A-Z0-9]+:$1:; $final =~ s:W([A-Z0-9]+):$1:;} 54 | elsif ($A[$i] =~ /^X[A-Z0-9]+$/) {$initial =~ s:(X)[A-Z0-9]+:$1:; $final =~ s:X([A-Z0-9]+):$1:;} 55 | elsif ($A[$i] =~ /^Y[A-Z0-9]+$/) {$initial =~ s:(Y)[A-Z0-9]+:$1:; $final =~ s:Y([A-Z0-9]+):$1:;} 56 | elsif ($A[$i] =~ /^Z[A-Z0-9]+$/) {$initial =~ s:(Z)[A-Z0-9]+:$1:; $final =~ s:Z([A-Z0-9]+):$1:;} 57 | if ($initial ne $A[$i]) { 58 | $tone = $final; 59 | $final =~ s:([A-Z]+)[0-9]:$1:; 60 | $tone =~ s:[A-Z]+([0-9]):$1:; 61 | if (!(exists $py2ph{$initial}) or !(exists $py2ph{$final})) { print "1: no entry find for ", $A[$i], " ", $initial, " ", $final; exit;} 62 | push(@entry, @{$py2ph{$initial}}); 63 | @tmp = @{$py2ph{$final}}; 64 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 65 | push(@entry, @tmp); 66 | } 67 | else { 68 | $tone = $A[$i]; 69 | $A[$i] =~ s:([A-Z]+)[0-9]:$1:; 70 | $tone =~ s:[A-Z]+([0-9]):$1:; 71 | if (!(exists $py2ph{$A[$i]})) { print "2: no entry find for ", $A[$i]; exit;} 72 | @tmp = @{$py2ph{$A[$i]}}; 73 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 74 | push(@entry, @tmp); 75 | } 76 | } 77 | print "@entry"; 78 | print "\n"; 79 | } 80 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/filter_scps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | # 2015 Xiaohui Zhang 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this 
file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | # This script takes multiple lists of utterance-ids or any file whose first field 21 | # of each line is an utterance-id, as filters, and filters an scp 22 | # file (or any file whose "n-th" field is an utterance id), printing 23 | # out only those lines whose "n-th" field is in filter. The index of 24 | # the "n-th" field is 1, by default, but can be changed by using 25 | # the -f switch 26 | 27 | 28 | if(@ARGV != 4) { # NOTE(review): with exactly 4 arguments required, the -f option parsed further down can never be supplied -- confirm the intended count 29 | die "Usage: utils/filter_scps.pl \n" . 30 | "e.g.: utils/filter_scps.pl [-f ] JOB=1:10 data/train/split10/JOB/spk2utt data/train/feats.scp data/train/split10/JOB/feats.scp\n" . 31 | "similar to utils/filter_scp.pl, but it uses multiple filters and output multiple filtered files.\n". 32 | "The -f option specifies the field in that we filter on (default: 1).\n" . 33 | "See also: utils/filter_scp.pl\n"; 34 | } 35 | 36 | if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10 37 | $jobname = $1; 38 | $jobstart = $2; 39 | $jobend = $3; 40 | shift; 41 | if ($jobstart > $jobend) { 42 | die "filter_scps.pl: invalid job range ${jobname}=${jobstart}:${jobend}"; # rebuilt from the captures: $ARGV[0] no longer holds the range after the shift above 43 | } 44 | } else { 45 | die "filter_scps.pl: bad job-range specifier $ARGV[0]: expected e.g. 
JOB=1:10"; 46 | } 47 | 48 | $field = 1; 49 | $shifted = 0; 50 | do { 51 | $shifted=0; 52 | if ($ARGV[0] eq "-f") { 53 | $field = $ARGV[1]; 54 | shift @ARGV; shift @ARGV; 55 | $shifted=1 56 | } 57 | } while ($shifted); 58 | 59 | $idlist = shift @ARGV; 60 | 61 | if (defined $jobname && $idlist !~ m/$jobname/ && 62 | $jobend > $jobstart) { 63 | print STDERR "filter_scps.pl: you are trying to use multiple filter files as filter patterns but " 64 | . "you are providing just one filter file ($idlist)\n"; 65 | exit(1); 66 | } 67 | 68 | 69 | $infile = shift @ARGV; 70 | open (F, "< $infile") or die "Can't open $infile for read: $!"; 71 | my @inlines; 72 | @inlines = <F>; # read the whole input once; it is re-filtered for every job below 73 | close(F); 74 | 75 | $outfile = shift @ARGV; 76 | 77 | if (defined $jobname && $outfile !~ m/$jobname/ && 78 | $jobend > $jobstart) { 79 | print STDERR "filter_scps.pl: you are trying to create multiple filtered files but " 80 | . "you are providing just one output file ($outfile)\n"; 81 | exit(1); 82 | } 83 | 84 | for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) { 85 | $outfile_n = $outfile; 86 | $idlist_n = $idlist; 87 | if (defined $jobname) { 88 | $idlist_n =~ s/$jobname/$jobid/g; 89 | $outfile_n =~ s/$jobname/$jobid/g; 90 | } 91 | 92 | open(F, "<$idlist_n") || die "Could not open id-list file $idlist_n"; 93 | my %seen; 94 | while(<F>) { # first field of every id-list line becomes an accepted key 95 | @A = split; 96 | @A>=1 || die "Invalid line $_ in id-list file $idlist_n"; 97 | $seen{$A[0]} = 1; 98 | } 99 | close(F); 100 | open(FW, ">$outfile_n") || die "Could not open output file $outfile_n"; 101 | foreach (@inlines) { 102 | if ($field == 1) { # Treat this as special case, since it is common. 103 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 104 | # $1 is what we filter on. 
105 | if ($seen{$1}) { 106 | print FW $_; 107 | } 108 | } else { 109 | @A = split; 110 | @A > 0 || die "Invalid scp file line $_"; 111 | @A >= $field || die "Invalid scp file line $_"; 112 | if ($seen{$A[$field-1]}) { 113 | print FW $_; 114 | } 115 | } 116 | } 117 | close(FW); 118 | } 119 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/create_data_link.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | # This script distributes data onto different file systems by making symbolic 8 | # links. It is supposed to use together with utils/create_split_dir.pl, which 9 | # creates a "storage" directory that links to different file systems. 10 | # 11 | # If a sub-directory egs/storage does not exist, it does nothing. If it exists, 12 | # then it selects pseudo-randomly a number from those available in egs/storage/* 13 | # creates a link such as 14 | # 15 | # egs/egs.3.4.ark -> storage/4/egs.3.4.ark 16 | # 17 | use strict; 18 | use warnings; 19 | use File::Basename; 20 | use File::Spec; 21 | use Getopt::Long; 22 | 23 | sub GetGCD { 24 | my ($a, $b) = @_; 25 | while ($a != $b) { 26 | if ($a > $b) { 27 | $a = $a - $b; 28 | } else { 29 | $b = $b - $a; 30 | } 31 | } 32 | return $a; 33 | } 34 | 35 | my $Usage = < storage/4/egs.3.4.ark 45 | 46 | Usage: utils/create_data_link.pl [ ... ] 47 | e.g.: utils/create_data_link.pl foo/bar/egs.3.4.ark foo/bar/egs.3.5.ark 48 | (note: the dirname, e.g. foo/bar/, must be the same in all cases). 49 | 50 | See also utils/remove_data_links.sh 51 | EOU 52 | 53 | GetOptions(); 54 | 55 | if (@ARGV == 0) { 56 | die $Usage; 57 | } 58 | 59 | my $example_fullpath = $ARGV[0]; 60 | 61 | # Check if the storage has been created. If so, do nothing. 62 | my $dirname = dirname($example_fullpath); 63 | if (! 
-d "$dirname/storage") { 64 | exit(0); 65 | } 66 | 67 | # Storage exists, create symbolic links in the next few steps. 68 | 69 | # First, get a list of the available storage directories, and check if they are 70 | # properly created. 71 | opendir(my $dh, "$dirname/storage/") || die "$0: Fail to open $dirname/storage/\n"; 72 | my @storage_dirs = grep(/^[0-9]*$/, readdir($dh)); 73 | closedir($dh); 74 | my $num_storage = scalar(@storage_dirs); 75 | for (my $x = 1; $x <= $num_storage; $x++) { 76 | (-d "$dirname/storage/$x") || die "$0: $dirname/storage/$x does not exist\n"; 77 | } 78 | 79 | # Second, get the coprime list. 80 | my @coprimes; 81 | for (my $n = 1; $n < $num_storage; $n++) { 82 | if (GetGCD($n, $num_storage) == 1) { 83 | push(@coprimes, $n); 84 | } 85 | } 86 | 87 | my $ret = 0; 88 | 89 | foreach my $fullpath (@ARGV) { 90 | if ($dirname ne dirname($fullpath)) { 91 | die "Mismatch in directory names of arguments: $example_fullpath versus $fullpath"; 92 | } 93 | 94 | # Finally, work out the directory index where we should put the data to. 95 | my $basename = basename($fullpath); 96 | my $filename_numbers = $basename; 97 | $filename_numbers =~ s/[^0-9]+/ /g; 98 | my @filename_numbers = split(" ", $filename_numbers); 99 | my $total = 0; 100 | my $index = 0; 101 | foreach my $x (@filename_numbers) { 102 | if ($index >= scalar(@coprimes)) { 103 | $index = 0; 104 | } 105 | $total += $x * $coprimes[$index]; 106 | $index++; 107 | } 108 | my $dir_index = $total % $num_storage + 1; 109 | 110 | # Make the symbolic link. 111 | if (-e $fullpath) { 112 | unlink($fullpath); 113 | } 114 | if (symlink("storage/$dir_index/$basename", $fullpath) != 1) { # failure 115 | $ret = 1; # will exit with error status. 
116 | } 117 | } 118 | 119 | exit($ret); 120 | 121 | ## testing: 122 | # rm -rf foo bar 123 | # mkdir -p bar/{1,2,3,4} 124 | # mkdir -p foo/storage 125 | # for x in 1 2 3 4; do ln -s ../../bar/$x foo/storage/$x; done 126 | # utils/create_data_link.pl utils/create_data_link.pl foo/1.3.ark foo/2.3.ark 127 | # ls -l foo 128 | # total 0 129 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 1.3.ark -> storage/3/1.3.ark 130 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 2.3.ark -> storage/4/2.3.ark 131 | # drwxr-xr-x 2 dpovey fax 38 Sep 2 17:40 storage 132 | -------------------------------------------------------------------------------- /cfg/TIMIT_baselines/TIMIT_MLP_mfcc_basic.cfg: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto = proto/global.proto 3 | cfg_proto_chunk = proto/global_chunk.proto 4 | 5 | [exp] 6 | cmd = 7 | run_nn_script = run_nn 8 | out_folder = exp/TIMIT_MLP_basic 9 | seed = 1234 10 | use_cuda = True 11 | multi_gpu = False 12 | save_gpumem = False 13 | n_epochs_tr = 24 14 | 15 | [dataset1] 16 | data_name = TIMIT_tr 17 | fea = fea_name=mfcc 18 | fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/feats.scp 19 | fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/train/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 20 | cw_left=5 21 | cw_right=5 22 | 23 | 24 | lab = lab_name=lab_cd 25 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali 26 | lab_opts=ali-to-pdf 27 | lab_count_file=auto 28 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/ 29 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 30 | 31 | 32 | n_chunks = 5 33 | 34 | [dataset2] 35 | data_name = TIMIT_dev 36 | fea = fea_name=mfcc 37 | fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/feats.scp 38 | fea_opts=apply-cmvn 
--utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 39 | cw_left=5 40 | cw_right=5 41 | 42 | 43 | lab = lab_name=lab_cd 44 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_dev 45 | lab_opts=ali-to-pdf 46 | lab_count_file=auto 47 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/ 48 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 49 | 50 | 51 | n_chunks = 1 52 | 53 | [dataset3] 54 | data_name = TIMIT_test 55 | fea = fea_name=mfcc 56 | fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/feats.scp 57 | fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/test/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 58 | cw_left=5 59 | cw_right=5 60 | 61 | 62 | lab = lab_name=lab_cd 63 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test 64 | lab_opts=ali-to-pdf 65 | lab_count_file=auto 66 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/ 67 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 68 | 69 | 70 | n_chunks = 1 71 | 72 | [data_use] 73 | train_with = TIMIT_tr 74 | valid_with = TIMIT_dev 75 | forward_with = TIMIT_test 76 | 77 | [batches] 78 | batch_size_train = 128 79 | max_seq_length_train = 1000 80 | increase_seq_length_train = False 81 | start_seq_len_train = 100 82 | multply_factor_seq_len_train = 2 83 | batch_size_valid = 128 84 | max_seq_length_valid = 1000 85 | 86 | [architecture1] 87 | arch_name = MLP_layers1 88 | arch_proto = proto/MLP.proto 89 | arch_library = neural_networks 90 | arch_class = MLP 91 | arch_pretrain_file = none 92 | arch_freeze = False 93 | arch_seq_model = False 94 | dnn_lay = 1024,1024,1024,1024,N_out_lab_cd 95 | dnn_drop = 0.15,0.15,0.15,0.15,0.0 96 | dnn_use_laynorm_inp = False 97 | 
dnn_use_batchnorm_inp = False 98 | dnn_use_batchnorm = True,True,True,True,False 99 | dnn_use_laynorm = False,False,False,False,False 100 | dnn_act = relu,relu,relu,relu,softmax 101 | arch_lr = 0.08 102 | arch_halving_factor = 0.5 103 | arch_improvement_threshold = 0.001 104 | arch_opt = sgd 105 | opt_momentum = 0.0 106 | opt_weight_decay = 0.0 107 | opt_dampening = 0.0 108 | opt_nesterov = False 109 | 110 | [model] 111 | model_proto = proto/model.proto 112 | model = out_dnn1=compute(MLP_layers1,mfcc) 113 | loss_final=cost_nll(out_dnn1,lab_cd) 114 | err_final=cost_err(out_dnn1,lab_cd) 115 | 116 | [forward] 117 | forward_out = out_dnn1 118 | normalize_posteriors = True 119 | normalize_with_counts_from = lab_cd 120 | save_out_file = False 121 | require_decoding = True 122 | 123 | [decoding] 124 | decoding_script_folder = kaldi_decoding_scripts/ 125 | decoding_script = decode_dnn.sh 126 | decoding_proto = proto/decoding.proto 127 | min_active = 200 128 | max_active = 7000 129 | max_mem = 50000000 130 | beam = 13.0 131 | latbeam = 8.0 132 | acwt = 0.2 133 | max_arcs = -1 134 | skip_scoring = false 135 | scoring_script = local/score.sh 136 | scoring_opts = "--min-lmwt 1 --max-lmwt 10" 137 | norm_vars = False 138 | 139 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/reverse_lm_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Brno University of Technology (Author: Mirko Hannemann) 4 | # Apache 2.0 5 | 6 | # configuration section 7 | utterances=4 8 | maxlen=30 9 | nbest=10 10 | # end config section 11 | 12 | echo "$0 $@" # Print the command line for logging 13 | 14 | [ -f ./path.sh ] && . ./path.sh; # source the path. 15 | . 
parse_options.sh || exit 1; 16 | 17 | if [ $# != 2 ]; then 18 | echo "Usage: utils/reverse_lm_test.sh [options] " 19 | echo "example: utils/reverse_lm_test.sh data/lang_test_tgpr_5k data/lang_test_tgpr_5k.reverse" 20 | echo "options:" 21 | echo " --utterances number of random test utterances" 22 | echo " --maxlen max number of arcs (words) in utterance" 23 | echo " --nbest compare n best paths" 24 | exit 1; 25 | fi 26 | 27 | test_fwd=$1 28 | test_bwd=$2 29 | nb=`echo $nbest | awk '{print $1-1;}'` 30 | 31 | # For each language model the corresponding FST in lang_test_* directory. 32 | 33 | echo "compare LM scores using "$test_fwd/G.fst" and "$test_bwd/G.fst 34 | 35 | for utt in `seq 1 $utterances` 36 | do 37 | # generate random sentence with forward language model 38 | len=1000 # big number 39 | while [ $len -gt $maxlen ] 40 | do 41 | fstrandgen --npath=1 $test_fwd/G.fst | fstprint --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt > sent$utt 42 | len=`cat sent$utt | wc -l` 43 | done 44 | cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{printf "utterance:"; for(i=1;i<=length(a);i++) {printf " %s",a[i];} printf "\n";}' 45 | 46 | # get n best paths with forward language model 47 | cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[i];} print length(a);}' > sent$utt.forward 48 | fstcompile --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt sent$utt.forward > sent$utt.forward.fst 49 | fstcompose $test_fwd/G.fst sent$utt.forward.fst > sent$utt.composed.forward.fst 50 | fstshortestpath --nshortest=$nbest sent$utt.composed.forward.fst | fstprint > sent$utt.composed.forward.n 51 | 52 | rm sent$utt.forward.scores 2>/dev/null 53 | for n in `seq 0 $nb` 54 | do 55 | # select path with rank n 56 | cat sent$utt.composed.forward.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.forward.$n.fst 57 | fstprint 
sent$utt.composed.forward.$n.fst > sent$utt.composed.forward.$n 58 | # compute shortest distance to final states 59 | fstshortestdistance sent$utt.composed.forward.$n.fst | \ 60 | awk -v list=sent$utt.composed.forward.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \ 61 | { if (final[$1]) { cost=$2+final[$1]; if (cost> sent$utt.forward.scores 63 | done 64 | 65 | # get n best paths with reverse language model 66 | cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[length(a)-i+1];} print length(a);}' > sent$utt.reverse 67 | fstcompile --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt sent$utt.reverse > sent$utt.reverse.fst 68 | fstcompose $test_bwd/G.fst sent$utt.reverse.fst > sent$utt.composed.reverse.fst 69 | fstshortestpath --nshortest=$nbest sent$utt.composed.reverse.fst | fstprint > sent$utt.composed.reverse.n 70 | 71 | rm sent$utt.reverse.scores 2>/dev/null 72 | for n in `seq 0 $nb` 73 | do 74 | # select path with rank n 75 | cat sent$utt.composed.reverse.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.reverse.$n.fst 76 | fstprint sent$utt.composed.reverse.$n.fst > sent$utt.composed.reverse.$n 77 | # compute shortest distance to final states 78 | fstshortestdistance sent$utt.composed.reverse.$n.fst | \ 79 | awk -v list=sent$utt.composed.reverse.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \ 80 | { if (final[$1]) { cost=$2+final[$1]; if (cost> sent$utt.reverse.scores 82 | done 83 | 84 | # present results 85 | paste sent$utt.forward.scores sent$utt.reverse.scores | \ 86 | awk '{diff=$1-$2; if ( (diff<0?-diff:diff) > 0.001 ) print NR,$1,$2,"!!!"; else print NR,$1,$2;}' 87 | # clean up 88 | rm sent$utt 89 | rm sent$utt.* 90 | done 91 | -------------------------------------------------------------------------------- 
/cfg/TIMIT_baselines/TIMIT_MLP_mfcc_basic_flex.cfg: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto = proto/global.proto 3 | cfg_proto_chunk = proto/global_chunk.proto 4 | 5 | [exp] 6 | cmd = 7 | run_nn_script = run_nn 8 | out_folder = exp/TIMIT_MLP_basic_flex 9 | seed = 1234 10 | use_cuda = True 11 | multi_gpu = False 12 | save_gpumem = False 13 | n_epochs_tr = 24 14 | 15 | [dataset1] 16 | data_name = TIMIT_tr 17 | fea = fea_name=mfcc 18 | fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/feats.scp 19 | fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/train/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 20 | cw_left=5 21 | cw_right=5 22 | 23 | 24 | lab = lab_name=lab_cd 25 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali 26 | lab_opts=ali-to-pdf 27 | lab_count_file=auto 28 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/ 29 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 30 | 31 | 32 | n_chunks = 5 33 | 34 | [dataset2] 35 | data_name = TIMIT_dev 36 | fea = fea_name=mfcc 37 | fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/feats.scp 38 | fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 39 | cw_left=5 40 | cw_right=5 41 | 42 | 43 | lab = lab_name=lab_cd 44 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_dev 45 | lab_opts=ali-to-pdf 46 | lab_count_file=auto 47 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/ 48 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 49 | 50 | 51 | n_chunks = 1 52 | 53 | [dataset3] 54 | data_name = TIMIT_test 55 | fea = fea_name=mfcc 56 | 
fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/feats.scp 57 | fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/test/utt2spk ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- | 58 | cw_left=5 59 | cw_right=5 60 | 61 | 62 | lab = lab_name=lab_cd 63 | lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test 64 | lab_opts=ali-to-pdf 65 | lab_count_file=auto 66 | lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/ 67 | lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph 68 | 69 | 70 | n_chunks = 1 71 | 72 | [data_use] 73 | train_with = TIMIT_tr 74 | valid_with = TIMIT_dev 75 | forward_with = TIMIT_test 76 | 77 | [batches] 78 | batch_size_train = 128*12 | 64*10 | 32*2 79 | max_seq_length_train = 1000*18 | 500*6 80 | increase_seq_length_train = False 81 | start_seq_len_train = 100 82 | multply_factor_seq_len_train = 2 83 | batch_size_valid = 128 84 | max_seq_length_valid = 1000 85 | 86 | [architecture1] 87 | arch_name = MLP_layers1 88 | arch_proto = proto/MLP.proto 89 | arch_library = neural_networks 90 | arch_class = MLP 91 | arch_pretrain_file = none 92 | arch_freeze = False 93 | arch_seq_model = False 94 | dnn_lay = 1024,1024,1024,1024,N_out_lab_cd 95 | dnn_drop = 0.15*12|0.20*12,0.15,0.15*10|0.20*14,0.15,0.0 96 | dnn_use_laynorm_inp = False 97 | dnn_use_batchnorm_inp = False 98 | dnn_use_batchnorm = True,True,True,True,False 99 | dnn_use_laynorm = False,False,False,False,False 100 | dnn_act = relu,relu,relu,relu,softmax 101 | arch_lr = 0.08*10|0.04*5|0.02*3|0.01*2|0.005*2|0.0025*2 102 | arch_halving_factor = 0.5 103 | arch_improvement_threshold = 0.001 104 | arch_opt = sgd 105 | opt_momentum = 0.0 106 | opt_weight_decay = 0.0 107 | opt_dampening = 0.0 108 | opt_nesterov = False 109 | 110 | [model] 111 | model_proto = proto/model.proto 112 | model = out_dnn1=compute(MLP_layers1,mfcc) 113 | loss_final=cost_nll(out_dnn1,lab_cd) 
114 | err_final=cost_err(out_dnn1,lab_cd) 115 | 116 | [forward] 117 | forward_out = out_dnn1 118 | normalize_posteriors = True 119 | normalize_with_counts_from = lab_cd 120 | save_out_file = False 121 | require_decoding = True 122 | 123 | [decoding] 124 | decoding_script_folder = kaldi_decoding_scripts/ 125 | decoding_script = decode_dnn.sh 126 | decoding_proto = proto/decoding.proto 127 | min_active = 200 128 | max_active = 7000 129 | max_mem = 50000000 130 | beam = 13.0 131 | latbeam = 8.0 132 | acwt = 0.2 133 | max_arcs = -1 134 | skip_scoring = false 135 | scoring_script = local/score.sh 136 | scoring_opts = "--min-lmwt 1 --max-lmwt 10" 137 | norm_vars = False 138 | 139 | -------------------------------------------------------------------------------- /cfg/TIMIT_baselines/TIMIT_MLP_fbank_autoencoder.cfg: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto = proto/global.proto 3 | cfg_proto_chunk = proto/global_chunk.proto 4 | 5 | [exp] 6 | cmd = 7 | run_nn_script = run_nn.py 8 | out_folder = exp/TIMIT_MLP_fbank_autoencoder 9 | seed = 2234 10 | use_cuda = True 11 | multi_gpu = False 12 | save_gpumem = False 13 | n_epochs_tr = 10 14 | 15 | [dataset1] 16 | data_name = TIMIT_tr 17 | fea = fea_name=fbank 18 | fea_lst=quick_test/data/train/feats_fbank.scp 19 | fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/train/utt2spk ark:quick_test/fbank/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- | 20 | cw_left=5 21 | cw_right=5 22 | 23 | lab = lab_name=lab_cd 24 | lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali 25 | lab_opts=ali-to-pdf 26 | lab_count_file=auto 27 | lab_data_folder=quick_test/data/train/ 28 | lab_graph=quick_test/graph 29 | 30 | n_chunks = 5 31 | 32 | [dataset2] 33 | data_name = TIMIT_dev 34 | fea = fea_name=fbank 35 | fea_lst=quick_test/data/dev/feats_fbank.scp 36 | fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/dev/utt2spk ark:quick_test/fbank/cmvn_dev.ark ark:- ark:- | 
add-deltas --delta-order=0 ark:- ark:- | 37 | cw_left=5 38 | cw_right=5 39 | 40 | 41 | lab = lab_name=lab_cd 42 | lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali_dev 43 | lab_opts=ali-to-pdf 44 | lab_count_file=auto 45 | lab_data_folder=quick_test/data/dev/ 46 | lab_graph=quick_test/graph 47 | 48 | n_chunks = 1 49 | 50 | [dataset3] 51 | data_name = TIMIT_test 52 | fea = fea_name=fbank 53 | fea_lst=quick_test/data/test/feats_fbank.scp 54 | fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/test/utt2spk ark:quick_test/fbank/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- | 55 | cw_left=5 56 | cw_right=5 57 | 58 | lab = lab_name=lab_cd 59 | lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali_test 60 | lab_opts=ali-to-pdf 61 | lab_count_file=auto 62 | lab_data_folder=quick_test/data/test/ 63 | lab_graph=quick_test/graph 64 | 65 | n_chunks = 1 66 | 67 | [data_use] 68 | train_with = TIMIT_tr 69 | valid_with = TIMIT_dev 70 | forward_with = TIMIT_test 71 | 72 | [batches] 73 | batch_size_train = 128 74 | max_seq_length_train = 1000 75 | increase_seq_length_train = False 76 | start_seq_len_train = 100 77 | multply_factor_seq_len_train = 2 78 | batch_size_valid = 128 79 | max_seq_length_valid = 1000 80 | 81 | [architecture1] 82 | arch_name = MLP_encoder 83 | arch_proto = proto/MLP.proto 84 | arch_library = neural_networks 85 | arch_class = MLP 86 | arch_pretrain_file = none 87 | arch_freeze = False 88 | arch_seq_model = False 89 | dnn_lay = 1024,100 90 | dnn_drop = 0.15,0.15 91 | dnn_use_laynorm_inp = False 92 | dnn_use_batchnorm_inp = False 93 | dnn_use_batchnorm = True,True 94 | dnn_use_laynorm = False,False 95 | dnn_act = relu,linear 96 | arch_lr = 0.08 97 | arch_halving_factor = 0.5 98 | arch_improvement_threshold = 0.001 99 | arch_opt = sgd 100 | opt_momentum = 0.0 101 | opt_weight_decay = 0.0 102 | opt_dampening = 0.0 103 | opt_nesterov = False 104 | 105 | [architecture2] 106 | arch_name = MLP_decoder 107 | arch_proto = proto/MLP.proto 108 | arch_library = 
neural_networks 109 | arch_class = MLP 110 | arch_pretrain_file = none 111 | arch_freeze = False 112 | arch_seq_model = False 113 | dnn_lay = 1024,440 114 | dnn_drop = 0.15,0.0 115 | dnn_use_laynorm_inp = False 116 | dnn_use_batchnorm_inp = False 117 | dnn_use_batchnorm = True,False 118 | dnn_use_laynorm = False,False 119 | dnn_act = relu,linear 120 | arch_lr = 0.08 121 | arch_halving_factor = 0.5 122 | arch_improvement_threshold = 0.001 123 | arch_opt = sgd 124 | opt_momentum = 0.0 125 | opt_weight_decay = 0.0 126 | opt_dampening = 0.0 127 | opt_nesterov = False 128 | 129 | 130 | [model] 131 | model_proto = proto/model.proto 132 | model = enc_out=compute(MLP_encoder,fbank) 133 | dec_out=compute(MLP_decoder,enc_out) 134 | loss_final=mse(dec_out,fbank) 135 | err_final=cost_err(dec_out,lab_cd) 136 | 137 | [forward] 138 | forward_out = enc_out 139 | normalize_posteriors = False 140 | normalize_with_counts_from = None 141 | save_out_file = True 142 | require_decoding = False 143 | 144 | [decoding] 145 | decoding_script_folder = kaldi_decoding_scripts/ 146 | decoding_script = decode_dnn.sh 147 | decoding_proto = proto/decoding.proto 148 | min_active = 200 149 | max_active = 7000 150 | max_mem = 50000000 151 | beam = 13.0 152 | latbeam = 8.0 153 | acwt = 0.2 154 | max_arcs = -1 155 | skip_scoring = false 156 | scoring_script = local/score.sh 157 | scoring_opts = "--min-lmwt 1 --max-lmwt 10" 158 | norm_vars = False 159 | 160 | -------------------------------------------------------------------------------- /save_raw_fea.py: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | # pytorch-kaldi v.0.1 3 | # Mirco Ravanelli, Titouan Parcollet 4 | # Mila, University of Montreal 5 | # October 2018 6 | # 7 | # Description: This script generates kaldi ark files containing raw features. 8 | # The file list must be a file containing "snt_id file.wav". 
9 | # Note that only wav files are supported here (sphere or other formats are not supported) 10 | ########################################################## 11 | 12 | 13 | import scipy.io.wavfile 14 | import math 15 | import numpy as np 16 | import os 17 | from data_io import read_vec_int_ark, write_mat 18 | 19 | 20 | # Run it for all the data chunks (e.g., train, dev, test) => uncomment 21 | 22 | lab_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test" 23 | lab_opts = "ali-to-pdf" 24 | out_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test" 25 | wav_lst = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/test/wav.lst" 26 | scp_file_out = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test/feats_raw.scp" 27 | 28 | # lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_dev' 29 | # lab_opts='ali-to-pdf' 30 | # out_folder='raw_TIMIT_200ms/dev' 31 | # wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/dev/wav_lst.scp' 32 | # scp_file_out='quick_test/data/dev/feats_raw.scp' 33 | 34 | # lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_test' 35 | # lab_opts='ali-to-pdf' 36 | # out_folder='raw_TIMIT_200ms/test' 37 | # wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/test/wav_lst.scp' 38 | # scp_file_out='quick_test/data/test/feats_raw.scp' 39 | 40 | 41 | sig_fs = 16000 # Hz 42 | sig_wlen = 200 # ms 43 | 44 | lab_fs = 16000 # Hz 45 | lab_wlen = 25 # ms 46 | lab_wshift = 10 # ms 47 | 48 | sig_wlen_samp = int((sig_fs * sig_wlen) / 1000) 49 | lab_wlen_samp = int((lab_fs * lab_wlen) / 1000) 50 | lab_wshift_samp = int((lab_fs * lab_wshift) / 1000) 51 | 52 | 53 | # Create the output folder 54 | try: 55 | os.stat(out_folder) 56 | except: 57 | os.makedirs(out_folder) 58 | 59 | 60 | # Create the scp file 61 | scp_file = open(scp_file_out, "w") 62 | 63 | # reading the labels 64 | lab = { 65 | k: v 66 | for k, v in read_vec_int_ark( 67 | "gunzip -c " + lab_folder + "/ali*.gz | "
+ lab_opts + " " + lab_folder + "/final.mdl ark:- ark:-|", out_folder 68 | ) 69 | } 70 | 71 | # reading the list file 72 | with open(wav_lst) as f: 73 | sig_lst = f.readlines() 74 | 75 | sig_lst = [x.strip() for x in sig_lst] 76 | 77 | for sig_file in sig_lst: 78 | sig_id = sig_file.split(" ")[0] 79 | sig_path = sig_file.split(" ")[1] 80 | [fs, signal] = scipy.io.wavfile.read(sig_path) 81 | signal = signal.astype(float) / 32768 82 | signal = signal / np.max(np.abs(signal)) 83 | 84 | cnt_fr = 0 85 | beg_samp = 0 86 | frame_all = [] 87 | 88 | while beg_samp + lab_wlen_samp < signal.shape[0]: 89 | sample_fr = np.zeros(sig_wlen_samp) 90 | central_sample_lab = int(((beg_samp + lab_wlen_samp / 2) - 1)) 91 | central_fr_index = int(((sig_wlen_samp / 2) - 1)) 92 | 93 | beg_signal_fr = int(central_sample_lab - (sig_wlen_samp / 2)) 94 | end_signal_fr = int(central_sample_lab + (sig_wlen_samp / 2)) 95 | 96 | if beg_signal_fr >= 0 and end_signal_fr <= signal.shape[0]: 97 | sample_fr = signal[beg_signal_fr:end_signal_fr] 98 | else: 99 | if beg_signal_fr < 0: 100 | n_left_samples = central_sample_lab 101 | sample_fr[central_fr_index - n_left_samples + 1 :] = signal[0:end_signal_fr] 102 | if end_signal_fr > signal.shape[0]: 103 | n_right_samples = signal.shape[0] - central_sample_lab 104 | sample_fr[0 : central_fr_index + n_right_samples + 1] = signal[beg_signal_fr:] 105 | 106 | frame_all.append(sample_fr) 107 | cnt_fr = cnt_fr + 1 108 | beg_samp = beg_samp + lab_wshift_samp 109 | 110 | frame_all = np.asarray(frame_all) 111 | 112 | # Save the matrix into a kaldi ark 113 | out_file = out_folder + "/" + sig_id + ".ark" 114 | write_mat(out_folder, out_file, frame_all, key=sig_id) 115 | print(sig_id) 116 | scp_file.write(sig_id + " " + out_folder + "/" + sig_id + ".ark:" + str(len(sig_id) + 1) + "\n") 117 | 118 | N_fr_comp = 1 + math.floor((signal.shape[0] - 400) / 160) 119 | # print("%s %i %i "%(lab[sig_id].shape[0],N_fr_comp,cnt_fr)) 120 | 121 | scp_file.close() 122 | 
-------------------------------------------------------------------------------- /cfg/Librispeech_baselines/libri_MLP_fmllr.cfg: -------------------------------------------------------------------------------- 1 | [cfg_proto] 2 | cfg_proto=proto/global.proto 3 | cfg_proto_chunk=proto/global_chunk.proto 4 | 5 | [exp] 6 | cmd= 7 | run_nn_script=run_nn 8 | out_folder=exp/libri_MLP_fmllr 9 | seed=1234 10 | use_cuda=True 11 | multi_gpu=False 12 | save_gpumem=False 13 | N_epochs_tr=24 14 | 15 | [dataset1] 16 | data_name=train_clean_100 17 | fea:fea_name=fmllr 18 | fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/feats.scp 19 | fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/utt2spk ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- | 20 | cw_left=5 21 | cw_right=5 22 | 23 | 24 | lab:lab_name=lab_cd 25 | lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/ 26 | lab_opts=ali-to-pdf 27 | lab_count_file=auto 28 | lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/ 29 | lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/ 30 | 31 | N_chunks=50 32 | 33 | [dataset2] 34 | data_name=dev_clean 35 | fea:fea_name=fmllr 36 | fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/feats.scp 37 | fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/utt2spk ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- | 38 | cw_left=5 39 | cw_right=5 40 | 41 | 42 | lab:lab_name=lab_cd 43 | lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b_ali_dev_clean_100 44 | lab_opts=ali-to-pdf 45 | lab_count_file=auto 46 | lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/ 47 | lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/ 48 | 49 | N_chunks=4 
50 | 51 | [dataset3] 52 | data_name=test_clean 53 | fea:fea_name=fmllr 54 | fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/feats.scp 55 | fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/utt2spk ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- | 56 | cw_left=5 57 | cw_right=5 58 | 59 | 60 | lab:lab_name=lab_cd 61 | lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b_ali_test_clean_100 62 | lab_opts=ali-to-pdf 63 | lab_count_file=auto 64 | lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/ 65 | lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/ 66 | 67 | N_chunks=8 68 | 69 | 70 | [data_use] 71 | train_with=train_clean_100 72 | valid_with=dev_clean 73 | forward_with=test_clean 74 | 75 | 76 | [batches] 77 | batch_size_train=128 78 | max_seq_length_train=1000 79 | increase_seq_length_train=False 80 | start_seq_len_train=100 81 | multply_factor_seq_len_train=2 82 | batch_size_valid=128 83 | max_seq_length_valid=1000 84 | 85 | 86 | [architecture1] 87 | arch_name=MLP_layers 88 | arch_proto=proto/MLP.proto 89 | arch_library=neural_networks 90 | arch_class=MLP 91 | arch_pretrain_file=none 92 | arch_freeze=False 93 | arch_seq_model=False 94 | 95 | dnn_lay=1024,1024,1024,1024,1024,N_out_lab_cd 96 | dnn_drop=0.15,0.15,0.15,0.15,0.15,0.0 97 | dnn_use_laynorm_inp=False 98 | dnn_use_batchnorm_inp=False 99 | dnn_use_batchnorm=True,True,True,True,True,False 100 | dnn_use_laynorm=False,False,False,False,False,False 101 | dnn_act=relu,relu,relu,relu,relu,softmax 102 | 103 | arch_lr=0.08 104 | arch_halving_factor=0.5 105 | arch_improvement_threshold=0.001 106 | arch_opt=sgd 107 | opt_momentum=0.0 108 | opt_weight_decay=0.0 109 | opt_dampening=0.0 110 | opt_nesterov=False 111 | 112 | 113 | [model] 114 | model_proto=proto/model.proto 115 | model:out_dnn1=compute(MLP_layers,fmllr) 116 | 
loss_final=cost_nll(out_dnn1,lab_cd) 117 | err_final=cost_err(out_dnn1,lab_cd) 118 | 119 | 120 | [forward] 121 | forward_out=out_dnn1 122 | normalize_posteriors=True 123 | normalize_with_counts_from=lab_cd 124 | save_out_file=False 125 | require_decoding=True 126 | 127 | 128 | [decoding] 129 | decoding_script_folder=kaldi_decoding_scripts/ 130 | decoding_script=decode_dnn.sh 131 | decoding_proto=proto/decoding.proto 132 | min_active=200 133 | max_active=7000 134 | max_mem=50000000 135 | beam=20.0 136 | latbeam=12.0 137 | acwt=0.10 138 | max_arcs=-1 139 | skip_scoring=false 140 | scoring_script=/scratch/ravanelm/exp/librispeech/s5/local/score.sh 141 | scoring_opts="--min-lmwt 4 --max-lmwt 23" 142 | norm_vars=False 143 | 144 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/utils/format_lm_sri.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Begin configuration section. 20 | srilm_opts="-subset -prune-lowprobs -unk -tolower" 21 | # end configuration sections 22 | 23 | 24 | . 
utils/parse_options.sh 25 | 26 | if [ $# -ne 4 ] && [ $# -ne 3 ]; then 27 | echo "Usage: $0 [options] [] " 28 | echo "The argument is no longer needed but is supported for back compatibility" 29 | echo "E.g.: utils/format_lm_sri.sh data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test" 30 | echo "Converts ARPA-format language models to FSTs. Change the LM vocabulary using SRILM." 31 | echo "Note: if you want to just convert ARPA LMs to FSTs, there is a simpler way to do this" 32 | echo "that doesn't require SRILM: see examples in egs/wsj/s5/local/wsj_format_local_lms.sh" 33 | echo "options:" 34 | echo " --help # print this message and exit" 35 | echo " --srilm-opts STRING # options to pass to SRILM tools (default: '$srilm_opts')" 36 | exit 1; 37 | fi 38 | 39 | 40 | if [ $# -eq 4 ] ; then 41 | lang_dir=$1 42 | lm=$2 43 | lexicon=$3 44 | out_dir=$4 45 | else 46 | lang_dir=$1 47 | lm=$2 48 | out_dir=$3 49 | fi 50 | 51 | mkdir -p $out_dir 52 | 53 | for f in $lm $lang_dir/words.txt; do 54 | if [ ! -f $f ]; then 55 | echo "$0: expected input file $f to exist." 56 | exit 1; 57 | fi 58 | done 59 | 60 | [ -f ./path.sh ] && . ./path.sh 61 | 62 | loc=`which change-lm-vocab` 63 | if [ -z $loc ]; then 64 | if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... 65 | sdir=`pwd`/../../../tools/srilm/bin/i686-m64 66 | else 67 | sdir=`pwd`/../../../tools/srilm/bin/i686 68 | fi 69 | if [ -f $sdir/../change-lm-vocab ]; then 70 | echo Using SRILM tools from $sdir 71 | export PATH=$PATH:$sdir:$sdir/.. 72 | else 73 | echo You appear to not have SRILM tools installed, either on your path, 74 | echo or installed in $sdir. See tools/install_srilm.sh for installation 75 | echo instructions. 
76 | exit 1 77 | fi 78 | fi 79 | 80 | echo "Converting '$lm' to FST" 81 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); 82 | trap 'rm -rf "$tmpdir"' EXIT 83 | 84 | mkdir -p $out_dir 85 | cp -r $lang_dir/* $out_dir || exit 1; 86 | 87 | lm_base=$(basename $lm '.gz') 88 | gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \ 89 | > $out_dir/oovs_${lm_base}.txt || exit 1; 90 | 91 | # Removing all "illegal" combinations of the sentence-boundary symbols <s> and </s>, which are supposed to 92 | # occur only at beginning/end of utt. These can cause determinization failures 93 | # of CLG [ends up being epsilon cycles]. 94 | gunzip -c $lm \ 95 | | egrep -v ' | | ' \ 96 | | gzip -c > $tmpdir/lm.gz || exit 1; 97 | 98 | awk '{print $1}' $out_dir/words.txt > $tmpdir/voc || exit 1; 99 | 100 | # Change the LM vocabulary to be the intersection of the current LM vocabulary 101 | # and the set of words in the pronunciation lexicon. This also renormalizes the 102 | # LM by recomputing the backoff weights, and removes those ngrams whose 103 | # probabilities are lower than the backed-off estimates. 104 | change-lm-vocab -vocab $tmpdir/voc -lm $tmpdir/lm.gz -write-lm $tmpdir/out_lm \ 105 | $srilm_opts || exit 1; 106 | 107 | arpa2fst $tmpdir/out_lm | fstprint \ 108 | | utils/eps2disambig.pl | utils/s2eps.pl \ 109 | | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 110 | --keep_isymbols=false --keep_osymbols=false \ 111 | | fstrmepsilon | fstarcsort --sort_type=ilabel > $out_dir/G.fst || exit 1; 112 | 113 | fstisstochastic $out_dir/G.fst 114 | 115 | # The output is like: 116 | # 9.14233e-05 -0.259833 117 | # we do expect the first of these 2 numbers to be close to zero (the second is 118 | # nonzero because the backoff weights make the states sum to >1).
119 | 120 | echo "Succeeded in formatting LM '$lm' -> '$out_dir/G.fst'" 121 | -------------------------------------------------------------------------------- /kaldi_decoding_scripts/local/nnet/run_dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | # This example script trains a DNN on top of fMLLR features. 7 | # The training is done in 3 stages, 8 | # 9 | # 1) RBM pre-training: 10 | # in this unsupervised stage we train a stack of RBMs, 11 | # a good starting point for frame cross-entropy training. 12 | # 2) frame cross-entropy training: 13 | # the objective is to classify frames to correct pdfs. 14 | # 3) sequence-training optimizing sMBR: 15 | # the objective is to emphasize state-sequences with better 16 | # frame accuracy w.r.t. reference alignment. 17 | 18 | . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. 19 | ## This relates to the queue. 20 | 21 | . ./path.sh ## Source the tools/utils (import the queue.pl) 22 | 23 | # Config: 24 | gmmdir=exp/tri3 25 | data_fmllr=data-fmllr-tri3 26 | stage=2 # resume training with --stage=N 27 | # End of config. 28 | .
utils/parse_options.sh || exit 1; 29 | # 30 | 31 | if [ $stage -le 0 ]; then 32 | # Store fMLLR features, so we can train on them easily, 33 | # test 34 | dir=$data_fmllr/test 35 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 36 | --transform-dir $gmmdir/decode_test \ 37 | $dir data/test $gmmdir $dir/log $dir/data || exit 1 38 | # dev 39 | dir=$data_fmllr/dev 40 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 41 | --transform-dir $gmmdir/decode_dev \ 42 | $dir data/dev $gmmdir $dir/log $dir/data || exit 1 43 | # train 44 | dir=$data_fmllr/train 45 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 46 | --transform-dir ${gmmdir}_ali \ 47 | $dir data/train $gmmdir $dir/log $dir/data || exit 1 48 | # split the data : 90% train 10% cross-validation (held-out) 49 | utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 || exit 1 50 | fi 51 | 52 | if [ $stage -le 1 ]; then 53 | # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) 54 | dir=exp/dnn4_pretrain-dbn 55 | (tail --pid=$$ -F $dir/log/pretrain_dbn.log 2>/dev/null)& # forward log 56 | # $cuda_cmd $dir/log/pretrain_dbn.log \ 57 | # steps/nnet/pretrain_dbn.sh --hid-dim 1024 --rbm-iter 20 $data_fmllr/train $dir || exit 1; 58 | fi 59 | 60 | if [ $stage -le 2 ]; then 61 | # Train the DNN optimizing per-frame cross-entropy. 
62 | dir=exp/dnn4_pretrain-dbn_dnn 63 | ali=${gmmdir}_ali 64 | feature_transform=exp/dnn4_pretrain-dbn/final.feature_transform 65 | dbn=exp/dnn4_pretrain-dbn/6.dbn 66 | (tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log 67 | # Train 68 | $cuda_cmd $dir/log/train_nnet.log \ 69 | # steps/nnet/train.sh --feature-transform $feature_transform --hid-layers 6 --learn-rate 0.008 \ 70 | # $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir || exit 1; 71 | # Decode (reuse HCLG graph) 72 | steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.2 \ 73 | $gmmdir/graph $data_fmllr/test $dir/decode_test || exit 1; 74 | steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.2 \ 75 | $gmmdir/graph $data_fmllr/dev $dir/decode_dev || exit 1; 76 | fi 77 | 78 | 79 | # Sequence training using sMBR criterion, we do Stochastic-GD 80 | # with per-utterance updates. We use usually good acwt 0.1 81 | dir=exp/dnn4_pretrain-dbn_dnn_smbr 82 | srcdir=exp/dnn4_pretrain-dbn_dnn 83 | acwt=0.2 84 | 85 | if [ $stage -le 3 ]; then 86 | # First we generate lattices and alignments: 87 | steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \ 88 | $data_fmllr/train data/lang $srcdir ${srcdir}_ali || exit 1; 89 | steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --acwt $acwt \ 90 | --lattice-beam 10.0 --beam 18.0 \ 91 | $data_fmllr/train data/lang $srcdir ${srcdir}_denlats || exit 1; 92 | fi 93 | 94 | if [ $stage -le 4 ]; then 95 | # Re-train the DNN by 6 iterations of sMBR 96 | steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt \ 97 | --do-smbr true \ 98 | $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1 99 | # Decode 100 | for ITER in 1 6; do 101 | steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" \ 102 | --nnet $dir/${ITER}.nnet --acwt $acwt \ 103 | $gmmdir/graph $data_fmllr/test $dir/decode_test_it${ITER} || exit 1 104 | steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" \ 105 | --nnet $dir/${ITER}.nnet --acwt 
$acwt \ 106 | $gmmdir/graph $data_fmllr/dev $dir/decode_dev_it${ITER} || exit 1 107 | done 108 | fi 109 | 110 | echo Success 111 | exit 0 112 | 113 | # Getting results [see RESULTS file] 114 | # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done 115 | --------------------------------------------------------------------------------