├── .gitignore
├── LICENSE
├── README.md
├── run_hkust
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-dnn-fbank+pitch.sh
│   └── run-dnn.sh
├── run_swbd
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-dnn-fbank.sh
│   └── run-dnn.sh
├── run_swbd_110h
│   ├── RESULTS
│   ├── run-bnf-fbank-tandem.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn.sh
│   ├── sat
│   │   ├── run-dnn-fbank-sat.sh
│   │   ├── run-dnn-sat.sh
│   │   └── run-ivec-extract.sh
│   └── tmp
│       ├── run-bnf-fbank-tandem-sat.sh
│       └── run-bnf-tandem-sat.sh
├── run_tedlium
│   ├── .nfs0000001c872b1d8100008f9b
│   ├── RESULTS
│   ├── run-bnf-tandem-fbank.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn-maxout.sh
│   ├── run-dnn.sh
│   └── tmp
│       ├── run-bnf-tandem-fbank-sat.sh
│       ├── run-cnn-sat.sh
│       ├── run-dnn-fbank-sat-dnnalign.sh
│       ├── run-dnn-fbank-sat.sh
│       ├── run-dnn-lhuc-bkup.sh
│       ├── run-dnn-lhuc.sh
│       └── run-dnn-sat-lhuc.sh
├── run_timit
│   ├── RESULTS
│   ├── kaldi_io
│   │   └── run-dnn.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn-maxout.sh
│   └── run-dnn.sh
├── run_wsj
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   └── run-dnn.sh
└── steps_pdnn
    ├── align_nnet.sh
    ├── build_avnnet_pfile.sh
    ├── build_nnet_pfile.sh
    ├── decode_avdnn.sh
    ├── decode_dnn.sh
    ├── make_bnf_feat.sh
    ├── make_conv_feat.sh
    ├── make_denlats_nnet.sh
    ├── sat
    │   ├── build_nnet_pfile_ivec.sh
    │   ├── decode_dnn_concat.sh
    │   ├── decode_dnn_ivec.sh
    │   ├── make_bnf_feat_ivec.sh
    │   └── make_feat_with_ivec.sh
    └── tmp
        ├── decode_dnn_lhuc.sh
        └── make_nnet_data.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
make_post_nnet_bkup.sh
make_post_nnet.sh
make_seq_data.sh
run-dnn-mpe.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Kaldi+PDNN
=========

Kaldi+PDNN builds state-of-the-art DNN-based ASR systems using the [Kaldi](http://kaldi.sourceforge.net/) and [PDNN](http://www.cs.cmu.edu/~ymiao/pdnntk.html) toolkits. Check the [project webpage](http://www.cs.cmu.edu/~ymiao/kaldipdnn.html) for more information.

--------------------------------------------------------------------------------
/run_hkust/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# the SAT model
%WER 44.50 [ 74529 / 167480, 8902 ins, 13372 del, 52255 sub ] exp/tri5a/decode/cer_13

# run-dnn.sh
%WER 36.77 [ 61582 / 167480, 7783 ins, 11586 del, 42213 sub ] exp_pdnn/dnn/decode/cer_10

# run-bnf-tandem.sh
%WER 37.24 [ 62374 / 167480, 6921 ins, 12198 del, 43255 sub ] exp_pdnn/bnf_tandem/tri6a/decode/cer_26
%WER 35.84 [ 60021 / 167480, 5949 ins, 11217 del, 42855 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it1/cer_22
%WER 35.45 [ 59373 / 167480, 5428 ins, 11971 del, 41974 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it2/cer_22
%WER 35.42 [ 59326 / 167480, 5199 ins, 12598 del, 41529 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it3/cer_22
%WER 35.49 [ 59435 / 167480, 5017 ins, 13097 del, 41321 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it4/cer_22

# run-dnn-fbank+pitch.sh
%WER 36.67 [ 61420 / 167480, 8207 ins, 10761 del, 42452 sub ] exp_pdnn/dnn_fbank_pitch/decode/cer_9


# The number of states in the SAT model exp/tri5a looks too small,
# so we built a larger SAT model with
#   steps/train_sat.sh \
#     6000 120000 data/train data/lang exp/tri5a_ali exp/tri6a || exit 1;
# the SAT model exp/tri6a
%WER 44.16 [ 73964 / 167480, 9945 ins, 12499 del, 51520 sub ] exp/tri6a/decode/cer_13

# run-dnn.sh : with the larger model we get some gains on the DNN. Other PDNN systems are likely to be boosted as well.
%WER 35.60 [ 59621 / 167480, 7667 ins, 11147 del, 40807 sub ] exp_pdnn/dnn_6a/decode/cer_11
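The tri6a note above only records the train_sat.sh call. For context, a minimal sketch of the full rebuild under standard Kaldi conventions follows; the alignment step, lang/graph directories, job counts, and decode config here are assumptions, not taken from the recipe:

# realign with tri5a, train the larger SAT model, then decode the dev set (assumed setup)
steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" data/train data/lang exp/tri5a exp/tri5a_ali || exit 1;
steps/train_sat.sh 6000 120000 data/train data/lang exp/tri5a_ali exp/tri6a || exit 1;
utils/mkgraph.sh data/lang_test exp/tri6a exp/tri6a/graph || exit 1;
steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \
  exp/tri6a/graph data/dev exp/tri6a/decode || exit 1;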
--------------------------------------------------------------------------------
/run_hkust/run-dnn-fbank+pitch.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that trains the DNN system over the fbank+pitch features. It
# is to be run after run.sh. Before running this, you should have already built
# the initial GMM model. This script requires a GPU card, and also the "pdnn"
# toolkit to train the DNN. The input filterbank features are mean- and
# variance-normalized.

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn_fbank_pitch
do_ptr=true          # whether to do pre-training
delete_pfile=true    # whether to delete pfiles after DNN training

gmmdir=exp/tri5a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi
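# (A hedged aside, not part of the original recipe: the import test above only
# proves Theano is installed. To confirm it will actually use the GPU rather
# than silently falling back to CPU, one can run
#   THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \
#     $pythonCMD -c 'import theano; print(theano.config.device)'
# which should print the gpu device name when the GPU backend is usable.)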
mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo ---------------------------------------------------------------------
echo "Creating DNN training and validation data (pfiles)"
echo ---------------------------------------------------------------------
# Alignment on the training data
if [ ! -d ${gmmdir}_ali ]; then
  echo "Generate alignment on train"
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train data/lang $gmmdir ${gmmdir}_ali || exit 1
fi

# Generate the fbank features. The fbanks are 40-dimensional on each frame
echo "--num-mel-bins=40" > conf/fbank.conf
echo "--sample-frequency=8000" >> conf/fbank.conf
mkdir -p $working_dir/data_fbank
for set in train dev; do
  if [ ! -d $working_dir/data_fbank/$set ]; then
    cp -r data/$set $working_dir/data_fbank/$set
    ( cd $working_dir/data_fbank/$set; rm -rf {cmvn,feats}.scp split*; )
    steps/make_fbank.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_fbank/$set $working_dir/_log $working_dir/_fbank || exit 1;
  fi
done

# Generate the pitch features. The pitches are 3-dimensional on each frame
echo "--sample-frequency=8000" > conf/pitch.conf
mkdir -p $working_dir/data_pitch
for set in train dev; do
  if [ ! -d $working_dir/data_pitch/$set ]; then
    cp -r data/$set $working_dir/data_pitch/$set
    ( cd $working_dir/data_pitch/$set; rm -rf {cmvn,feats}.scp split*; )
    steps/make_pitch_kaldi.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_pitch/$set $working_dir/_log $working_dir/_pitch || exit 1;
  fi
done

# Combine fbank and pitch together
mkdir -p $working_dir/data
for set in train dev; do
  if [ ! -d $working_dir/data/$set ]; then
    steps/append_feats.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_fbank/$set $working_dir/data_pitch/$set \
      $working_dir/data/$set $working_dir/_log $working_dir/_append || exit 1;
    # We need to compute CMVN stats on the appended features
    steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_append || exit 1;
  fi
done
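# (Sanity check, not part of the original recipe: each appended frame should
# carry 40 fbank + 3 pitch = 43 dimensions, so the +/-5 splicing below yields
# 43 * 11 = 473 network inputs. The per-frame dimension can be verified with:
#   feat-to-dim scp:$working_dir/data/train/feats.scp -   # expect 43
# )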
# By default, inputs include 11 frames (+/-5) of 43-dimensional appended features, with 473 dimensions.
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --every-nth-frame 1 --norm-vars true \
    --do-split true --pfile-unit-size 30 --cv-ratio 0.05 \
    --splice-opts "--left-context=5 --right-context=5" --input-dim 473 \
    $working_dir/data/train ${gmmdir}_ali $working_dir || exit 1
  ( cd $working_dir; rm concat.pfile; )
  touch $working_dir/train.pfile.done
fi

echo ---------------------------------------------------------------------
echo "Starting DNN training"
echo ---------------------------------------------------------------------
feat_dim=$(cat $working_dir/train.pfile |head |grep num_features| awk '{print $2}') || exit 1;

if $do_ptr && [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_SdA.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --first-reconstruct-activation "tanh" \
      --wdir $working_dir --output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --output-format kaldi --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
  $delete_pfile && rm -rf $working_dir/*.pfile
fi

echo ---------------------------------------------------------------------
echo "Decode the final system"
echo ---------------------------------------------------------------------
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph
  # Here norm-vars has to be the same as in steps_pdnn/build_nnet_pfile.sh
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" --norm-vars true \
    $graph_dir $working_dir/data/dev ${gmmdir}_ali $working_dir/decode || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
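When the run finishes, each tested LM weight leaves a cer_N file under the decode directory; these are where the RESULTS numbers above come from. Assuming the standard utils/best_wer.sh helper from the Kaldi egs is available, the best operating point can be read off with:

grep WER exp_pdnn/dnn_fbank_pitch/decode/cer_* | utils/best_wer.sh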
--------------------------------------------------------------------------------
/run_hkust/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that trains the DNN system. It is to be run after run.sh.
# Before running this, you should have already built the initial GMM model. This
# script requires a GPU, and also the "pdnn" toolkit to train the DNN.

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn
do_ptr=true           # whether to do pre-training
delete_pfile=false    # whether to delete pfiles after DNN training

gmmdir=exp/tri5a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo ---------------------------------------------------------------------
echo "Generate alignment and prepare fMLLR features"
echo ---------------------------------------------------------------------
# Alignment on the training data
if [ ! -d ${gmmdir}_ali ]; then
  echo "Generate alignment on train"
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train data/lang $gmmdir ${gmmdir}_ali || exit 1
fi

# Dump fMLLR features. We generate "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  echo "Save fmllr features of train"
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali \
    $working_dir/data/train data/train $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/dev ]; then
  echo "Save fmllr features of dev"
  steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode \
    $working_dir/data/dev data/dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/dev $working_dir/_log $working_dir/_fmllr || exit 1;
fi
echo ---------------------------------------------------------------------
echo "Create DNN training and validation pfiles"
echo ---------------------------------------------------------------------

# By default, DNN inputs include: spliced 11 frames (+/-5) of fMLLR with 440 dimensions
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --every-nth-frame 1 --norm-vars false \
    --do-split true --pfile-unit-size 50 --cv-ratio 0.05 \
    --splice-opts "--left-context=5 --right-context=5" --input-dim 440 \
    $working_dir/data/train ${gmmdir}_ali $working_dir || exit 1
  ( cd $working_dir; rm concat.pfile; )
  touch $working_dir/train.pfile.done
fi

echo ---------------------------------------------------------------------
echo "Start DNN training"
echo ---------------------------------------------------------------------
feat_dim=$(cat $working_dir/train.pfile |head |grep num_features| awk '{print $2}') || exit 1;

if $do_ptr && [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_SdA.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --first-reconstruct-activation "tanh" \
      --wdir $working_dir --output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --output-format kaldi --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
  $delete_pfile && rm -rf $working_dir/*.pfile
fi

echo ---------------------------------------------------------------------
echo "Decode the final system"
echo ---------------------------------------------------------------------
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" --norm-vars false \
    $graph_dir $working_dir/data/dev ${gmmdir}_ali $working_dir/decode || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
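Since the script runs parse_options.sh after its variable definitions, the defaults above can be overridden on the command line. For instance, a hypothetical invocation that trains on top of the larger tri6a model (as in the exp_pdnn/dnn_6a result in run_hkust/RESULTS) on a specific GPU device:

./run-dnn.sh --gmmdir exp/tri6a --working-dir exp_pdnn/dnn_6a --gpu gpu0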
--------------------------------------------------------------------------------
/run_swbd/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# exp/tri4b : the SAT model
%WER 22.6 | 1831 21395 | 79.9 13.9 6.3 2.5 22.6 60.5 | exp/tri4b/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 30.2 | 4459 42989 | 73.4 18.9 7.6 3.6 30.2 66.8 | exp/tri4b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.filt.sys

# results of pdnn systems

# run-dnn.sh
%WER 15.4 | 1831 21395 | 86.2 8.9 4.8 1.7 15.4 52.7 | exp_pdnn/dnn/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.4 | 4459 42989 | 81.0 12.9 6.1 2.4 21.4 58.8 | exp_pdnn/dnn/decode_eval2000_sw1_tg/score_11/eval2000.ctm.filt.sys

# run-bnf-tandem.sh
%WER 15.9 | 1831 21395 | 85.7 9.5 4.8 1.6 15.9 53.7 | exp_pdnn/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_31/eval2000.ctm.swbd.filt.sys
%WER 22.5 | 4459 42989 | 79.9 13.7 6.4 2.5 22.5 60.9 | exp_pdnn/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_27/eval2000.ctm.filt.sys

%WER 15.3 | 1831 21395 | 86.4 9.3 4.3 1.7 15.3 52.1 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_24/eval2000.ctm.swbd.filt.sys
%WER 15.1 | 1831 21395 | 86.5 9.2 4.4 1.6 15.1 52.3 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_24/eval2000.ctm.swbd.filt.sys
%WER 15.0 | 1831 21395 | 86.4 8.9 4.7 1.5 15.0 52.3 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.swbd.filt.sys
%WER 15.2 | 1831 21395 | 86.5 9.1 4.5 1.6 15.2 52.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_24/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 4459 42989 | 80.6 13.1 6.3 2.3 21.8 59.7 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_24/eval2000.ctm.filt.sys
%WER 21.7 | 4459 42989 | 80.5 12.9 6.6 2.2 21.7 59.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_24/eval2000.ctm.filt.sys
%WER 21.7 | 4459 42989 | 80.4 12.7 6.9 2.1 21.7 59.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_24/eval2000.ctm.filt.sys
%WER 21.9 | 4459 42989 | 80.2 12.6 7.2 2.1 21.9 60.4 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_24/eval2000.ctm.filt.sys


# run-dnn-fbank.sh

--------------------------------------------------------------------------------
/run_swbd/run-dnn-fbank.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN system over the filterbank features. It
# is to be run after run.sh. Before running this, you should have already built
# the initial GMM model. This script requires a GPU card, and also the "pdnn"
# toolkit to train the DNN. The input filterbank features are mean- and
# variance-normalized.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn_fbank
gmmdir=exp/tri4b

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_nodup data/lang $gmmdir ${gmmdir}_ali_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Generate the fbank features. We generate the 40-dimensional fbanks on each frame
echo "--num-mel-bins=40" > conf/fbank.conf
echo "--sample-frequency=8000" >> conf/fbank.conf
mkdir -p $working_dir/data
if [ ! -d $working_dir/data/train ]; then
  cp -r data/train_nodup $working_dir/data/train
  ( cd $working_dir/data/train; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 24 $working_dir/data/train $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/train || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/train $working_dir/_log $working_dir/_fbank || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  cp -r data/train_dev $working_dir/data/valid
  ( cd $working_dir/data/valid; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 12 $working_dir/data/valid $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/valid || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/valid $working_dir/_log $working_dir/_fbank || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  cp -r data/eval2000 $working_dir/data/eval2000
  ( cd $working_dir/data/eval2000; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 12 $working_dir/data/eval2000 $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/eval2000 || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/eval2000 $working_dir/_log $working_dir/_fbank || exit 1;
fi
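# (An illustrative aside, not part of the recipe: unlike the "fake" stats used
# in the fMLLR setup, these CMVN stats are real, and build_nnet_pfile.sh below
# is called with --norm-vars true. To inspect exactly what the network will
# consume, the normalization can be applied by hand:
#   apply-cmvn --norm-vars=true --utt2spk=ark:$working_dir/data/train/utt2spk \
#     scp:$working_dir/data/train/cmvn.scp scp:$working_dir/data/train/feats.scp ark:- | \
#     feat-to-dim ark:- -   # expect 40
# )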
echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, inputs include 11 frames of filterbanks
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars true --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars true --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 7 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi
if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 7 \
      --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"

--------------------------------------------------------------------------------
/run_swbd/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN model over fMLLR features. It is to be
# run after run.sh. Before running this, you should have already built the initial
# GMM model. This script requires a GPU, and also the "pdnn" toolkit to train
# the DNN.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn
gmmdir=exp/tri4b

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log
! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_nodup data/lang $gmmdir ${gmmdir}_ali_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Dump fMLLR features. "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_nodup \
    $working_dir/data/train data/train_nodup $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_dev \
    $working_dir/data/valid data/train_dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/valid $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode_eval2000_sw1_tg \
    $working_dir/data/eval2000 data/eval2000 $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/eval2000 $working_dir/_log $working_dir/_fmllr || exit 1;
fi

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, DNN inputs include 11 frames of fMLLR
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 7 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 7 \
      --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
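For a rough sense of the model this trains: with 11 spliced frames of 40-dimensional fMLLR features (440 inputs), seven 2048-unit hidden layers, and a hypothetical pdf count of 8000 (the real value comes from gmm-info above), the weight count works out to

echo $(( 440*2048 + 6*2048*2048 + 2048*8000 ))   # => 42450944, i.e. about 42M weights (biases excluded)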
--------------------------------------------------------------------------------
/run_swbd_110h/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# exp/tri4a : the SAT model
%WER 25.1 | 1831 21395 | 77.5 15.5 7.0 2.6 25.1 62.8 | exp/tri4a/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 32.7 | 4459 42989 | 70.9 20.7 8.5 3.5 32.7 69.1 | exp/tri4a/decode_eval2000_sw1_tg/score_14/eval2000.ctm.filt.sys

# results of pdnn systems

# run-dnn.sh
%WER 19.2 | 1831 21395 | 83.0 11.5 5.5 2.2 19.2 57.8 | exp_pdnn_110h/dnn/decode_eval2000_sw1_tg/score_10/eval2000.ctm.swbd.filt.sys
%WER 25.6 | 4459 42989 | 77.2 15.8 7.0 2.8 25.6 63.2 | exp_pdnn_110h/dnn/decode_eval2000_sw1_tg/score_10/eval2000.ctm.filt.sys

# run-bnf-tandem.sh
%WER 18.7 | 1831 21395 | 83.0 11.2 5.7 1.8 18.7 56.8 | exp_pdnn_110h/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_29/eval2000.ctm.swbd.filt.sys
%WER 25.5 | 4459 42989 | 76.9 15.5 7.6 2.4 25.5 62.9 | exp_pdnn_110h/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_27/eval2000.ctm.filt.sys

%WER 18.1 | 1831 21395 | 83.5 10.9 5.6 1.6 18.1 55.3 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_27/eval2000.ctm.swbd.filt.sys
%WER 18.0 | 1831 21395 | 83.5 10.8 5.6 1.6 18.0 55.3 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_26/eval2000.ctm.swbd.filt.sys
%WER 18.0 | 1831 21395 | 83.6 10.7 5.7 1.6 18.0 55.5 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.swbd.filt.sys
%WER 25.1 | 4459 42989 | 77.1 14.9 8.1 2.1 25.1 62.4 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_26/eval2000.ctm.filt.sys
%WER 25.0 | 4459 42989 | 77.0 14.6 8.5 2.0 25.0 62.4 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_26/eval2000.ctm.filt.sys
%WER 25.2 | 4459 42989 | 76.8 14.4 8.8 2.0 25.2 62.5 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.filt.sys

# run-dnn-fbank.sh
%WER 21.7 | 1831 21395 | 80.8 13.2 6.0 2.5 21.7 60.0 | exp_pdnn_110h/dnn_fbank/decode_eval2000_sw1_tg/score_10/eval2000.ctm.swbd.filt.sys
%WER 28.2 | 4459 42989 | 74.7 17.4 8.0 2.9 28.2 65.2 | exp_pdnn_110h/dnn_fbank/decode_eval2000_sw1_tg/score_10/eval2000.ctm.filt.sys

# run-cnn.sh.

--------------------------------------------------------------------------------
/run_swbd_110h/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN model over fMLLR features. It is to be
# run after run.sh. Before running this, you should have already built the initial
# GMM model. This script requires a GPU, and also the "pdnn" toolkit to train
# the DNN.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn
gmmdir=exp/tri4a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;


echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_100k_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_100k_nodup data/lang $gmmdir ${gmmdir}_ali_100k_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Dump fMLLR features. "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_100k_nodup \
    $working_dir/data/train data/train_100k_nodup $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_dev \
    $working_dir/data/valid data/train_dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/valid $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode_eval2000_sw1_tg \
    $working_dir/data/eval2000 data/eval2000 $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/eval2000 $working_dir/_log $working_dir/_fmllr || exit 1;
fi

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, DNN inputs include 11 frames of fMLLR
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

# To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:4"
if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --lrate "D:0.08:0.5:0.2,0.2:8" --param-output-file $working_dir/nnet.param \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
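The comment before the fine-tuning block mentions a dropout variant. Spelled out against the run_DNN.py call above (all other flags unchanged), the change would be:

# add this flag (one factor per hidden layer):
#   --dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2
# and replace the learning-rate schedule:
#   --lrate "D:0.08:0.5:0.2,0.2:8"   -->   --lrate "D:0.8:0.5:0.2,0.2:4"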
--------------------------------------------------------------------------------
/run_swbd_110h/sat/run-dnn-fbank-sat.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that performs speaker adaptive training (SAT) of the
# DNN model which has been trained on the filterbank features. It is to be
# run after run-dnn-fbank.sh.

# Yajie Miao, Hao Zhang, Florian Metze. "Towards Speaker Adaptive Training
# of Deep Neural Network Acoustic Models". Interspeech 2014.

# You need two additional commands to execute this recipe: get-spkvec-feat
# and add-feats. Download the following two source files and put them under
# src/featbin. Compiling them will then give you the required commands.

# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/get-spkvec-feat.cc
# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/add-feats.cc
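# (A hedged note on building those binaries: assuming a compiled Kaldi source
# tree, one way is to drop the two .cc files into src/featbin, add
# get-spkvec-feat and add-feats to the BINFILES list in src/featbin/Makefile,
# and rebuild that directory, e.g.:
#   cd $KALDI_ROOT/src/featbin && make
# where $KALDI_ROOT is the root of your Kaldi checkout.)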
# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn_fbank_sat
initdnn_dir=exp_pdnn_110h/dnn_fbank   # the directory of the initial DNN model

gmmdir=exp/tri4a   # GMM model directory

# I-vectors for the training and decoding speakers. There should be an ivector.scp
# file in each of these two directories.
train_ivec=exp_ivec/ivectors_swbd1
decode_ivec=exp_ivec/ivectors_eval2000

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

# Check whether i-vectors have been generated
for f in $train_ivec/ivector.scp $decode_ivec/ivector.scp; do
  [ ! -f $f ] && echo "Error i-vectors for $f have NOT been extracted. Check/Run run_swbd_110h/sat/run-ivec-extract.sh." && exit 1;
done
# Check whether the initial DNN has been trained
if [ ! -f $initdnn_dir/nnet.param ]; then
  echo "Error the initial DNN $initdnn_dir/nnet.param has NOT been trained" && exit 1;
fi

# Prepare dataset; copy related files from the initial DNN directory
ln -s $PWD/$initdnn_dir/data $working_dir/data || exit 1;
cp $initdnn_dir/{splice_opts,norm_vars,add_deltas} $working_dir || exit 1;
splice_opts=`cat $working_dir/splice_opts 2>/dev/null`   # frame-splicing options.
norm_vars=`cat $working_dir/norm_vars 2>/dev/null`       # variance normalization?
add_deltas=`cat $working_dir/add_deltas 2>/dev/null`     # add deltas?

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================

if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $train_ivec $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/valid ${gmmdir}_ali_dev $train_ivec $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "                       SAT-DNN Fine-tuning                          "
echo =====================================================================
num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;
ivec_dim=`feat-to-dim scp:$train_ivec/ivector.scp ark,t:- | head -1 | awk '{print $2}'` || exit 1;
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;
feat_dim=$[$feat_dim-$ivec_dim]

# NOTE: the definition of "--si-nnet-spec" here has to be the same as "--nnet-spec" in run-dnn-fbank.sh
if [ ! -f $working_dir/sat.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/sat.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN_SAT.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --si-nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --adapt-nnet-spec "$ivec_dim:512:512:512" --init-model $initdnn_dir/nnet.param \
      --lrate "D:0.08:0.5:0.05,0.05:0" --param-output-file $working_dir/nnet.param \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/sat.fine.done
fi

# Remove the last line " *** ***" of dnn.nnet.adapt, because the output layer of the adaptation network
# uses the linear activation function
( cd $working_dir; head -n -1 dnn.nnet.adapt > dnn.nnet.adapt.tmp; mv dnn.nnet.adapt.tmp dnn.nnet.adapt; )

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/sat/decode_dnn_ivec.sh --nj 24 --scoring-opts "--min-lmwt 8 --max-lmwt 16" --cmd "$decode_cmd" --ivec-type speaker \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $decode_ivec $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
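To make the feat_dim arithmetic above concrete: with the 40-dimensional fbanks of run-dnn-fbank.sh spliced +/-5 and the 100-dimensional i-vectors produced by run-ivec-extract.sh, each pfile row carries 40 * 11 + 100 = 540 values, so feat_dim works out back to 540 - 100 = 440 and ivec_dim to 100; that is, --si-nnet-spec starts at 440 and --adapt-nnet-spec at 100.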
--------------------------------------------------------------------------------
/run_swbd_110h/sat/run-dnn-sat.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that performs speaker adaptive training (SAT) of the
# DNN model which has been trained on the fMLLR features. It is to be run
# after run-dnn.sh.

# Yajie Miao, Hao Zhang, Florian Metze. "Towards Speaker Adaptive Training
# of Deep Neural Network Acoustic Models". Interspeech 2014.

# You need two additional commands to execute this recipe: get-spkvec-feat
# and add-feats. Download the following two source files and put them under
# src/featbin. Compiling them will then give you the required commands.

# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/get-spkvec-feat.cc
# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/add-feats.cc

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn_sat
initdnn_dir=exp_pdnn_110h/dnn   # the directory of the initial DNN model

gmmdir=exp/tri4a   # GMM model directory

# I-vectors for the training and decoding speakers. There should be an ivector.scp
# file in each of these two directories.
train_ivec=exp_ivec/ivectors_swbd1
decode_ivec=exp_ivec/ivectors_eval2000

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

# Check whether i-vectors have been generated
for f in $train_ivec/ivector.scp $decode_ivec/ivector.scp; do
  [ ! -f $f ] && echo "Error i-vectors for $f have NOT been extracted. Check/Run run_swbd_110h/sat/run-ivec-extract.sh." && exit 1;
done
# Check whether the initial DNN has been trained
[ ! -f $initdnn_dir/nnet.param ] && echo "Error the initial DNN $initdnn_dir/nnet.param has NOT been trained" && exit 1;

# Prepare dataset; copy related files from the initial DNN directory
ln -s $PWD/$initdnn_dir/data $working_dir/data || exit 1;
cp $initdnn_dir/{splice_opts,norm_vars,add_deltas} $working_dir || exit 1;
splice_opts=`cat $working_dir/splice_opts 2>/dev/null`   # frame-splicing options.
norm_vars=`cat $working_dir/norm_vars 2>/dev/null`       # variance normalization?
add_deltas=`cat $working_dir/add_deltas 2>/dev/null`     # add deltas?

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $train_ivec $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
-f $working_dir/valid.pfile.done ]; then 95 | steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \ 96 | --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \ 97 | --ivec-type speaker \ 98 | $working_dir/data/valid ${gmmdir}_ali_dev $train_ivec $working_dir || exit 1 99 | touch $working_dir/valid.pfile.done 100 | fi 101 | 102 | echo ===================================================================== 103 | echo " SAT-DNN Fine-tuning " 104 | echo ===================================================================== 105 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 106 | ivec_dim=`feat-to-dim scp:$train_ivec/ivector.scp ark,t:- | head -1 | awk '{print $2}'` || exit 1; 107 | feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 108 | feat_dim=$[$feat_dim-$ivec_dim] 109 | 110 | # NOTE: the definition of "--si-nnet-spec" here has to be the same as "--nnet-spec" in run-dnn.sh 111 | if [ ! -f $working_dir/sat.fine.done ]; then 112 | echo "Fine-tuning DNN" 113 | $cmd $working_dir/log/sat.fine.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/run_DNN_SAT.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \ 118 | --si-nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 119 | --adapt-nnet-spec "$ivec_dim:512:512:512" --init-model $initdnn_dir/nnet.param \ 120 | --lrate "D:0.08:0.5:0.05,0.05:0" --param-output-file $working_dir/nnet.param \ 121 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 122 | touch $working_dir/sat.fine.done 123 | fi 124 | 125 | # Remove the last line " *** ***" of dnn.nnet.adapt, because the output layer of the adaptation network 126 | # uses the linear activation function 127 | ( cd $working_dir; head -n -1 dnn.nnet.adapt > dnn.nnet.adapt.tmp; mv dnn.nnet.adapt.tmp dnn.nnet.adapt; ) 128 | 129 | echo ===================================================================== 130 | echo " Decoding " 131 | echo ===================================================================== 132 | if [ ! -f $working_dir/decode.done ]; then 133 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 134 | graph_dir=$gmmdir/graph_sw1_tg 135 | steps_pdnn/sat/decode_dnn_ivec.sh --nj 24 --scoring-opts "--min-lmwt 8 --max-lmwt 16" --cmd "$decode_cmd" --ivec-type speaker \ 136 | $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $decode_ivec $working_dir/decode_eval2000_sw1_tg || exit 1; 137 | touch $working_dir/decode.done 138 | fi 139 | 140 | echo "Finish !!" 141 | -------------------------------------------------------------------------------- /run_swbd_110h/sat/run-ivec-extract.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Apache 2.0 4 | # This is the script that trains an i-vector extractor on the entire set of 5 | # swbd1. The i-vectors will be used in the run-dnn-fbank-sat.sh and run-dnn 6 | # -sat.sh recipes. 7 | 8 | # For more information regarding the recipes and results, visit our website 9 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 10 | 11 | cmd=run.pl 12 | . cmd.sh 13 | [ -f path.sh ] && . ./path.sh 14 | . parse_options.sh || exit 1; 15 | 16 | # Link the scripts from the sre recipe to here 17 | if [ ! -d sid ]; then 18 | ln -s ../../sre08/v1/sid ./ 19 | fi 20 | mkdir -p data_ivec mfcc_ivec exp_ivec 21 | 22 | echo --------------------------------------------------------------------- 23 | echo "Train the i-vector extractor with the entire SWBD set (310 hours)" 24 | echo --------------------------------------------------------------------- 25 | 26 | # MFCC config borrowed from egs/sre08/v1 27 | echo "--sample-frequency=8000" > conf/mfcc.conf.ivec 28 | echo "--frame-length=20" >> conf/mfcc.conf.ivec 29 | echo "--low-freq=20" >> conf/mfcc.conf.ivec 30 | echo "--high-freq=3700" >> conf/mfcc.conf.ivec 31 | echo "--num-ceps=20" >> conf/mfcc.conf.ivec 32 | # Config for VAD (voice activity detection) borrowed from egs/sre08/v1 33 | echo "--vad-energy-threshold=5.5" > conf/vad.conf 34 | echo "--vad-energy-mean-scale=0.5" >> conf/vad.conf 35 | 36 | if [ ! -d data_ivec/swbd1 ]; then 37 | echo "Save features for swbd1" 38 | mkdir -p data_ivec/swbd1; cat data/train/wav.scp | awk '{gsub("^sw0","",$1); print $1 " " $2;}' > data_ivec/swbd1/wav.scp 39 | ( cd data_ivec/swbd1; cat wav.scp | awk '{print $1 " " $1}' > utt2spk; cp utt2spk spk2utt; ) 40 | 41 | steps/make_mfcc.sh --mfcc-config conf/mfcc.conf.ivec --nj 24 --cmd "$train_cmd" \ 42 | data_ivec/swbd1 exp_ivec/make_mfcc mfcc_ivec || exit 1; 43 | sid/compute_vad_decision.sh --nj 24 --cmd "$train_cmd" \ 44 | data_ivec/swbd1 exp_ivec/make_mfcc mfcc_ivec || exit 1; 45 | utils/fix_data_dir.sh data_ivec/swbd1 || exit 1; 46 | fi 47 | 48 | if [ ! -d data_ivec/eval2000 ]; then 49 | echo "Save features for eval2000" 50 | mkdir -p data_ivec/eval2000; cp data/eval2000/wav.scp data_ivec/eval2000 51 | ( cd data_ivec/eval2000; cat wav.scp | awk '{print $1 " " $1}' > utt2spk; cp utt2spk spk2utt; ) 52 | steps/make_mfcc.sh --mfcc-config conf/mfcc.conf.ivec --nj 24 --cmd "$train_cmd" \ 53 | data_ivec/eval2000 exp_ivec/make_mfcc mfcc_ivec || exit 1; 54 | sid/compute_vad_decision.sh --nj 24 --cmd "$train_cmd" \ 55 | data_ivec/eval2000 exp_ivec/make_mfcc mfcc_ivec || exit 1; 56 | utils/fix_data_dir.sh data_ivec/eval2000 || exit 1; 57 | fi 58 | 59 | # Train the diagonal and full UBMs 60 | if [ ! -f exp_ivec/diag_ubm_2048/final.dubm ]; then 61 | echo "Train the diagonal UBM" 62 | sid/train_diag_ubm.sh --parallel-opts "" --nj 24 --cmd "$train_cmd" \ 63 | data_ivec/swbd1 2048 exp_ivec/diag_ubm_2048 || exit 1; 64 | fi 65 | if [ ! -f exp_ivec/full_ubm_2048/final.ubm ]; then 66 | echo "Train the full UBM" 67 | sid/train_full_ubm.sh --nj 24 --cmd "$train_cmd" \ 68 | data_ivec/swbd1 exp_ivec/diag_ubm_2048 exp_ivec/full_ubm_2048 || exit 1; 69 | fi 70 | 71 | # Train the i-vector extractor 72 | if [ ! -f exp_ivec/extractor_2048/final.ie ]; then 73 | echo "Train the i-vector extractor" 74 | sid/train_ivector_extractor.sh --nj 24 --num-threads 1 --num-processes 1 \ 75 | --cmd "$train_cmd" --ivector-dim 100 --num-iters 10 \ 76 | exp_ivec/full_ubm_2048/final.ubm data_ivec/swbd1 exp_ivec/extractor_2048 77 | fi 78 | 79 | # Generate i-vectors on the training and testing (decoding) sets 80 | if [ ! -f exp_ivec/ivectors_swbd1/ivector.scp ]; then 81 | echo "Extract i-vectors for the swbd1 speakers" 82 | sid/extract_ivectors.sh --cmd "$train_cmd" --nj 24 \ 83 | exp_ivec/extractor_2048 data_ivec/swbd1 exp_ivec/ivectors_swbd1 84 | fi 85 | if [ !
-f exp_ivec/ivectors_eval2000/ivector.scp ]; then 86 | echo "Extract i-vectors for the eval2000 speakers" 87 | sid/extract_ivectors.sh --cmd "$train_cmd" --nj 24 \ 88 | exp_ivec/extractor_2048 data_ivec/eval2000 exp_ivec/ivectors_eval2000 89 | fi 90 | 91 | echo "Finish! Now you can safely delete data_ivec and mfcc_ivec." 92 | -------------------------------------------------------------------------------- /run_tedlium/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # results of tri3, the SAT system 4 | %WER 27.4 | 507 17792 | 77.9 16.6 5.5 5.4 27.4 96.1 | -0.176 | exp/tri3/decode_dev/score_18/ctm.filt.filt.sys 5 | %WER 24.7 | 1155 27512 | 79.6 16.3 4.1 4.3 24.7 93.2 | -0.053 | exp/tri3/decode_test/score_15/ctm.filt.filt.sys 6 | 7 | # -------------------------------------------------------------------------------------------------------------- 8 | # run-dnn.sh : DNN Models over fMLLR features 9 | # -------------------------------------------------------------------------------------------------------------- 10 | %WER 23.3 | 507 17792 | 81.9 13.9 4.2 5.2 23.3 94.1 | -0.117 | exp_pdnn/dnn/decode_dev/score_10/ctm.filt.filt.sys 11 | %WER 20.4 | 1155 27512 | 83.1 13.0 3.9 3.5 20.4 90.7 | 0.020 | exp_pdnn/dnn/decode_test/score_11/ctm.filt.filt.sys 12 | # when set "--momentum 0.9" during SdA pre-training 13 | %WER 23.4 | 507 17792 | 81.7 13.9 4.4 5.1 23.4 93.9 | -0.094 | exp_pdnn/dnn/decode_dev/score_10/ctm.filt.filt.sys 14 | %WER 20.2 | 1155 27512 | 83.2 12.9 3.9 3.4 20.2 91.1 | 0.003 | exp_pdnn/dnn/decode_test/score_11/ctm.filt.filt.sys 15 | 16 | # -------------------------------------------------------------------------------------------------------------- 17 | # run-dnn-maxout.sh : Deep Maxout Networks over fMLLR features 18 | # -------------------------------------------------------------------------------------------------------------- 19 | %WER 22.9 | 507 17792 | 82.2 13.4 4.4 5.2 22.9 94.1 | -0.148 | exp_pdnn/dnn_maxout/decode_dev/score_11/ctm.filt.filt.sys 20 | %WER 19.7 | 1155 27512 | 83.6 12.6 3.8 3.3 19.7 90.1 | -0.032 | exp_pdnn/dnn_maxout/decode_test/score_12/ctm.filt.filt.sys 21 | 22 | # -------------------------------------------------------------------------------------------------------------- 23 | # run-bnf-tandem.sh : Tandem Systems with BNFs Trained on fMLLRs 24 | # -------------------------------------------------------------------------------------------------------------- 25 | # LDA+MLLT 26 | %WER 23.4 | 507 17792 | 81.9 13.6 4.5 5.3 23.4 93.7 | -0.185 | exp_pdnn/bnf_tandem/tri4/decode_dev/score_29/ctm.filt.filt.sys 27 | %WER 20.4 | 1155 27512 | 83.5 13.1 3.4 4.0 20.4 90.1 | -0.075 | exp_pdnn/bnf_tandem/tri4/decode_test/score_30/ctm.filt.filt.sys 28 | # MMI 29 | %WER 22.7 | 507 17792 | 82.2 13.2 4.6 4.9 22.7 93.7 | -0.157 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it1/score_29/ctm.filt.filt.sys 30 | %WER 22.4 | 507 17792 | 82.4 13.2 4.3 4.8 22.4 93.7 | -0.164 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it2/score_26/ctm.filt.filt.sys 31 | %WER 22.0 | 507 17792 | 82.5 13.0 4.5 4.6 22.0 93.7 | -0.150 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it3/score_26/ctm.filt.filt.sys 32 | %WER 22.1 | 507 17792 | 82.4 12.9 4.7 4.5 22.1 94.3 | -0.146 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it4/score_26/ctm.filt.filt.sys 33 | %WER 19.8 | 1155 27512 | 83.9 12.6 3.4 3.8 19.8 89.6 | -0.050 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it1/score_28/ctm.filt.filt.sys 34 | %WER 19.5 | 1155 27512 | 84.0 12.3 3.7 
3.5 19.5 89.4 | -0.029 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it2/score_28/ctm.filt.filt.sys 35 | %WER 19.3 | 1155 27512 | 84.2 12.2 3.6 3.5 19.3 88.9 | -0.018 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it3/score_26/ctm.filt.filt.sys 36 | %WER 19.4 | 1155 27512 | 84.0 12.2 3.9 3.4 19.4 89.7 | 0.004 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it4/score_26/ctm.filt.filt.sys 37 | # SGMM 38 | %WER 23.0 | 507 17792 | 81.5 13.4 5.1 4.5 23.0 94.1 | -0.183 | exp_pdnn/bnf_tandem/sgmm5a/decode_dev/score_20/ctm.filt.filt.sys 39 | %WER 20.6 | 1155 27512 | 82.7 13.1 4.2 3.3 20.6 90.7 | -0.070 | exp_pdnn/bnf_tandem/sgmm5a/decode_test/score_20/ctm.filt.filt.sys 40 | 41 | # -------------------------------------------------------------------------------------------------------------- 42 | # run-dnn-fbank.sh : DNN Models over filterbank features 43 | # -------------------------------------------------------------------------------------------------------------- 44 | %WER 24.5 | 507 17792 | 80.8 14.8 4.4 5.4 24.5 96.1 | -0.007 | exp_pdnn/dnn_fbank/decode_dev/score_10/ctm.filt.filt.sys 45 | %WER 21.4 | 1155 27512 | 82.6 13.8 3.7 3.9 21.4 91.7 | 0.068 | exp_pdnn/dnn_fbank/decode_test/score_10/ctm.filt.filt.sys 46 | 47 | # -------------------------------------------------------------------------------------------------------------- 48 | # run-cnn.sh : CNN Models over filterbank features 49 | # -------------------------------------------------------------------------------------------------------------- 50 | %WER 22.7 | 507 17792 | 82.7 13.6 3.7 5.4 22.7 93.7 | -0.174 | exp_pdnn/cnn/decode_dev/score_10/ctm.filt.filt.sys 51 | %WER 19.7 | 1155 27512 | 83.9 12.6 3.4 3.7 19.7 90.0 | -0.014 | exp_pdnn/cnn/decode_test/score_11/ctm.filt.filt.sys 52 | -------------------------------------------------------------------------------- /run_tedlium/run-dnn-maxout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains Maxout Network models over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # We implement the maxout activation function based on Kaldi revision 4960. 10 | # Please follow these steps: 11 | # 1. Go to /path/to/kaldi/src/nnet and *backup* nnet-component.h, nnet-component.cc, nnet-activation.h 12 | # 2. Download these 3 files from here: 13 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.h 14 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.cc 15 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-activation.h 16 | # 3. Recompile Kaldi 17 | 18 | # For more information regarding the recipes and results, visit the website 19 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 20 | 21 | working_dir=exp_pdnn/dnn_maxout 22 | gmmdir=exp/tri3 23 | 24 | # Specify the gpu device to be used 25 | gpu=gpu 26 | 27 | cmd=run.pl 28 | . cmd.sh 29 | [ -f path.sh ] && . ./path.sh 30 | . parse_options.sh || exit 1; 31 | 32 | # At this point you may want to make sure the directory $working_dir is 33 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 34 | if [ ! -d pdnn ]; then 35 | echo "Checking out PDNN code." 36 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 37 | fi 38 | 39 | if [ ! -d steps_pdnn ]; then 40 | echo "Checking out steps_pdnn scripts." 41 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 42 | fi 43 | 44 | if ! nvidia-smi; then 45 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | fi 48 | 49 | # The hope here is that Theano has been installed either to python or to python2.6 50 | pythonCMD=python 51 | if ! python -c 'import theano;'; then 52 | if ! python2.6 -c 'import theano;'; then 53 | echo "Theano does not seem to be installed on your machine. Not continuing." 54 | echo "(Please install Theano first, then re-run this script.)" 55 | exit 1; 56 | else 57 | pythonCMD=python2.6 58 | fi 59 | fi 60 | 61 | mkdir -p $working_dir/log 62 | 63 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 64 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 65 | 66 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 67 | 68 | echo ===================================================================== 69 | echo " Data Split & Alignment & Feature Preparation " 70 | echo ===================================================================== 71 | # Split training data into training and cross-validation sets for DNN 72 | if [ ! -d data/train_tr95 ]; then 73 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 74 | fi 75 | # Alignment on the training and validation data. 76 | for set in tr95 cv05; do 77 | if [ ! -d ${gmmdir}_ali_$set ]; then 78 | steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \ 79 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 80 | fi 81 | done 82 | # Dump fMLLR features. "Fake" cmvn stats (zero mean, unit variance) are applied. 83 | for set in tr95 cv05; do 84 | if [ ! -d $working_dir/data/train_$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \ 86 | --transform-dir ${gmmdir}_ali_$set \ 87 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | for set in dev test; do 93 | if [ ! -d $working_dir/data/$set ]; then 94 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 95 | --transform-dir $gmmdir/decode_$set \ 96 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 97 | steps/compute_cmvn_stats.sh --fake \ 98 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 99 | fi 100 | done 101 | 102 | echo ===================================================================== 103 | echo " Training and Cross-Validation Pfiles " 104 | echo ===================================================================== 105 | # By default, DNN inputs include 11 frames of fMLLR 106 | for set in tr95 cv05; do 107 | if [ !
-f $working_dir/${set}.pfile.done ]; then 108 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 109 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 110 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 111 | touch $working_dir/${set}.pfile.done 112 | fi 113 | done 114 | 115 | echo ===================================================================== 116 | echo " DNN Pre-training & Fine-tuning " 117 | echo ===================================================================== 118 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 119 | 120 | if [ ! -f $working_dir/dnn.fine.done ]; then 121 | echo "Fine-tuning DNN" 122 | $cmd $working_dir/log/dnn.fine.log \ 123 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 124 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 125 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 126 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 127 | --nnet-spec "$feat_dim:650:650:650:650:650:650:$num_pdfs" \ 128 | --activation "maxout:3" \ 129 | --lrate "D:0.008:0.5:0.2,0.2:8" \ 130 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 131 | touch $working_dir/dnn.fine.done 132 | fi 133 | 134 | echo ===================================================================== 135 | echo " Decoding " 136 | echo ===================================================================== 137 | if [ ! -f $working_dir/decode.done ]; then 138 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 139 | graph_dir=$gmmdir/graph 140 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 141 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 142 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 143 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 144 | touch $working_dir/decode.done 145 | fi 146 | # Decoding with our own pruned trigram LM. 147 | if [ ! -f $working_dir/decode.bd.done ] && [ -d $gmmdir/graph_bd_tgpr ]; then 148 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 149 | graph_dir=$gmmdir/graph_bd_tgpr 150 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 151 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr || exit 1; 152 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 153 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr || exit 1; 154 | touch $working_dir/decode.bd.done 155 | fi 156 | 157 | echo "Finish !!" 158 | -------------------------------------------------------------------------------- /run_tedlium/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 
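#
# A quick sanity-check sketch (an editorial addition, not part of the original
# recipe): with the "--left-context=5 --right-context=5" splicing used below,
# the DNN input dimension is (5+1+5) * base feature dim, e.g. 11 * 40 = 440
# for typical 40-dimensional fMLLR features. Once the pfiles are built, the
# value can be double-checked against the pfile header, the same field the
# feat_dim line below reads:
#   gunzip -c $working_dir/train_tr95.pfile.1.gz | head | grep num_features
#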
8 | 9 | # For more information regarding the recipes and results, visit the website 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Please install Theano first, then re-run this script.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into training and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data. 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | # Dump fMLLR features. "Fake" cmvn stats (zero mean, unit variance) are applied. 74 | for set in tr95 cv05; do 75 | if [ ! -d $working_dir/data/train_$set ]; then 76 | steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \ 77 | --transform-dir ${gmmdir}_ali_$set \ 78 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 79 | steps/compute_cmvn_stats.sh --fake \ 80 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 81 | fi 82 | done 83 | for set in dev test; do 84 | if [ !
-d $working_dir/data/$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 86 | --transform-dir $gmmdir/decode_$set \ 87 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | 93 | echo ===================================================================== 94 | echo " Training and Cross-Validation Pfiles " 95 | echo ===================================================================== 96 | # By default, DNN inputs include 11 frames of fMLLR 97 | for set in tr95 cv05; do 98 | if [ ! -f $working_dir/${set}.pfile.done ]; then 99 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 100 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 101 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 102 | touch $working_dir/${set}.pfile.done 103 | fi 104 | done 105 | 106 | echo ===================================================================== 107 | echo " DNN Pre-training & Fine-tuning " 108 | echo ===================================================================== 109 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 110 | 111 | if [ ! -f $working_dir/dnn.ptr.done ]; then 112 | echo "SDA Pre-training" 113 | $cmd $working_dir/log/dnn.ptr.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 118 | --1stlayer-reconstruct-activation "tanh" \ 119 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 120 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 121 | touch $working_dir/dnn.ptr.done 122 | fi 123 | 124 | if [ ! -f $working_dir/dnn.fine.done ]; then 125 | echo "Fine-tuning DNN" 126 | $cmd $working_dir/log/dnn.fine.log \ 127 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 128 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 129 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 130 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 131 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 132 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 133 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 134 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 135 | touch $working_dir/dnn.fine.done 136 | fi 137 | 138 | echo ===================================================================== 139 | echo " Decoding " 140 | echo ===================================================================== 141 | if [ ! 
-f $working_dir/decode.done ]; then 142 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 143 | graph_dir=$gmmdir/graph 144 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 145 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 146 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 147 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 148 | touch $working_dir/decode.done 149 | fi 150 | # Decoding with our own LM. This trigram LM is trained over TED talk transcripts and is pruned. 151 | if [ ! -f $working_dir/decode.bd.done ] && [ -d $gmmdir/graph_bd_tgpr ]; then 152 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 153 | graph_dir=$gmmdir/graph_bd_tgpr 154 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 155 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr || exit 1; 156 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 157 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr || exit 1; 158 | touch $working_dir/decode.bd.done 159 | fi 160 | 161 | echo "Finish !!" 162 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-lhuc-bkup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | stage=1 19 | 20 | cmd=run.pl 21 | . cmd.sh 22 | [ -f path.sh ] && . ./path.sh 23 | . parse_options.sh || exit 1; 24 | 25 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 26 | 27 | echo ===================================================================== 28 | echo " Prepare Adaptation Data & Alignment " 29 | echo ===================================================================== 30 | 31 | firstpass=$working_dir/decode_dev_bd_tgpr 32 | dir=$working_dir/decode_dev_bd_tgpr_lhuc_V3 33 | mkdir -p $dir/log 34 | 35 | nj=8 36 | echo $nj > $dir/num_jobs 37 | 38 | if [ $stage -eq 1 ]; then 39 | 40 | $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 41 | lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c $firstpass/lat.JOB.gz|" ark:- \| \ 42 | lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 43 | rm -rf $dir/tra.* 44 | 45 | cp $gmmdir/final.mdl $dir 46 | for set in dev; do 47 | if [ ! -f $dir/${set}.pfile.done ]; then 48 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 49 | --norm-vars true --splice-opts "--left-context=5 --right-context=5" \ 50 | $working_dir/data/$set $dir $dir || exit 1 51 | touch $dir/${set}.pfile.done 52 | fi 53 | done 54 | 55 | fi 56 | 57 | echo ===================================================================== 58 | echo " DNN Pre-training & Fine-tuning " 59 | echo ===================================================================== 60 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 61 | 62 | #if [ ! -f $decode_dir/dnn.fine.done ]; then 63 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 64 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 65 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 66 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 67 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 68 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 69 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 70 | # --lrate "C:0.8:3" \ 71 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 72 | # touch $working_dir/dnn.fine.done 73 | #fi 74 | 75 | echo ===================================================================== 76 | echo " Decoding " 77 | echo ===================================================================== 78 | if [ $stage -eq 2 ]; then 79 | graph_dir=$gmmdir/graph_bd_tgpr 80 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 81 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $dir || exit 1; 82 | fi 83 | 84 | echo "Finish !!" 85 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | stage=1 19 | inv_acoustic_scale=10 20 | 21 | cmd=run.pl 22 | . cmd.sh 23 | [ -f path.sh ] && . ./path.sh 24 | . parse_options.sh || exit 1; 25 | 26 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 27 | 28 | echo ===================================================================== 29 | echo " Prepare Adaptation Data & Alignment " 30 | echo ===================================================================== 31 | 32 | if [ $stage -eq 1 ]; then 33 | 34 | for set in dev test; do 35 | dir=$working_dir/decode_${set}_bd_tgpr_lhuc 36 | mkdir -p $dir/log 37 | 38 | case $set in 39 | dev) nj=8;; 40 | test) nj=11;; 41 | *) echo "$0: invalid set name $set" && exit 1; 42 | esac 43 | 44 | echo $nj > $dir/num_jobs 45 | 46 | $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 47 | lattice-scale --inv-acoustic-scale=$inv_acoustic_scale "ark:gunzip -c $working_dir/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 48 | lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 49 | 50 | rm -rf $dir/tra.* 51 | 52 | cp $gmmdir/final.mdl $dir 53 | if [ ! -f $dir/${set}.pfile.done ]; then 54 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 55 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 56 | $working_dir/data/$set $dir $dir || exit 1 57 | touch $dir/${set}.pfile.done 58 | fi 59 | done 60 | fi 61 | 62 | echo ===================================================================== 63 | echo " DNN Pre-training & Fine-tuning " 64 | echo ===================================================================== 65 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 66 | 67 | #if [ ! -f $decode_dir/dnn.fine.done ]; then 68 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 69 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 70 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 71 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 72 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 73 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 74 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 75 | # --lrate "C:0.8:3" \ 76 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 77 | # touch $working_dir/dnn.fine.done 78 | #fi 79 | 80 | echo ===================================================================== 81 | echo " Decoding " 82 | echo ===================================================================== 83 | if [ $stage -eq 2 ]; then 84 | graph_dir=$gmmdir/graph_bd_tgpr 85 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 86 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 87 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 88 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 89 | fi 90 | 91 | echo "Finish !!" 92 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-sat-lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank_sat 14 | gmmdir=exp/tri3 15 | dnndir=exp_pdnn/dnn_fbank 16 | 17 | # Specify the gpu device to be used 18 | gpu=gpu 19 | stage=1 20 | 21 | cmd=run.pl 22 | . cmd.sh 23 | [ -f path.sh ] && . ./path.sh 24 | . parse_options.sh || exit 1; 25 | 26 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 27 | 28 | echo ===================================================================== 29 | echo " Prepare Adaptation Data & Alignment " 30 | echo ===================================================================== 31 | 32 | #ivec_dir="/data/ASR5/babel/ymiao/Install/kaldi-latest/egs/sre08/v1/exp_tedlium_V2/ivectors_devtest" 33 | ivec_dir="/data/ASR5/babel/ymiao/Install/kaldi-latest/egs/sre08/v1/exp_tedlium_bnf/ivectors_devtest" 34 | 35 | if [ $stage -eq 1 ]; then 36 | 37 | for set in dev test; do 38 | dir=$working_dir/decode_${set}_bd_tgpr_lhuc 39 | mkdir -p $dir/log 40 | 41 | case $set in 42 | dev) nj=8;; 43 | test) nj=11;; 44 | *) echo "$0: invalid set name $set" && exit 1; 45 | esac 46 | 47 | echo $nj > $dir/num_jobs 48 | 49 | steps_pdnn/sat/make_feat_with_ivec.sh --nj $nj --cmd "$train_cmd" --ivec-type speaker \ 50 | $working_dir/data_ivec/$set $working_dir/data/$set $working_dir $ivec_dir $working_dir/_log $working_dir/_ivec || exit 1; 51 | steps/compute_cmvn_stats.sh --fake \ 52 | $working_dir/data_ivec/$set $working_dir/_log $working_dir/_ivec || exit 1; 53 | 54 | # $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 55 | # lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c $working_dir/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 56 | # lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 57 | 58 | # $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 59 | # lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c exp_pdnn/dnn_fbank/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 60 | # lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 61 | 62 | rm -rf $dir/tra.* 63 | 64 | cp $gmmdir/final.mdl $dir 65 | if [ ! -f $dir/${set}.pfile.done ]; then 66 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 67 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 68 | $working_dir/data_ivec/$set $dir $dir || exit 1 69 | touch $dir/${set}.pfile.done 70 | fi 71 | done 72 | fi 73 | 74 | echo ===================================================================== 75 | echo " DNN Pre-training & Fine-tuning " 76 | echo ===================================================================== 77 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 78 | 79 | #if [ !
-f $decode_dir/dnn.fine.done ]; then 80 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 81 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 82 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 83 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 84 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 85 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 86 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 87 | # --lrate "C:0.8:3" \ 88 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 89 | # touch $working_dir/dnn.fine.done 90 | #fi 91 | 92 | echo ===================================================================== 93 | echo " Decoding " 94 | echo ===================================================================== 95 | 96 | if [ $stage -eq 2 ]; then 97 | graph_dir=$gmmdir/graph_bd_tgpr 98 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 99 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 100 | $graph_dir $working_dir/data_ivec/dev ${dnndir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 101 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 102 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 103 | $graph_dir $working_dir/data_ivec/test ${dnndir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 104 | 105 | # steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 106 | # --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 107 | # $graph_dir $working_dir/data_ivec/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 108 | # steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 109 | # --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 110 | # $graph_dir $working_dir/data_ivec/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 111 | 112 | fi 113 | 114 | echo "Finish !!" 
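# Usage sketch (our reading of the stage logic above, not an invocation taken
# from the original scripts): the stage tests use "-eq", so the two stages are
# mutually exclusive and the script is meant to be run twice, with
# parse_options.sh mapping --stage onto the "stage" variable defined at the top:
#   bash run_tedlium/tmp/run-dnn-sat-lhuc.sh --stage 1   # dump i-vector features and adaptation pfiles
#   bash run_tedlium/tmp/run-dnn-sat-lhuc.sh --stage 2   # LHUC decoding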
115 | -------------------------------------------------------------------------------- /run_timit/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # -------------------------------------------------------------------------------------------------------------- 4 | # run-dnn.sh : DNN Models over fMLLR features 5 | # -------------------------------------------------------------------------------------------------------------- 6 | %WER 18.8 | 400 15057 | 83.4 11.4 5.1 2.3 18.8 99.8 | -0.592 | exp_pdnn/dnn/decode_dev/score_5/ctm_39phn.filt.sys 7 | %WER 20.2 | 192 7215 | 82.9 11.9 5.2 3.1 20.2 100.0 | -0.917 | exp_pdnn/dnn/decode_test/score_4/ctm_39phn.filt.sys 8 | # with dropout applied 9 | %WER 18.0 | 400 15057 | 84.3 11.2 4.5 2.3 18.0 99.3 | -0.706 | exp_pdnn/dnn_dropout/decode_dev/score_5/ctm_39phn.filt.sys 10 | %WER 19.6 | 192 7215 | 83.3 11.9 4.8 3.0 19.6 99.5 | -1.070 | exp_pdnn/dnn_dropout/decode_test/score_4/ctm_39phn.filt.sys 11 | 12 | # -------------------------------------------------------------------------------------------------------------- 13 | # run-dnn-maxout.sh : Deep Maxout Networks over fMLLR features 14 | # -------------------------------------------------------------------------------------------------------------- 15 | %WER 17.5 | 400 15057 | 85.0 10.8 4.2 2.4 17.5 99.8 | -0.859 | exp_pdnn/dnn_maxout/decode_dev/score_5/ctm_39phn.filt.sys 16 | %WER 19.0 | 192 7215 | 83.4 11.9 4.7 2.4 19.0 99.5 | -0.894 | exp_pdnn/dnn_maxout/decode_test/score_5/ctm_39phn.filt.sys 17 | # with dropout applied 18 | %WER 16.7 | 400 15057 | 85.1 10.3 4.5 1.9 16.7 99.3 | -0.515 | exp_pdnn/dnn_maxout+dropout/decode_dev/score_7/ctm_39phn.filt.sys 19 | %WER 18.0 | 192 7215 | 84.5 11.1 4.3 2.5 18.0 99.0 | -1.070 | exp_pdnn/dnn_maxout+dropout/decode_test/score_5/ctm_39phn.filt.sys 20 | # by setting momentum to 0.9, you can improve dnn by ~1.0% and maxout by ~0.5% 21 | 22 | # -------------------------------------------------------------------------------------------------------------- 23 | # run-bnf-tandem.sh : Tandem Systems with BNFs Trained on fMLLRs 24 | # -------------------------------------------------------------------------------------------------------------- 25 | # LDA+MLLT 26 | %WER 16.8 | 400 15057 | 85.5 11.2 3.3 2.4 16.8 99.3 | -1.195 | exp_pdnn/bnf_tandem/tri4/decode_dev/score_11/ctm_39phn.filt.sys 27 | %WER 18.5 | 192 7215 | 84.1 12.1 3.9 2.6 18.5 99.5 | -1.223 | exp_pdnn/bnf_tandem/tri4/decode_test/score_12/ctm_39phn.filt.sys 28 | # MMI 29 | %WER 16.7 | 400 15057 | 85.8 11.0 3.2 2.5 16.7 99.0 | -1.188 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it1/score_11/ctm_39phn.filt.sys 30 | %WER 18.4 | 192 7215 | 84.3 12.0 3.7 2.8 18.4 99.5 | -1.191 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it1/score_12/ctm_39phn.filt.sys 31 | # SGMM 32 | %WER 16.3 | 400 15057 | 85.7 10.8 3.5 2.0 16.3 99.0 | -1.114 | exp_pdnn/bnf_tandem/sgmm5a/decode_dev/score_9/ctm_39phn.filt.sys 33 | %WER 17.8 | 192 7215 | 84.2 11.4 4.4 2.0 17.8 100.0 | -0.978 | exp_pdnn/bnf_tandem/sgmm5a/decode_test/score_12/ctm_39phn.filt.sys 34 | # MMI-SGMM 35 | %WER 16.5 | 400 15057 | 86.0 10.9 3.2 2.5 16.5 98.8 | -1.119 | exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_dev_it1/score_10/ctm_39phn.filt.sys 36 | %WER 17.9 | 192 7215 | 85.1 11.6 3.3 3.0 17.9 99.0 | -1.303 | exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_test_it1/score_8/ctm_39phn.filt.sys 37 | 38 | # -------------------------------------------------------------------------------------------------------------- 
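# A sketch for regenerating the numbers below (assuming the standard Kaldi
# sclite scoring layout; utils/best_wer.sh ships with Kaldi):
#   for d in exp_pdnn/*/decode*; do
#     grep Sum $d/score_*/ctm_39phn.filt.sys 2>/dev/null | utils/best_wer.sh
#   done
# --------------------------------------------------------------------------------------------------------------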
39 | # run-dnn-fbank.sh : DNN Models over filterbank features 40 | # -------------------------------------------------------------------------------------------------------------- 41 | %WER 22.5 | 400 15057 | 80.1 14.3 5.6 2.6 22.5 99.8 | -0.340 | exp_pdnn/dnn_fbank/decode_dev/score_5/ctm_39phn.filt.sys 42 | %WER 24.0 | 192 7215 | 78.7 15.1 6.1 2.8 24.0 100.0 | -0.244 | exp_pdnn/dnn_fbank/decode_test/score_5/ctm_39phn.filt.sys 43 | # if momentum increased to 0.9 for faster convergence 44 | %WER 20.2 | 400 15057 | 82.4 12.9 4.7 2.6 20.2 99.5 | -0.424 | exp_pdnn/dnn_fbank/decode_dev/score_5/ctm_39phn.filt.sys 45 | %WER 21.6 | 192 7215 | 81.2 13.7 5.1 2.8 21.6 100.0 | -0.397 | exp_pdnn/dnn_fbank/decode_test/score_5/ctm_39phn.filt.sys 46 | 47 | # -------------------------------------------------------------------------------------------------------------- 48 | # run-cnn.sh : CNN Models over filterbank features 49 | # -------------------------------------------------------------------------------------------------------------- 50 | %WER 19.0 | 400 15057 | 83.2 12.0 4.8 2.2 19.0 99.3 | -0.392 | exp_pdnn/cnn/decode_dev/score_6/ctm_39phn.filt.sys 51 | %WER 19.7 | 192 7215 | 82.6 12.5 4.9 2.3 19.7 99.0 | -0.530 | exp_pdnn/cnn/decode_test/score_5/ctm_39phn.filt.sys 52 | -------------------------------------------------------------------------------- /run_timit/kaldi_io/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains DNN model by reading the Kaldi feature (.scp) and align- 5 | # ment (.ali) files directly. It is to be run after run.sh. Before running 6 | # this, you should already build the initial GMM model. This script requires 7 | # a GPU, and also the "pdnn" toolkit to train the DNN. 8 | 9 | # For more informaiton regarding the recipes and results, visit the webiste 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn_kaldi_io 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! 
gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into traing and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | 74 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 75 | for set in tr95 cv05; do 76 | if [ ! -d $working_dir/data/train_$set ]; then 77 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 78 | --transform-dir ${gmmdir}_ali_$set \ 79 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 80 | steps/compute_cmvn_stats.sh --fake \ 81 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 82 | fi 83 | done 84 | for set in dev test; do 85 | if [ ! -d $working_dir/data/$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 87 | --transform-dir $gmmdir/decode_$set \ 88 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | 94 | echo ===================================================================== 95 | echo " Training and Cross-Validation Pfiles " 96 | echo ===================================================================== 97 | # By default, DNN inputs include 11 frames of fMLLR 98 | for set in tr95 cv05; do 99 | if [ ! -f $working_dir/${set}.netdata.done ]; then 100 | steps_pdnn/make_nnet_data.sh --nj 10 --cmd "$train_cmd" --norm-vars false \ 101 | --splice-opts "--left-context=5 --right-context=5" \ 102 | $working_dir/data_nnet/train_$set $working_dir/data/train_$set \ 103 | $working_dir/_nnet_input ${gmmdir}_ali_$set $working_dir || exit 1 104 | touch $working_dir/${set}.netdata.done 105 | fi 106 | done 107 | # Shuffle the scp list 108 | cat $working_dir/data_nnet/train_tr95/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $working_dir/train_tr95.scp 109 | cat $working_dir/data_nnet/train_cv05/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $working_dir/train_cv05.scp 110 | 111 | echo ===================================================================== 112 | echo " DNN Pre-training & Fine-tuning " 113 | echo ===================================================================== 114 | 115 | if [ ! 
-f $working_dir/dnn.ptr.done ]; then 116 | echo "RBM Pre-training" 117 | $cmd $working_dir/log/dnn.ptr.log \ 118 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 119 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 120 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train_tr95.scp,partition=1000m,random=true,stream=false" \ 121 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 122 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 123 | touch $working_dir/dnn.ptr.done 124 | fi 125 | 126 | if [ ! -f $working_dir/dnn.fine.done ]; then 127 | echo "Fine-tuning DNN" 128 | $cmd $working_dir/log/dnn.fine.log \ 129 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 130 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 131 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.scp,label=train_tr95.ali.gz,partition=1000m,random=true,stream=false" \ 132 | --valid-data "$working_dir/train_cv05.scp,label=train_cv05.ali.gz,partition=200m,random=true,stream=false" \ 133 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 134 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 135 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 136 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 137 | touch $working_dir/dnn.fine.done 138 | fi 139 | 140 | echo ===================================================================== 141 | echo " Decoding " 142 | echo ===================================================================== 143 | if [ ! -f $working_dir/decode.done ]; then 144 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 145 | graph_dir=$gmmdir/graph 146 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 147 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 148 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 149 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 150 | 151 | touch $working_dir/decode.done 152 | fi 153 | 154 | echo "Finish !!" 155 | -------------------------------------------------------------------------------- /run_timit/run-cnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains CNN model over the filterbank features. It is to be run 5 | # after run.sh. Before running this, you should already build the initial GMM 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # CNN. The input filterbank features are with mean and variance normalization. 8 | 9 | # The input features and CNN architecture follow the IBM configuration: 10 | # Hagen Soltau, George Saon, and Tara N. Sainath. Joint Training of Convolu- 11 | # tional and non-Convolutional Neural Networks 12 | 13 | # For more informaiton regarding the recipes and results, visit the webiste 14 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 15 | 16 | working_dir=exp_pdnn/cnn 17 | gmmdir=exp/tri3 18 | 19 | # Specify the gpu device to be used 20 | gpu=gpu 21 | 22 | cmd=run.pl 23 | . cmd.sh 24 | [ -f path.sh ] && . ./path.sh 25 | . parse_options.sh || exit 1; 26 | 27 | # At this point you may want to make sure the directory $working_dir is 28 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 29 | if [ ! 
-d pdnn ]; then 30 | echo "Checking out PDNN code." 31 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 32 | fi 33 | 34 | if [ ! -d steps_pdnn ]; then 35 | echo "Checking out steps_pdnn scripts." 36 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 37 | fi 38 | 39 | if ! nvidia-smi; then 40 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 41 | echo "(Note: this script might still work, it would just be slower.)" 42 | fi 43 | 44 | # The hope here is that Theano has been installed either to python or to python2.6 45 | pythonCMD=python 46 | if ! python -c 'import theano;'; then 47 | if ! python2.6 -c 'import theano;'; then 48 | echo "Theano does not seem to be installed on your machine. Not continuing." 49 | echo "(Note: this script might still work, it would just be slower.)" 50 | exit 1; 51 | else 52 | pythonCMD=python2.6 53 | fi 54 | fi 55 | 56 | mkdir -p $working_dir/log 57 | 58 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 59 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 60 | 61 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 62 | 63 | echo ===================================================================== 64 | echo " Data Split & Alignment & Feature Preparation " 65 | echo ===================================================================== 66 | # Split training data into traing and cross-validation sets for DNN 67 | if [ ! -d data/train_tr95 ]; then 68 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 69 | fi 70 | # Alignment on the training and validation data 71 | for set in tr95 cv05; do 72 | if [ ! -d ${gmmdir}_ali_$set ]; then 73 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 74 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 75 | fi 76 | done 77 | 78 | # Generate the fbank features: 40-dimensional fbanks on each frame 79 | echo "--num-mel-bins=40" > conf/fbank.conf 80 | mkdir -p $working_dir/data 81 | for set in train_tr95 train_cv05 dev test; do 82 | if [ ! -d $working_dir/data/$set ]; then 83 | cp -r data/$set $working_dir/data/$set 84 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 85 | steps/make_fbank.sh --cmd "$train_cmd" --nj 16 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 86 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 87 | fi 88 | done 89 | 90 | echo ===================================================================== 91 | echo " Training and Cross-Validation Pfiles " 92 | echo ===================================================================== 93 | # By default, CNN inputs include 11 frames of filterbanks, and with delta 94 | # and double-deltas. 95 | for set in tr95 cv05; do 96 | if [ ! 
-f $working_dir/${set}.pfile.done ]; then 97 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true --add-deltas true \ 98 | --splice-opts "--left-context=5 --right-context=5" \ 99 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 100 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 101 | touch $working_dir/${set}.pfile.done 102 | fi 103 | done 104 | # Rename pfiles to keep consistency 105 | ( cd $working_dir; 106 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 107 | ) 108 | 109 | echo ===================================================================== 110 | echo " CNN Fine-tuning " 111 | echo ===================================================================== 112 | # The CNN is configured so that it has (approximately) the same number of trainable parameters 113 | # as the DNN (e.g., the DNN in run-dnn-fbank.sh). Also, we adopt "--momentum 0.9" because the 114 | # CNN over filterbanks seems to converge slowly; the larger momentum speeds up convergence. 115 | if [ ! -f $working_dir/cnn.fine.done ]; then 116 | echo "Fine-tuning CNN" 117 | $cmd $working_dir/log/cnn.fine.log \ 118 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 119 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 120 | $pythonCMD pdnn/cmds/run_CNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 121 | --valid-data "$working_dir/valid.pfile.gz,partition=400m,random=true,stream=false" \ 122 | --conv-nnet-spec "3x11x40:147,9x9,p1x3:147,3x4,p1x1,f" \ 123 | --nnet-spec "1024:1024:1024:$num_pdfs" \ 124 | --lrate "D:0.08:0.5:0.2,0.2:4" --momentum 0.9 \ 125 | --wdir $working_dir --param-output-file $working_dir/nnet.param \ 126 | --cfg-output-file $working_dir/nnet.cfg --kaldi-output-file $working_dir/dnn.nnet || exit 1; 127 | touch $working_dir/cnn.fine.done 128 | fi 129 | 130 | echo ===================================================================== 131 | echo " Dump Convolution-Layer Activation " 132 | echo ===================================================================== 133 | mkdir -p $working_dir/data_conv 134 | for set in dev test; do 135 | if [ ! -d $working_dir/data_conv/$set ]; then 136 | steps_pdnn/make_conv_feat.sh --nj 12 --cmd "$decode_cmd" \ 137 | $working_dir/data_conv/$set $working_dir/data/$set $working_dir $working_dir/nnet.param \ 138 | $working_dir/nnet.cfg $working_dir/_log $working_dir/_conv || exit 1; 139 | # Generate *fake* CMVN stats here. 140 | steps/compute_cmvn_stats.sh --fake \ 141 | $working_dir/data_conv/$set $working_dir/_log $working_dir/_conv || exit 1; 142 | fi 143 | done 144 | 145 | echo ===================================================================== 146 | echo " Decoding " 147 | echo ===================================================================== 148 | # In decoding, we take the convolution-layer activations as inputs and the 149 | # fully-connected layers as the DNN model. So we set --norm-vars, --add-deltas 150 | # and --splice-opts accordingly. 151 | if [ !
-f $working_dir/decode.done ]; then 152 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 153 | graph_dir=$gmmdir/graph 154 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 155 | --norm-vars false --add-deltas false --splice-opts "--left-context=0 --right-context=0" \ 156 | $graph_dir $working_dir/data_conv/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 157 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 158 | --norm-vars false --add-deltas false --splice-opts "--left-context=0 --right-context=0" \ 159 | $graph_dir $working_dir/data_conv/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 160 | touch $working_dir/decode.done 161 | fi 162 | 163 | echo "Finish !!" 164 | -------------------------------------------------------------------------------- /run_timit/run-dnn-fbank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are with mean and 8 | # variance normalization. 9 | 10 | # For more informaiton regarding the recipes and results, visit the webiste 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | 19 | cmd=run.pl 20 | . cmd.sh 21 | [ -f path.sh ] && . ./path.sh 22 | . parse_options.sh || exit 1; 23 | 24 | # At this point you may want to make sure the directory $working_dir is 25 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 26 | if [ ! -d pdnn ]; then 27 | echo "Checking out PDNN code." 28 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 29 | fi 30 | 31 | if [ ! -d steps_pdnn ]; then 32 | echo "Checking out steps_pdnn scripts." 33 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 34 | fi 35 | 36 | if ! nvidia-smi; then 37 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 38 | echo "(Note: this script might still work, it would just be slower.)" 39 | fi 40 | 41 | # The hope here is that Theano has been installed either to python or to python2.6 42 | pythonCMD=python 43 | if ! python -c 'import theano;'; then 44 | if ! python2.6 -c 'import theano;'; then 45 | echo "Theano does not seem to be installed on your machine. Not continuing." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | exit 1; 48 | else 49 | pythonCMD=python2.6 50 | fi 51 | fi 52 | 53 | mkdir -p $working_dir/log 54 | 55 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 56 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 57 | 58 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 59 | 60 | echo ===================================================================== 61 | echo " Data Split & Alignment & Feature Preparation " 62 | echo ===================================================================== 63 | # Split training data into traing and cross-validation sets for DNN 64 | if [ ! 
-d data/train_tr95 ]; then 65 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 66 | fi 67 | # Alignment on the training and validation data 68 | for set in tr95 cv05; do 69 | if [ ! -d ${gmmdir}_ali_$set ]; then 70 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 71 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 72 | fi 73 | done 74 | 75 | # Generate the fbank features: 40-dimensional fbanks on each frame 76 | echo "--num-mel-bins=40" > conf/fbank.conf 77 | mkdir -p $working_dir/data 78 | for set in train_tr95 train_cv05 dev test; do 79 | if [ ! -d $working_dir/data/$set ]; then 80 | cp -r data/$set $working_dir/data/$set 81 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 82 | steps/make_fbank.sh --cmd "$train_cmd" --nj 16 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 83 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 84 | fi 85 | done 86 | 87 | echo ===================================================================== 88 | echo " Training and Cross-Validation Pfiles " 89 | echo ===================================================================== 90 | # By default, DNN inputs include 11 frames of filterbanks 91 | for set in tr95 cv05; do 92 | if [ ! -f $working_dir/${set}.pfile.done ]; then 93 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true \ 94 | --splice-opts "--left-context=5 --right-context=5" \ 95 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 96 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 97 | touch $working_dir/${set}.pfile.done 98 | fi 99 | done 100 | # Rename pfiles to keep consistency 101 | ( cd $working_dir; 102 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 103 | ) 104 | 105 | echo ===================================================================== 106 | echo " DNN Pre-training & Fine-tuning " 107 | echo ===================================================================== 108 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 109 | 110 | if [ ! -f $working_dir/dnn.ptr.done ]; then 111 | echo "RBM Pre-training" 112 | $cmd $working_dir/log/dnn.ptr.log \ 113 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 114 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 115 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 116 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 117 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 118 | touch $working_dir/dnn.ptr.done 119 | fi 120 | 121 | # For SDA pre-training 122 | #$pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 123 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 124 | # --1stlayer-reconstruct-activation "tanh" \ 125 | # --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 126 | # --ptr-layer-number 4 --epoch-number 5 || exit 1; 127 | 128 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:8" 129 | # Check run_timit/RESULTS for the results 130 | 131 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 132 | echo "Fine-tuning DNN" 133 | $cmd $working_dir/log/dnn.fine.log \ 134 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 135 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 136 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 137 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 138 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 139 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 140 | --lrate "D:0.08:0.5:0.2,0.2:8" --momentum 0.9 \ 141 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 142 | touch $working_dir/dnn.fine.done 143 | fi 144 | 145 | echo ===================================================================== 146 | echo " Decoding " 147 | echo ===================================================================== 148 | if [ ! -f $working_dir/decode.done ]; then 149 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 150 | graph_dir=$gmmdir/graph 151 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 152 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 153 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 154 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 155 | touch $working_dir/decode.done 156 | fi 157 | 158 | echo "Finish !!" 159 | -------------------------------------------------------------------------------- /run_timit/run-dnn-maxout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains maxout network models over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should have already built the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # We implement the maxout activation function based on Kaldi "revision 4960". 10 | # To set it up, follow these steps: 11 | # 1. Go to /path/to/kaldi/src/nnet and *backup* nnet-component.h, nnet-component.cc, nnet-activation.h 12 | # 2. Download these 3 files from here: 13 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.h 14 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.cc 15 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-activation.h 16 | # 3. Recompile Kaldi 17 | 18 | # For more information regarding the recipes and results, visit the website 19 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 20 | 21 | working_dir=exp_pdnn/dnn_maxout 22 | gmmdir=exp/tri3 23 | 24 | # Specify the gpu device to be used 25 | gpu=gpu 26 | 27 | cmd=run.pl 28 | . cmd.sh 29 | [ -f path.sh ] && . ./path.sh 30 | . parse_options.sh || exit 1; 31 | 32 | # At this point you may want to make sure the directory $working_dir is 33 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 34 | if [ ! -d pdnn ]; then 35 | echo "Checking out PDNN code." 36 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 37 | fi 38 | 39 | if [ ! -d steps_pdnn ]; then 40 | echo "Checking out steps_pdnn scripts." 41 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 42 | fi 43 | 44 | if !
nvidia-smi; then 45 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | fi 48 | 49 | # The hope here is that Theano has been installed either to python or to python2.6 50 | pythonCMD=python 51 | if ! python -c 'import theano;'; then 52 | if ! python2.6 -c 'import theano;'; then 53 | echo "Theano does not seem to be installed on your machine. Not continuing." 54 | echo "(Note: this script might still work, it would just be slower.)" 55 | exit 1; 56 | else 57 | pythonCMD=python2.6 58 | fi 59 | fi 60 | 61 | mkdir -p $working_dir/log 62 | 63 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 64 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 65 | 66 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 67 | 68 | echo ===================================================================== 69 | echo " Data Split & Alignment & Feature Preparation " 70 | echo ===================================================================== 71 | # Split training data into traing and cross-validation sets for DNN 72 | if [ ! -d data/train_tr95 ]; then 73 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 74 | fi 75 | # Alignment on the training and validation data 76 | for set in tr95 cv05; do 77 | if [ ! -d ${gmmdir}_ali_$set ]; then 78 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 79 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 80 | fi 81 | done 82 | 83 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 84 | for set in tr95 cv05; do 85 | if [ ! -d $working_dir/data/train_$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 87 | --transform-dir ${gmmdir}_ali_$set \ 88 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | for set in dev test; do 94 | if [ ! -d $working_dir/data/$set ]; then 95 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 96 | --transform-dir $gmmdir/decode_$set \ 97 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 98 | steps/compute_cmvn_stats.sh --fake \ 99 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 100 | fi 101 | done 102 | 103 | echo ===================================================================== 104 | echo " Training and Cross-Validation Pfiles " 105 | echo ===================================================================== 106 | # By default, DNN inputs include 11 frames of fMLLR 107 | for set in tr95 cv05; do 108 | if [ ! 
-f $working_dir/${set}.pfile.done ]; then 109 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false \ 110 | --splice-opts "--left-context=5 --right-context=5" \ 111 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 112 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 113 | touch $working_dir/${set}.pfile.done 114 | fi 115 | done 116 | # Rename pfiles to keep consistency 117 | ( cd $working_dir; 118 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 119 | ) 120 | 121 | echo ===================================================================== 122 | echo " DNN Pre-training & Fine-tuning " 123 | echo ===================================================================== 124 | # Here we use maxout networks. When using maxout, we need to reduce the learning rate. To apply dropout, 125 | # add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.1:0.5:0.2,0.2:8" 126 | # Check run_timit/RESULTS for the results 127 | 128 | # The network structure is set in the way that this maxout network has approximately the same number of 129 | # parameters as the DNN model in run-dnn.sh 130 | 131 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 132 | 133 | if [ ! -f $working_dir/dnn.fine.done ]; then 134 | echo "Fine-tuning DNN" 135 | $cmd $working_dir/log/dnn.fine.log \ 136 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 137 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 138 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 139 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 140 | --nnet-spec "$feat_dim:625:625:625:625:$num_pdfs" \ 141 | --activation "maxout:3" \ 142 | --lrate "D:0.008:0.5:0.2,0.2:8" \ 143 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 144 | touch $working_dir/dnn.fine.done 145 | fi 146 | 147 | echo ===================================================================== 148 | echo " Decoding " 149 | echo ===================================================================== 150 | if [ ! -f $working_dir/decode.done ]; then 151 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 152 | graph_dir=$gmmdir/graph 153 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 154 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 155 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 156 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 157 | touch $working_dir/decode.done 158 | fi 159 | 160 | echo "Finish !!" 161 | -------------------------------------------------------------------------------- /run_timit/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 
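#
# A note on the "--lrate" strings used throughout these recipes. This is our
# (hedged) reading of PDNN's schedule syntax, not an authoritative spec:
#   "D:0.08:0.5:0.2,0.2:8" appears to mean
#     D        exponential-decay ("newbob"-style) schedule
#     0.08     starting learning rate
#     0.5      factor applied to the rate once halving starts
#     0.2,0.2  halving starts / training stops when the validation-error
#              improvement between epochs falls below these thresholds
#     8        earliest epoch at which halving may begin
# For example, a more patient schedule could look like:
#   --lrate "D:0.08:0.5:0.05,0.05:15"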
8 | 9 | # For more information regarding the recipes and results, visit the website 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into training and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | 74 | # Dump fMLLR features. "Fake" CMVN stats (zero means and unit variances) are applied. 75 | for set in tr95 cv05; do 76 | if [ ! -d $working_dir/data/train_$set ]; then 77 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 78 | --transform-dir ${gmmdir}_ali_$set \ 79 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 80 | steps/compute_cmvn_stats.sh --fake \ 81 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 82 | fi 83 | done 84 | for set in dev test; do 85 | if [ !
-d $working_dir/data/$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 87 | --transform-dir $gmmdir/decode_$set \ 88 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | 94 | echo ===================================================================== 95 | echo " Training and Cross-Validation Pfiles " 96 | echo ===================================================================== 97 | # By default, DNN inputs include 11 frames of fMLLR 98 | for set in tr95 cv05; do 99 | if [ ! -f $working_dir/${set}.pfile.done ]; then 100 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false \ 101 | --splice-opts "--left-context=5 --right-context=5" \ 102 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 103 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 104 | touch $working_dir/${set}.pfile.done 105 | fi 106 | done 107 | # Rename pfiles to keep consistency 108 | ( cd $working_dir; 109 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 110 | ) 111 | 112 | echo ===================================================================== 113 | echo " DNN Pre-training & Fine-tuning " 114 | echo ===================================================================== 115 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 116 | 117 | if [ ! -f $working_dir/dnn.ptr.done ]; then 118 | echo "RBM Pre-training" 119 | $cmd $working_dir/log/dnn.ptr.log \ 120 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 121 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 122 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 123 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 124 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 125 | touch $working_dir/dnn.ptr.done 126 | fi 127 | 128 | # For SDA pre-training 129 | #$pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 130 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 131 | # --1stlayer-reconstruct-activation "tanh" \ 132 | # --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 133 | # --ptr-layer-number 4 --epoch-number 5 || exit 1; 134 | 135 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:8" 136 | # Check run_timit/RESULTS for the results 137 | 138 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 139 | echo "Fine-tuning DNN" 140 | $cmd $working_dir/log/dnn.fine.log \ 141 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 142 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 143 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 144 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 145 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 146 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 147 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 148 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 149 | touch $working_dir/dnn.fine.done 150 | fi 151 | 152 | echo ===================================================================== 153 | echo " Decoding " 154 | echo ===================================================================== 155 | if [ ! -f $working_dir/decode.done ]; then 156 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 157 | graph_dir=$gmmdir/graph 158 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 159 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 160 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 161 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 162 | touch $working_dir/decode.done 163 | fi 164 | 165 | echo "Finish !!" 166 | -------------------------------------------------------------------------------- /run_wsj/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # results of tri4b, the SAT system 4 | %WER 9.39 [ 773 / 8234, 103 ins, 108 del, 562 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_16 5 | %WER 6.15 [ 347 / 5643, 66 ins, 25 del, 256 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_14 6 | 7 | # below are results of the pdnn recipes 8 | 9 | # run-dnn.sh 10 | %WER 7.18 [ 591 / 8234, 74 ins, 92 del, 425 sub ] exp_pdnn/dnn/decode_bd_tgpr_dev93/wer_11 11 | %WER 4.08 [ 230 / 5643, 29 ins, 21 del, 180 sub ] exp_pdnn/dnn/decode_bd_tgpr_eval92/wer_11 12 | 13 | # run-bnf-tandem.sh -- apply mean normalization over BNF; the default config 14 | %WER 7.66 [ 631 / 8234, 86 ins, 94 del, 451 sub ] exp_pdnn/bnf_tandem/tri5a/decode_bd_tgpr_dev93/wer_32 15 | %WER 4.61 [ 260 / 5643, 41 ins, 18 del, 201 sub ] exp_pdnn/bnf_tandem/tri5a/decode_bd_tgpr_eval92/wer_33 16 | %WER 6.72 [ 553 / 8234, 55 ins, 95 del, 403 sub ] exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_bd_tgpr_dev93_it4/wer_30 17 | %WER 3.81 [ 215 / 5643, 22 ins, 13 del, 180 sub ] exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_32 18 | %WER 7.10 [ 585 / 8234, 66 ins, 102 del, 417 sub ] exp_pdnn/bnf_tandem/sgmm5a/decode_bd_tgpr_dev93/wer_26 19 | %WER 4.27 [ 241 / 5643, 30 ins, 17 del, 194 sub ] exp_pdnn/bnf_tandem/sgmm5a/decode_bd_tgpr_eval92/wer_26 20 | %WER 6.64 [ 547 / 8234, 69 ins, 85 del, 393 sub ] exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_bd_tgpr_dev93_it4/wer_20 21 | %WER 3.69 [ 208 / 5643, 27 ins, 10 del, 171 sub ] exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_20 22 | 23 | # run-dnn-fbank.sh 24 | %WER 7.38 [ 608 / 8234, 80 ins, 90 del, 438 sub ] exp_pdnn/dnn_fbank/decode_bd_tgpr_dev93/wer_10 25 | %WER 4.27 [ 241 / 5643, 36 ins, 14 del, 191 sub ] exp_pdnn/dnn_fbank/decode_bd_tgpr_eval92/wer_9 26 | 27 | # run-cnn.sh 28 | 29 | 30 | 
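# How to read the lines above: Kaldi scoring prints
#   %WER <err%> [ <errors> / <ref-words>, <ins> ins, <del> del, <sub> sub ] <decode-dir>/wer_<LMWT>
# where <LMWT> is the language-model weight that gave the lowest error rate.
# A quick way to re-collect such a summary (a sketch; adjust the glob to your
# own experiment directories):
#   for d in exp_pdnn/*/decode*; do grep WER $d/wer_* | utils/best_wer.sh; done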
-------------------------------------------------------------------------------- /run_wsj/run-dnn-fbank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are with mean and 8 | # variance normalization. 9 | 10 | # For more informaiton regarding the recipes and results, visit the webiste 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri4b 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | 19 | cmd=run.pl 20 | . cmd.sh 21 | [ -f path.sh ] && . ./path.sh 22 | . parse_options.sh || exit 1; 23 | 24 | # At this point you may want to make sure the directory $working_dir is 25 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 26 | if [ ! -d pdnn ]; then 27 | echo "Checking out PDNN code." 28 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 29 | fi 30 | 31 | if [ ! -d steps_pdnn ]; then 32 | echo "Checking out steps_pdnn scripts." 33 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 34 | fi 35 | 36 | if ! nvidia-smi; then 37 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 38 | echo "(Note: this script might still work, it would just be slower.)" 39 | fi 40 | 41 | # The hope here is that Theano has been installed either to python or to python2.6 42 | pythonCMD=python 43 | if ! python -c 'import theano;'; then 44 | if ! python2.6 -c 'import theano;'; then 45 | echo "Theano does not seem to be installed on your machine. Not continuing." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | exit 1; 48 | else 49 | pythonCMD=python2.6 50 | fi 51 | fi 52 | 53 | mkdir -p $working_dir/log 54 | 55 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 56 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 57 | 58 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 59 | 60 | echo ===================================================================== 61 | echo " Data Split & Alignment & Feature Preparation " 62 | echo ===================================================================== 63 | # Split training data into traing and cross-validation sets for DNN 64 | if [ ! -d data/train_tr95 ]; then 65 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train_si284 data/train_tr95 data/train_cv05 || exit 1 66 | fi 67 | # Alignment on the training and validation data 68 | for set in tr95 cv05; do 69 | if [ ! -d ${gmmdir}_ali_$set ]; then 70 | steps/align_fmllr.sh --nj 14 --cmd "$train_cmd" \ 71 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 72 | fi 73 | done 74 | 75 | # Generate the fbank features: 40-dimensional fbanks on each frame 76 | echo "--num-mel-bins=40" > conf/fbank.conf 77 | mkdir -p $working_dir/data 78 | for set in train_tr95 train_cv05; do 79 | if [ ! 
-d $working_dir/data/$set ]; then 80 | cp -r data/$set $working_dir/data/$set 81 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 82 | steps/make_fbank.sh --cmd "$train_cmd" --nj 14 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 83 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 84 | fi 85 | done 86 | 87 | for set in dev93 eval92; do 88 | if [ ! -d $working_dir/data/$set ]; then 89 | cp -r data/test_$set $working_dir/data/$set 90 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 91 | steps/make_fbank.sh --cmd "$train_cmd" --nj 8 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 92 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 93 | fi 94 | done 95 | 96 | echo ===================================================================== 97 | echo " Training and Cross-Validation Pfiles " 98 | echo ===================================================================== 99 | # By default, DNN inputs include 11 frames of filterbanks 100 | for set in tr95 cv05; do 101 | if [ ! -f $working_dir/${set}.pfile.done ]; then 102 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true --do-concat false \ 103 | --splice-opts "--left-context=5 --right-context=5" \ 104 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 105 | touch $working_dir/${set}.pfile.done 106 | fi 107 | done 108 | 109 | echo ===================================================================== 110 | echo " DNN Pre-training & Fine-tuning " 111 | echo ===================================================================== 112 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 113 | 114 | if [ ! -f $working_dir/dnn.ptr.done ]; then 115 | echo "SDA Pre-training" 116 | $cmd $working_dir/log/dnn.ptr.log \ 117 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 118 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 119 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=false" \ 120 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 121 | --1stlayer-reconstruct-activation "tanh" \ 122 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 123 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 124 | touch $working_dir/dnn.ptr.done 125 | fi 126 | 127 | if [ ! -f $working_dir/dnn.fine.done ]; then 128 | echo "Fine-tuning DNN" 129 | $cmd $working_dir/log/dnn.fine.log \ 130 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 131 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 132 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 133 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 134 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 135 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 136 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 137 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 138 | touch $working_dir/dnn.fine.done 139 | fi 140 | 141 | echo ===================================================================== 142 | echo " Decoding " 143 | echo ===================================================================== 144 | if [ ! 
-f $working_dir/decode.done ]; then 145 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 146 | graph_dir=$gmmdir/graph_bd_tgpr 147 | steps_pdnn/decode_dnn.sh --nj 10 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 148 | $graph_dir $working_dir/data/dev93 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_dev93 || exit 1; 149 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 150 | $graph_dir $working_dir/data/eval92 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_eval92 || exit 1; 151 | touch $working_dir/decode.done 152 | fi 153 | 154 | echo "Finish !!" 155 | -------------------------------------------------------------------------------- /run_wsj/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # For more informaiton regarding the recipes and results, visit the webiste 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri4b 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into traing and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train_si284 data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data. We set --nj to 14 because data/train_cv05 has 14 speakers. 67 | for set in tr95 cv05; do 68 | if [ ! 
-d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 14 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 74 | for set in tr95 cv05; do 75 | if [ ! -d $working_dir/data/train_$set ]; then 76 | steps/nnet/make_fmllr_feats.sh --nj 14 --cmd "$train_cmd" \ 77 | --transform-dir ${gmmdir}_ali_$set \ 78 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 79 | steps/compute_cmvn_stats.sh --fake \ 80 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 81 | fi 82 | done 83 | for set in dev93 eval92; do 84 | if [ ! -d $working_dir/data/$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 86 | --transform-dir $gmmdir/decode_bd_tgpr_$set \ 87 | $working_dir/data/$set data/test_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | 93 | echo ===================================================================== 94 | echo " Training and Cross-Validation Pfiles " 95 | echo ===================================================================== 96 | # By default, DNN inputs include 11 frames of fMLLR 97 | for set in tr95 cv05; do 98 | if [ ! -f $working_dir/${set}.pfile.done ]; then 99 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false --do-concat false \ 100 | --splice-opts "--left-context=5 --right-context=5" \ 101 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 102 | touch $working_dir/${set}.pfile.done 103 | fi 104 | done 105 | 106 | echo ===================================================================== 107 | echo " DNN Pre-training & Fine-tuning " 108 | echo ===================================================================== 109 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 110 | 111 | if [ ! -f $working_dir/dnn.ptr.done ]; then 112 | echo "SDA Pre-training" 113 | $cmd $working_dir/log/dnn.ptr.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 118 | --1stlayer-reconstruct-activation "tanh" \ 119 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 120 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 121 | touch $working_dir/dnn.ptr.done 122 | fi 123 | 124 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:4" 125 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 126 | echo "Fine-tuning DNN" 127 | $cmd $working_dir/log/dnn.fine.log \ 128 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 129 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 130 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 131 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 132 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 133 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 134 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 135 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 136 | touch $working_dir/dnn.fine.done 137 | fi 138 | 139 | echo ===================================================================== 140 | echo " Decoding " 141 | echo ===================================================================== 142 | if [ ! -f $working_dir/decode.done ]; then 143 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 144 | graph_dir=$gmmdir/graph_bd_tgpr 145 | steps_pdnn/decode_dnn.sh --nj 10 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 146 | $graph_dir $working_dir/data/dev93 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_dev93 || exit 1; 147 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 148 | $graph_dir $working_dir/data/eval92 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_eval92 || exit 1; 149 | touch $working_dir/decode.done 150 | fi 151 | 152 | echo "Finish !!" 153 | -------------------------------------------------------------------------------- /steps_pdnn/align_nnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely) 3 | # Apache 2.0 4 | 5 | # Aligns 'data' to sequences of transition-ids using a neural-network-based acoustic model. 6 | # Optionally produces alignments in lattice format; this is handy for getting word alignments. 7 | 8 | # Begin configuration section. 9 | nj=4 10 | cmd=run.pl 11 | stage=0 12 | # Begin configuration. 13 | scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" 14 | beam=10 15 | retry_beam=40 16 | 17 | splice_opts= 18 | norm_vars= 19 | add_deltas= 20 | 21 | align_to_lats=false # optionally produce alignment in lattice format 22 | lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice_beam=10" 23 | lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1" 24 | 25 | use_gpu="no" # yes|no|optionally 26 | # End configuration options. 27 | 28 | [ $# -gt 0 ] && echo "$0 $@" # Print the command line for logging 29 | 30 | [ -f path.sh ] && . ./path.sh # source the path. 31 | . parse_options.sh || exit 1; 32 | 33 | if [ $# != 4 ]; then 34 | echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" 35 | echo "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali" 36 | echo "main options (for others, see top of script file)" 37 | echo " --config <config-file> # config containing options" 38 | echo " --nj <nj> # number of parallel jobs" 39 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 40 | exit 1; 41 | fi 42 | 43 | data=$1 44 | lang=$2 45 | srcdir=$3 46 | dir=$4 47 | 48 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
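# For example, if the nnet inputs were spliced with "--left-context=5
# --right-context=5" at training time, build_nnet_pfile.sh wrote that string to
# $srcdir/splice_opts; reading it back here (and add_deltas/norm_vars below)
# guarantees alignment uses exactly the same feature pipeline as training.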
49 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 50 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 51 | 52 | oov=`cat $lang/oov.int` || exit 1; 53 | mkdir -p $dir/log 54 | echo $nj > $dir/num_jobs 55 | sdata=$data/split$nj 56 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 57 | 58 | cp $srcdir/{tree,final.mdl} $dir || exit 1; 59 | 60 | # Select default locations to model files 61 | nnet=$srcdir/final.nnet; 62 | model=$dir/final.mdl 63 | class_frame_counts=$srcdir/train_class.counts 64 | 65 | # Check that files exist 66 | for f in $sdata/1/feats.scp $sdata/1/text $lang/L.fst $nnet $model $class_frame_counts; do 67 | [ ! -f $f ] && echo "$0: missing file $f" && exit 1; 68 | done 69 | 70 | # PREPARE FEATURE EXTRACTION PIPELINE 71 | # Create the feature stream: 72 | ## Set up the features 73 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 74 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 75 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 76 | 77 | # Finally add feature_transform and the MLP 78 | feats="$feats nnet-forward --no-softmax=true --class-frame-counts=$class_frame_counts $nnet ark:- ark:- |" 79 | 80 | echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'" 81 | # Map oovs in reference transcription 82 | tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; 83 | # We could just use align-mapped in the next line, but it's less efficient as it compiles the 84 | # training graphs one by one. 85 | if [ $stage -le 0 ]; then 86 | $cmd JOB=1:$nj $dir/log/align.JOB.log \ 87 | compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ 88 | align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \ 89 | "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1; 90 | fi 91 | 92 | # Optionally align to lattice format (handy to get word alignment) 93 | if [ "$align_to_lats" == "true" ]; then 94 | echo "$0: aligning also to lattices '$dir/lat.*.gz'" 95 | $cmd JOB=1:$nj $dir/log/align_lat.JOB.log \ 96 | compile-train-graphs $lat_graph_scale $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ 97 | latgen-faster-mapped $lat_decode_opts --word-symbol-table=$lang/words.txt $dir/final.mdl ark:- \ 98 | "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; 99 | fi 100 | 101 | echo "$0: done aligning data." 102 | -------------------------------------------------------------------------------- /steps_pdnn/build_avnnet_pfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # 2015 Fei Tao 4 | # Apache 2.0 5 | 6 | # Create pfiles for deep neural network training. We assume that the training alignment 7 | # is ready and features (either fbanks and fMLLRs) have been generated. 8 | # Refer to the following comments for configurations. 9 | 10 | ## Begin configuration section. 11 | stage=1 12 | every_nth_frame=1 # for subsampling. 
13 | nj=4 14 | cmd=run.pl 15 | 16 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 17 | add_deltas=false 18 | norm_vars=false # when doing cmvn, whether to normalize variance 19 | 20 | do_concat=true # whether to concatenate the individual pfiles into a single one 21 | 22 | # Config for splitting pfile into training and valid set; not used for SWBD 23 | do_split=false # whether to do pfile splitting 24 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 25 | cv_ratio=0.05 # the ratio of CV data 26 | 27 | shuffle_scp=false # whether the feature scp is shuffled 28 | 29 | ## End configuration options. 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f path.sh ] && . ./path.sh # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 5 ]; then 37 | echo "Wrong #arguments ($#, expected 5)" 38 | echo "usage: steps_pdnn/build_avnnet_pfile.sh <data-dir> <ali-dir> <working-dir> <feat1-dim> <feat2-dim>" 39 | echo "e.g.: steps_pdnn/build_avnnet_pfile.sh data/train exp/tri4_ali exp/tri4_pfile 39 30" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # starts from which stage" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 44 | exit 1; 45 | fi 46 | 47 | data=$1 48 | alidir=$2 49 | dir=$3 50 | feat1dim=$4 # the first modality's feature dimension 51 | feat2dim=$5 # the second modality's feature dimension 52 | feat1ind=$(($feat1dim - 1)) 53 | feat2ind=$(($feat2dim - 1)) 54 | 55 | name=`basename $data` 56 | nj=`cat $alidir/num_jobs` || exit 1; 57 | sdata=$data/split$nj 58 | 59 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 60 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 61 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 62 | if [ -f $try_path/pfile_create ]; then 63 | PPATH=$try_path 64 | else 65 | echo "You do not have pfile_create (part of pfile-utils) on your path," 66 | echo "and it is not accessible in the normal place e.g.
$try_path/pfile_create" 67 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 68 | exit 1 69 | fi 70 | else 71 | PPATH=$(dirname `which pfile_create`) 72 | fi 73 | export PATH=$PATH:$PPATH 74 | 75 | mkdir -p $dir/log 76 | echo $splice_opts > $dir/splice_opts 77 | echo $norm_vars > $dir/norm_vars 78 | echo $add_deltas > $dir/add_deltas 79 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 80 | 81 | ## Setup features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | if $shuffle_scp; then 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | " 86 | audiofeats="$feats select-feats 0-$feat1ind ark:- ark:- |" 87 | videofeats="$feats select-feats $feat1dim-$feat2ind ark:- ark:- |" 88 | audiosplice="$audiofeats splice-feats $splice_opts ark:- ark:- |" 89 | videosplice="$videofeats splice-feats $splice_opts ark:- ark:- |" 90 | feats="paste-feats ark:$audiosplice ark:$videosplice ark:-" 91 | fi 92 | # Add first and second-order deltas if needed 93 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 94 | 95 | ## Get the dimension of the features 96 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 97 | feat-to-dim "$feats subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 98 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 99 | echo "$0: network inputs have the dimension of $feat_dim" 100 | 101 | if [ $stage -le 2 ]; then 102 | $cmd JOB=1:$nj $dir/log/build_pfile.$name.JOB.log \ 103 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 104 | "$feats" "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 105 | # Concatenate the pfiles into one 106 | all_pfiles="" 107 | for n in `seq 1 $nj`; do 108 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 109 | done 110 | if $do_concat; then 111 | $cmd $dir/log/pfile_cat.log \ 112 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 113 | rm -rf $dir/$name.pfile.* 114 | else 115 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 116 | fi 117 | fi 118 | 119 | if [ $stage -le 3 ] && $do_split; then 120 | echo "Split data into training and cross-validation" 121 | mkdir -p $dir/concat 122 | # Chop the whole pfile into small units 123 | $cmd $dir/log/pfile_burst.log \ 124 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 125 | fi 126 | 127 | if [ $stage -le 4 ] && $do_split; then 128 | # Split the units accoring to cv_ratio 129 | $cmd $dir/log/pfile_rconcat.log \ 130 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 131 | rm -r $dir/concat 132 | echo "## Info of the training pfile: ##" 133 | $PPATH/pfile_info $dir/train.pfile 134 | echo "## Info of the cross-validation pfile: ##" 135 | $PPATH/pfile_info $dir/valid.pfile 136 | fi 137 | 138 | echo "$0: done creating pfiles." 
139 | 140 | exit 0; 141 | -------------------------------------------------------------------------------- /steps_pdnn/build_nnet_pfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Create pfiles for deep neural network training. We assume that the training alignment 6 | # is ready and features (either fbanks or fMLLRs) have been generated. 7 | # Refer to the following comments for configurations. 8 | 9 | ## Begin configuration section. 10 | stage=1 11 | every_nth_frame=1 # for subsampling. 12 | nj=4 13 | cmd=run.pl 14 | 15 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 16 | add_deltas=false 17 | norm_vars=false # when doing cmvn, whether to normalize variance 18 | 19 | do_concat=true # whether to concatenate the individual pfiles into a single one 20 | 21 | # Config for splitting pfile into training and valid set; not used for SWBD 22 | do_split=false # whether to do pfile splitting 23 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 24 | cv_ratio=0.05 # the ratio of CV data 25 | 26 | shuffle_scp=false # whether the feature scp is shuffled 27 | 28 | ## End configuration options. 29 | 30 | echo "$0 $@" # Print the command line for logging 31 | 32 | [ -f path.sh ] && . ./path.sh # source the path. 33 | . parse_options.sh || exit 1; 34 | 35 | if [ $# != 3 ]; then 36 | echo "Wrong #arguments ($#, expected 3)" 37 | echo "usage: steps_pdnn/build_nnet_pfile.sh <data-dir> <ali-dir> <working-dir>" 38 | echo "e.g.: steps_pdnn/build_nnet_pfile.sh data/train exp/tri4_ali exp/tri4_pfile" 39 | echo "main options (for others, see top of script file)" 40 | echo " --stage <stage> # starts from which stage" 41 | echo " --nj <nj> # number of parallel jobs" 42 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 43 | exit 1; 44 | fi 45 | 46 | data=$1 47 | alidir=$2 48 | dir=$3 49 | 50 | name=`basename $data` 51 | nj=`cat $alidir/num_jobs` || exit 1; 52 | sdata=$data/split$nj 53 | 54 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 55 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 56 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 57 | if [ -f $try_path/pfile_create ]; then 58 | PPATH=$try_path 59 | else 60 | echo "You do not have pfile_create (part of pfile-utils) on your path," 61 | echo "and it is not accessible in the normal place e.g.
$try_path/pfile_create" 62 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 63 | exit 1 64 | fi 65 | else 66 | PPATH=$(dirname `which pfile_create`) 67 | fi 68 | export PATH=$PATH:$PPATH 69 | 70 | mkdir -p $dir/log 71 | echo $splice_opts > $dir/splice_opts 72 | echo $norm_vars > $dir/norm_vars 73 | echo $add_deltas > $dir/add_deltas 74 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 75 | 76 | ## Setup features 77 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 78 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 79 | if $shuffle_scp; then 80 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | splice-feats $splice_opts ark:- ark:- |" 81 | fi 82 | # Add first and second-order deltas if needed 83 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 84 | 85 | ## Get the dimension of the features 86 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 87 | feat-to-dim "$feats subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 88 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 89 | echo "$0: network inputs have the dimension of $feat_dim" 90 | 91 | if [ $stage -le 2 ]; then 92 | $cmd JOB=1:$nj $dir/log/build_pfile.$name.JOB.log \ 93 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 94 | "$feats" "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 95 | # Concatenate the pfiles into one 96 | all_pfiles="" 97 | for n in `seq 1 $nj`; do 98 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 99 | done 100 | if $do_concat; then 101 | $cmd $dir/log/pfile_cat.log \ 102 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 103 | rm -rf $dir/$name.pfile.* 104 | else 105 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 106 | fi 107 | fi 108 | 109 | if [ $stage -le 3 ] && $do_split; then 110 | echo "Split data into training and cross-validation" 111 | mkdir -p $dir/concat 112 | # Chop the whole pfile into small units 113 | $cmd $dir/log/pfile_burst.log \ 114 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 115 | fi 116 | 117 | if [ $stage -le 4 ] && $do_split; then 118 | # Split the units accoring to cv_ratio 119 | $cmd $dir/log/pfile_rconcat.log \ 120 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 121 | rm -r $dir/concat 122 | echo "## Info of the training pfile: ##" 123 | $PPATH/pfile_info $dir/train.pfile 124 | echo "## Info of the cross-validation pfile: ##" 125 | $PPATH/pfile_info $dir/valid.pfile 126 | fi 127 | 128 | echo "$0: done creating pfiles." 129 | 130 | exit 0; 131 | -------------------------------------------------------------------------------- /steps_pdnn/decode_avdnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # 2015 Fei Tao 5 | # Apache 2.0 6 | 7 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in 8 | # build_nnet_pfile.sh. Also, the DNN model has been trained and put in srcdir. 
9 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 10 | 11 | ## Begin configuration section 12 | stage=0 13 | nj=16 14 | cmd=run.pl 15 | num_threads=1 16 | 17 | max_active=7000 # max-active 18 | beam=15.0 # beam used 19 | latbeam=7.0 # beam used in getting lattices 20 | acwt=0.1 # acoustic weight used in getting lattices 21 | max_arcs=-1 22 | 23 | skip_scoring=false # whether to skip WER scoring 24 | scoring_opts= 25 | 26 | splice_opts= 27 | norm_vars= 28 | add_deltas= 29 | 30 | ## End configuration section 31 | 32 | echo "$0 $@" # Print the command line for logging 33 | 34 | [ -f ./path.sh ] && . ./path.sh; # source the path. 35 | . parse_options.sh || exit 1; 36 | 37 | if [ $# != 5 ]; then 38 | echo "Wrong #arguments ($#, expected 5)" 39 | echo "Usage: steps_pdnn/decode_avdnn.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir> <scoring-script-dir>" 40 | echo " e.g.: steps_pdnn/decode_avdnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode KaldiScripts" 41 | echo "main options (for others, see top of script file)" 42 | echo " --stage <stage> # stage to start from" 43 | echo " --nj <nj> # number of parallel jobs" 44 | echo " --cmd <cmd> # command used to run jobs in parallel" 45 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 46 | echo " --num-threads <n> # number of threads to use, default 1." 47 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 48 | echo " --scoring-opts <opts> # options to the scoring script" 49 | exit 1; 50 | fi 51 | 52 | graphdir=$1 53 | data=$2 54 | alidir=$3 55 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 56 | scoredir=$5 # directory of the scoring script 57 | 58 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 59 | sdata=$data/split$nj; 60 | 61 | thread_string= 62 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 63 | 64 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 65 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 66 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 67 | 68 | mkdir -p $dir/log 69 | split_data.sh $data $nj || exit 1; 70 | echo $nj > $dir/num_jobs 71 | 72 | # Some checks. Note: we don't need $srcdir/tree but we expect 73 | # it should exist, given the current structure of the scripts. 74 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 75 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 76 | done 77 | 78 | # Generate state counts; will be used as prior 79 | $cmd $dir/log/class_count.log \ 80 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 81 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 82 | 83 | ## Set up the features 84 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 86 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 87 | midfeats="$feats nnet-forward --no-softmax=true $srcdir/dnn.nnet.bran ark:- ark:-|" 88 | finalfeats="$midfeats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet.main ark:- ark:- |" 89 | #finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet ark:- ark:- |" 90 | 91 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 92 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 93 | 94 | # Copy the source model so that scoring can find it 95 | cp $alidir/final.mdl $srcdir 96 | 97 | if ! $skip_scoring ; then 98 | [ ! -x $scoredir/iskld_score_kaldi.sh ] && \ 99 | echo "$0: not scoring because $scoredir/iskld_score_kaldi.sh does not exist or is not executable." && exit 1; 100 | $scoredir/iskld_score_kaldi.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 101 | fi 102 | 103 | exit 0; 104 | -------------------------------------------------------------------------------- /steps_pdnn/decode_dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in 7 | # build_nnet_pfile.sh. Also, the DNN model must already have been trained and put in srcdir. 8 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 4 ]; then 37 | echo "Wrong #arguments ($#, expected 4)" 38 | echo "Usage: steps_pdnn/decode_dnn.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/decode_dnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g.
'-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 55 | 56 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 57 | sdata=$data/split$nj; 58 | 59 | thread_string= 60 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 61 | 62 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 63 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 64 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 65 | 66 | mkdir -p $dir/log 67 | split_data.sh $data $nj || exit 1; 68 | echo $nj > $dir/num_jobs 69 | 70 | # Some checks. Note: we don't need $srcdir/tree but we expect 71 | # it should exist, given the current structure of the scripts. 72 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 73 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 74 | done 75 | 76 | # Generate state counts; will be used as prior 77 | $cmd $dir/log/class_count.log \ 78 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 79 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 80 | 81 | ## Set up the features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 85 | ## 86 | finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet ark:- ark:- |" 87 | 88 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 89 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 90 | 91 | # Copy the source model so that scoring can find it 92 | cp $alidir/final.mdl $srcdir 93 | 94 | if ! $skip_scoring ; then 95 | [ ! -x local/score.sh ] && \ 96 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 97 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 98 | fi 99 | 100 | exit 0; 101 | -------------------------------------------------------------------------------- /steps_pdnn/make_bnf_feat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Copyright 2014 Xiaohui Zhang Johns Hopkins University 4 | # Apache 2.0 5 | 6 | # Make the BNF front-end with the trained neural network 7 | 8 | # Begin configuration section. 9 | stage=1 10 | nj=8 11 | cmd=run.pl 12 | 13 | splice_opts= 14 | norm_vars= 15 | add_deltas= 16 | # End configuration options. 17 | 18 | echo "$0 $@" # Print the command line for logging 19 | 20 | [ -f path.sh ] && . ./path.sh # source the path. 21 | .
parse_options.sh || exit 1; 22 | 23 | if [ $# != 5 ]; then 24 | echo "usage: steps_pdnn/make_bnf_feat.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <log-dir> <bnf-feat-dir>" 25 | echo "e.g.: steps_pdnn/make_bnf_feat.sh data_bnf/train data/train exp/bnf_net exp/make_bnf/log exp/bnf" 26 | echo "main options (for others, see top of script file)" 27 | echo " --config <config-file> # config containing options" 28 | echo " --nj <nj> # number of parallel jobs" 29 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 30 | exit 1; 31 | fi 32 | 33 | data=$1 34 | srcdata=$2 35 | netdir=$3 36 | logdir=$4 37 | feadir=$5 38 | 39 | sdata=$srcdata/split$nj; 40 | name=`basename $data` 41 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 42 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 43 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 44 | 45 | mkdir -p $data $logdir $feadir 46 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 47 | 48 | for f in $netdir/bnf.nnet; do 49 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 50 | done 51 | 52 | ## Set up the features 53 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 54 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 55 | ## 56 | 57 | # prepare the dir 58 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 59 | 60 | # get the absolute pathname 61 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 62 | 63 | echo "$0: making BNF scp and ark." 64 | $cmd JOB=1:$nj $logdir/make_bnf_$name.JOB.log \ 65 | nnet-forward --apply-log=false $netdir/bnf.nnet "$feats" \ 66 | ark,scp:$feadir/feats_bnf_$name.JOB.ark,$feadir/feats_bnf_$name.JOB.scp || exit 1; 67 | 68 | 69 | N0=$(cat $srcdata/feats.scp | wc -l) 70 | N1=$(cat $feadir/feats_bnf_$name.*.scp | wc -l) 71 | if [[ "$N0" != "$N1" ]]; then 72 | echo "$0: error occurred while generating BNF for $name (Original:$N0 BNF:$N1)" 73 | exit 1; 74 | fi 75 | 76 | # Concatenate the per-job feats.scp into the BNF data dir 77 | for n in `seq 1 $nj`; do 78 | cat $feadir/feats_bnf_$name.$n.scp >> $data/feats.scp 79 | done 80 | 81 | echo "$0: done making BNF" 82 | 83 | exit 0; 84 | -------------------------------------------------------------------------------- /steps_pdnn/make_conv_feat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Generate activations from the convolution layers of a CNN and save them in 6 | # Kaldi format. 7 | 8 | ## Begin configuration section. 9 | stage=1 10 | nj=4 11 | cmd=run.pl 12 | 13 | splice_opts= 14 | norm_vars= 15 | add_deltas= 16 | 17 | layer_index=1 18 | 19 | ## End configuration options. 20 | 21 | echo "$0 $@" # Print the command line for logging 22 | 23 | [ -f path.sh ] && . ./path.sh # source the path. 24 | .
parse_options.sh || exit 1; 25 | 26 | if [ $# != 7 ]; then 27 | echo "Wrong #arguments ($#, expected 7)" 28 | echo "usage: steps_pdnn/make_conv_feat.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <cnn-param-file>" 29 | echo " <cnn-cfg-file> <log-dir> <conv-feat-dir>" 30 | echo "e.g.: steps_pdnn/make_conv_feat.sh data_conv/train data/train exp/cnn " 31 | echo "exp/cnn/nnet.param exp/cnn/nnet.cfg exp/cnn/_log exp/cnn/_conv" 32 | echo "main options (for others, see top of script file)" 33 | echo " --stage <stage> # stage to start from" 34 | echo " --nj <nj> # number of parallel jobs" 35 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 36 | exit 1; 37 | fi 38 | 39 | data=$1 40 | srcdata=$2 41 | netdir=$3 42 | cnnparam=$4 43 | cnncfg=$5 44 | logdir=$6 45 | feadir=$7 46 | 47 | # get the absolute pathname 48 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 49 | 50 | name=`basename $data` 51 | sdata=$srcdata/split$nj 52 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 53 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 54 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 55 | 56 | mkdir -p $data $logdir $feadir 57 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 58 | 59 | for f in $cnnparam $cnncfg; do 60 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 61 | done 62 | 63 | # prepare the dir 64 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 65 | 66 | ## First dump the network input into local files 67 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 68 | if $add_deltas; then 69 | $cmd JOB=1:$nj $logdir/nnet_input.$name.JOB.log \ 70 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 71 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 72 | splice-feats $splice_opts ark:- ark:- \| \ 73 | add-deltas ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 74 | 75 | else 76 | $cmd JOB=1:$nj $logdir/nnet_input.$name.JOB.log \ 77 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 78 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 79 | splice-feats $splice_opts ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 80 | fi 81 | 82 | # Generate conv-layer activation by calling PDNN 83 | $cmd JOB=1:$nj $logdir/conv_feat.$name.JOB.log \ 84 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 85 | export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 86 | python pdnn/cmds2/run_FeatExt_Kaldi.py --in-scp-file $feadir/nnet_input.$name.JOB.scp --out-ark-file $feadir/conv_feats.$name.JOB.ark --nnet-param $cnnparam --nnet-cfg $cnncfg --layer-index $layer_index 87 | 88 | rm $feadir/nnet_input.* 89 | 90 | # Generate the final scp and ark files 91 | $cmd JOB=1:$nj $logdir/copy_feat.$name.JOB.log \ 92 | copy-feats ark:$feadir/conv_feats.$name.JOB.ark ark,scp:$feadir/feats.$name.JOB.ark,$feadir/feats.$name.JOB.scp 93 | rm $feadir/conv_feats.* 94 | 95 | N0=$(cat $srcdata/feats.scp | wc -l) 96 | N1=$(cat $feadir/feats.$name.*.scp | wc -l) 97 | if [[ "$N0" != "$N1" ]]; then 98 | echo "$0: error occurred while generating features for $name (Original:$N0 New:$N1)" 99 | exit 1; 100 | fi 101 | 102 | # Concatenate the per-job feats.scp into the output data dir 103 | for n in `seq 1 $nj`; do 104 | cat $feadir/feats.$name.$n.scp >> $data/feats.scp 105 | done 106 | 107 | echo "$0: done making features" 108 | 109 | exit 0; 110 | 111 |
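A minimal invocation sketch for make_conv_feat.sh, reusing the illustrative paths from its own usage message (it assumes a CNN already trained with one of the run-cnn.sh recipes, so that exp/cnn/nnet.param and exp/cnn/nnet.cfg exist; --layer-index selects which convolution layer's activations to dump):
steps_pdnn/make_conv_feat.sh --nj 8 --layer-index 1 data_conv/train data/train exp/cnn exp/cnn/nnet.param exp/cnn/nnet.cfg exp/cnn/_log exp/cnn/_conv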
-------------------------------------------------------------------------------- /steps_pdnn/make_denlats_nnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012-2013 Karel Vesely, Daniel Povey 3 | # Apache 2.0. 4 | 5 | # Create denominator lattices for MMI/MPE/sMBR training. 6 | # Creates its output in $dir/lat.*.ark,$dir/lat.scp 7 | # The lattices are uncompressed; we need random access for DNN training. 8 | 9 | # Begin configuration section. 10 | nj=4 11 | cmd=run.pl 12 | sub_split=1 13 | beam=13.0 14 | lattice_beam=7.0 15 | acwt=0.1 16 | max_active=5000 17 | nnet= 18 | max_mem=20000000 # This will stop the processes getting too large. 19 | # This is in bytes, but not "real" bytes-- you have to multiply 20 | # by something like 5 or 10 to get real bytes (not sure why so large) 21 | # End configuration section. 22 | use_gpu=no # yes|no|optional 23 | parallel_opts="-pe smp 2" 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | echo "$0 $@" # Print the command line for logging 30 | 31 | [ -f ./path.sh ] && . ./path.sh; # source the path. 32 | . parse_options.sh || exit 1; 33 | 34 | if [ $# != 4 ]; then 35 | echo "Usage: steps/$0 [options] <data-dir> <lang-dir> <src-dir> <exp-dir>" 36 | echo " e.g.: steps/$0 data/train data/lang exp/tri1 exp/tri1_denlats" 37 | echo "Works for plain features (or CMN, delta), forwarded through feature-transform." 38 | echo "" 39 | echo "Main options (for others, see top of script file)" 40 | echo " --config <config-file> # config containing options" 41 | echo " --nj <nj> # number of parallel jobs" 42 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 43 | echo " --sub-split <n-split> # e.g. 40; use this for " 44 | echo " # large databases so your jobs will be smaller and" 45 | echo " # will (individually) finish reasonably soon." 46 | exit 1; 47 | fi 48 | 49 | data=$1 50 | lang=$2 51 | srcdir=$3 52 | dir=$4 53 | 54 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 55 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 56 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 57 | 58 | sdata=$data/split$nj 59 | mkdir -p $dir/log 60 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 61 | echo $nj > $dir/num_jobs 62 | 63 | oov=`cat $lang/oov.int` || exit 1; 64 | 65 | mkdir -p $dir 66 | 67 | cp -r $lang $dir/ 68 | 69 | # Compute grammar FST which corresponds to unigram decoding graph. 70 | new_lang="$dir/"$(basename "$lang") 71 | echo "Making unigram grammar FST in $new_lang" 72 | cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ 73 | awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ 74 | utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ 75 | || exit 1; 76 | 77 | # mkgraph.sh expects a whole directory "lang", so put everything in one directory... 78 | # it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and 79 | # final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. 80 | 81 | echo "Compiling decoding graph in $dir/dengraph" 82 | if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then 83 | echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
84 | else 85 | utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; 86 | fi 87 | 88 | 89 | cp $srcdir/{tree,final.mdl} $dir 90 | 91 | # Select default locations to model files 92 | [ -z "$nnet" ] && nnet=$srcdir/final.nnet; 93 | class_frame_counts=$srcdir/train_class.counts 94 | model=$dir/final.mdl 95 | 96 | # Check that files exist 97 | for f in $sdata/1/feats.scp $nnet $model $class_frame_counts; do 98 | [ ! -f $f ] && echo "$0: missing file $f" && exit 1; 99 | done 100 | 101 | # PREPARE FEATURE EXTRACTION PIPELINE 102 | # Create the feature stream: 103 | ## Set up the features 104 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 105 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 106 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 107 | feats="$feats nnet-forward --no-softmax=true --class-frame-counts=$class_frame_counts $nnet ark:- ark:- |" 108 | 109 | 110 | echo "$0: generating denlats from data '$data', putting lattices in '$dir'" 111 | # Generate the lattices 112 | $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ 113 | latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ 114 | --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ 115 | $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; 116 | echo "$0: done generating denominator lattices." 117 | -------------------------------------------------------------------------------- /steps_pdnn/sat/build_nnet_pfile_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Create pfiles for deep neural network training with i-vectors appended to each frame. 6 | # We assume that the training alignment is ready and features (either fbanks or 7 | # fMLLRs) have been generated. 8 | # Refer to the following comments for configurations. 9 | 10 | ## Begin configuration section. 11 | stage=1 12 | every_nth_frame=1 # for subsampling. 13 | nj=4 14 | cmd=run.pl 15 | 16 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 17 | add_deltas=false 18 | norm_vars=false # when doing cmvn, whether to normalize variance 19 | 20 | do_concat=true # whether to concatenate the individual pfiles into a single one 21 | 22 | # Config for splitting pfile into training and valid set; not used for SWBD 23 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 24 | do_split=false 25 | cv_ratio=0.05 # the ratio of CV data 26 | 27 | shuffle_scp=false # whether the feature scp is shuffled 28 | 29 | # Config for ivector 30 | ivec_type=speaker # the type of appended features: speaker, utterance, frame 31 | 32 | ## End configuration options. 33 | 34 | echo "$0 $@" # Print the command line for logging 35 | 36 | [ -f path.sh ] && . ./path.sh # source the path. 37 | .
parse_options.sh || exit 1; 38 | 39 | if [ $# != 4 ]; then 40 | echo "Wrong #arguments ($#, expected 4)" 41 | echo "usage: steps_pdnn/sat/build_nnet_pfile_ivec.sh <data-dir> <ali-dir> <ivector-dir> <pfile-dir>" 42 | echo "e.g.: steps_pdnn/sat/build_nnet_pfile_ivec.sh data/train exp/tri4_ali exp_ivec/ivector_swbd1 exp/tri4_pfile" 43 | echo "main options (for others, see top of script file)" 44 | echo " --stage <stage> # stage to start from" 45 | echo " --nj <nj> # number of parallel jobs" 46 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 47 | exit 1; 48 | fi 49 | 50 | data=$1 51 | alidir=$2 52 | ivdir=$3 53 | dir=$4 54 | 55 | name=`basename $data` 56 | nj=`cat $alidir/num_jobs` || exit 1; 57 | sdata=$data/split$nj 58 | 59 | # Check whether ivectors have been generated successfully. 60 | [ ! -f $ivdir/ivector.scp ] && echo "$0: no such file $ivdir/ivector.scp" && exit 1; 61 | 62 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 63 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 64 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 65 | if [ -f $try_path/pfile_create ]; then 66 | PPATH=$try_path 67 | else 68 | echo "You do not have pfile_create (part of pfile-utils) on your path," 69 | echo "and it is not accessible in the normal place e.g. $try_path/pfile_create" 70 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 71 | exit 1 72 | fi 73 | else 74 | PPATH=$(dirname `which pfile_create`) 75 | fi 76 | export PATH=$PATH:$PPATH 77 | 78 | mkdir -p $dir/log 79 | #echo $splice_opts > $dir/splice_opts 80 | #echo $norm_vars > $dir/norm_vars 81 | #echo $add_deltas > $dir/add_deltas 82 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 83 | 84 | ## Setup features 85 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 86 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 87 | if $shuffle_scp; then 88 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | splice-feats $splice_opts ark:- ark:- |" 89 | fi 90 | # Add first and second-order deltas if needed 91 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 92 | 93 | # Setup the additional ivector features 94 | case $ivec_type in 95 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 96 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 97 | frame) ivfeats="scp:$ivdir/ivector.scp";; 98 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 99 | esac 100 | # On each frame, append the i-vector to the original feature vector 101 | if [ $stage -le 2 ]; then 102 | $cmd JOB=1:$nj $dir/log/append_feat_$name.JOB.log \ 103 | append-feats "$feats" "$ivfeats" ark,scp:$dir/append_feat.$name.JOB.ark,$dir/append_feat.$name.JOB.scp || exit 1; 104 | fi 105 | ## 106 | 107 | ## Get the dimension of the features 108 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 109 | feat-to-dim "ark,s,cs:copy-feats scp:$dir/append_feat.$name.1.scp ark:- | subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 110 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 111 | echo "$0: network input dimension is $feat_dim" 112 | 113 | if [ $stage -le 3 ]; then 114 | $cmd
JOB=1:$nj $dir/log/build_pfile.JOB.log \ 115 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 116 | scp:$dir/append_feat.$name.JOB.scp "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 117 | # Concatenate the pfiles into one 118 | all_pfiles="" 119 | for n in `seq 1 $nj`; do 120 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 121 | done 122 | if $do_concat; then 123 | $cmd $dir/log/pfile_cat.log \ 124 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 125 | rm -rf $dir/$name.pfile.* 126 | else 127 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 128 | fi 129 | fi 130 | 131 | if [ $stage -le 4 ] && $do_split; then 132 | echo "Split data into training and cross-validation" 133 | mkdir -p $dir/concat 134 | # Chop the whole pfile into small units 135 | $cmd $dir/log/pfile_burst.log \ 136 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 137 | fi 138 | 139 | if [ $stage -le 5 ] && $do_split; then 140 | # Split the units according to cv_ratio 141 | $cmd $dir/log/pfile_rconcat.log \ 142 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 143 | rm -r $dir/concat 144 | echo "## Info of the training pfile: ##" 145 | $PPATH/pfile_info $dir/train.pfile 146 | echo "## Info of the cross-validation pfile: ##" 147 | $PPATH/pfile_info $dir/valid.pfile 148 | fi 149 | 150 | echo "$0: done creating pfiles." 151 | 152 | exit 0; 153 | -------------------------------------------------------------------------------- /steps_pdnn/sat/decode_dnn_concat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the DNN model. The features are a concatenation of the original input 7 | # features and additional features. These additional features can be on the 8 | # speaker, utterance or frame level. 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 29 | 30 | ## End configuration section 31 | 32 | echo "$0 $@" # Print the command line for logging 33 | 34 | [ -f ./path.sh ] && . ./path.sh; # source the path. 35 | . parse_options.sh || exit 1; 36 | 37 | if [ $# != 5 ]; then 38 | echo "Wrong #arguments ($#, expected 5)" 39 | echo "Usage: steps_pdnn/sat/decode_dnn_concat.sh [options] <graph-dir> <data-dir> <ali-dir> <ivector-dir> <decode-dir>" 40 | echo " e.g.: steps_pdnn/sat/decode_dnn_concat.sh exp/tri4/graph data/test exp/tri4_ali exp_ivec/ivector_eval2000 exp/tri4_dnn/decode" 41 | echo "main options (for others, see top of script file)" 42 | echo " --stage <stage> # stage to start from" 43 | echo " --nj <nj> # number of parallel jobs" 44 | echo " --cmd <cmd> # command used to run jobs in parallel" 45 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 46 | echo " --num-threads <n> # number of threads to use, default 1." 47 | echo " --parallel-opts <opts> # e.g.
'-pe smp 4' if you supply --num-threads 4" 48 | echo " --scoring-opts <opts> # options to local/score.sh" 49 | exit 1; 50 | fi 51 | 52 | graphdir=$1 53 | data=$2 54 | alidir=$3 55 | ivdir=$4 56 | dir=`echo $5 | sed 's:/$::g'` # remove any trailing slash. 57 | 58 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 59 | name=`basename $data` 60 | sdata=$data/split$nj; 61 | 62 | thread_string= 63 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 64 | 65 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 66 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 67 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 68 | 69 | mkdir -p $dir/log 70 | split_data.sh $data $nj || exit 1; 71 | echo $nj > $dir/num_jobs 72 | 73 | # Some checks. Note: we don't need $srcdir/tree but we expect 74 | # it should exist, given the current structure of the scripts. 75 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree $ivdir/ivector.scp; do 76 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 77 | done 78 | 79 | # Generate state counts; will be used as prior 80 | $cmd $dir/log/class_count.log \ 81 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 82 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 83 | 84 | ## Set up the features 85 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 86 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 87 | 88 | # Setup the additional ivector features 89 | case $ivec_type in 90 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 91 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 92 | frame) ivfeats="scp:$ivdir/ivector.scp";; 93 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 94 | esac 95 | 96 | # On each frame, append the i-vector to the original feature vector 97 | $cmd JOB=1:$nj $dir/log/append_feat_$name.JOB.log \ 98 | append-feats "$feats" "$ivfeats" ark,scp:$dir/append_feat.$name.JOB.ark,$dir/append_feat.$name.JOB.scp || exit 1; 99 | ## 100 | 101 | finalfeats="ark:nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet scp:$dir/append_feat.$name.JOB.scp ark:- |" 102 | 103 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 104 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 105 | 106 | rm -rf $dir/append_feat.$name.* 107 | 108 | # Copy the source model so that scoring can find it 109 | cp $alidir/final.mdl $srcdir 110 | 111 | if ! $skip_scoring ; then 112 | [ ! -x local/score.sh ] && \ 113 | echo "$0: not scoring because local/score.sh does not exist or is not executable."
&& exit 1; 114 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 115 | fi 116 | 117 | exit 0; 118 | -------------------------------------------------------------------------------- /steps_pdnn/sat/decode_dnn_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the SAT-DNN model. You should already have the canonical DNN model 7 | # and the iVecNN network in srcdir. 8 | 9 | ## Begin configuration section 10 | stage=0 11 | nj=16 12 | cmd=run.pl 13 | num_threads=1 14 | 15 | max_active=7000 # max-active 16 | beam=15.0 # beam used 17 | latbeam=7.0 # beam used in getting lattices 18 | acwt=0.1 # acoustic weight used in getting lattices 19 | max_arcs=-1 20 | 21 | skip_scoring=false # whether to skip WER scoring 22 | scoring_opts= 23 | 24 | splice_opts= 25 | norm_vars= 26 | add_deltas= 27 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 5 ]; then 37 | echo "Wrong #arguments ($#, expected 5)" 38 | echo "Usage: steps_pdnn/sat/decode_dnn_ivec.sh [options] <graph-dir> <data-dir> <ali-dir> <ivector-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/sat/decode_dnn_ivec.sh exp/tri4/graph data/test exp/tri4_ali exp_ivec/ivector_eval2000 exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | ivdir=$4 55 | dir=`echo $5 | sed 's:/$::g'` # remove any trailing slash. 56 | 57 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 58 | name=`basename $data` 59 | sdata=$data/split$nj; 60 | 61 | thread_string= 62 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 63 | 64 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 65 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 66 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 67 | 68 | mkdir -p $dir/log 69 | split_data.sh $data $nj || exit 1; 70 | echo $nj > $dir/num_jobs 71 | 72 | # Some checks. Note: we don't need $srcdir/tree but we expect 73 | # it should exist, given the current structure of the scripts. 74 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree $ivdir/ivector.scp; do 75 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 76 | done 77 | 78 | # Generate state counts; will be used as prior 79 | $cmd $dir/log/class_count.log \ 80 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 81 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 82 | 83 | ## Set up the features 84 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 86 | 87 | # Setup the additional ivector features 88 | case $ivec_type in 89 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 90 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 91 | frame) ivfeats="ark:copy-feats scp:$ivdir/ivector.scp ark:- |";; 92 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 93 | esac 94 | ivfeats="$ivfeats nnet-forward $srcdir/dnn.nnet.adapt ark:- ark:- |" 95 | 96 | # Add the linear feature shifts to the original DNN inputs 97 | $cmd JOB=1:$nj $dir/log/add_feat_$name.JOB.log \ 98 | add-feats "$feats" "$ivfeats" ark,scp:$dir/add_feat.$name.JOB.ark,$dir/add_feat.$name.JOB.scp || exit 1; 99 | 100 | # Use add-feats-wgt if you want to try the weighted-sum feature fusion 101 | #featbin/add-feats-wgt --feat1wgt=ark:$srcdir/feat.wgt.ark --feat2wgt=ark:$srcdir/ivec.wgt.ark --biaswgt=ark:$srcdir/bias.wgt.ark "$feats" "$ivfeats" ark,scp:$dir/add_feat.$name.JOB.ark,$dir/add_feat.$name.JOB.scp || exit 1; 102 | 103 | ## 104 | 105 | finalfeats="ark:nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet.si scp:$dir/add_feat.$name.JOB.scp ark:- |" 106 | 107 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 108 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 109 | 110 | # Copy the source model so that scoring can find it 111 | cp $alidir/final.mdl $srcdir 112 | 113 | if ! $skip_scoring ; then 114 | [ ! -x local/score.sh ] && \ 115 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 116 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 117 | fi 118 | 119 | exit 0; 120 | -------------------------------------------------------------------------------- /steps_pdnn/sat/make_bnf_feat_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Copyright 2014 Xiaohui Zhang Johns Hopkins University 4 | # Apache 2.0 5 | 6 | # Make BNF front-end with the SAT-trained neural network 7 | 8 | # Begin configuration section. 9 | stage=1 10 | nj=8 11 | cmd=run.pl 12 | 13 | norm_vars=false # when doing cmvn, whether to normalize variance; has to be consistent with build_nnet_pfile.sh 14 | 15 | # Config for ivector 16 | is_spk_mode=false # whether the i-vectors are per-speaker 17 | 18 | # End configuration options. 19 | 20 | echo "$0 $@" # Print the command line for logging 21 | 22 | [ -f path.sh ] && . ./path.sh # source the path. 23 | .
parse_options.sh || exit 1; 24 | 25 | if [ $# != 6 ]; then 26 | echo "usage: steps_pdnn/sat/make_bnf_feat_ivec.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <ivector-dir> <log-dir> <bnf-feat-dir>" 27 | echo "e.g.: steps_pdnn/sat/make_bnf_feat_ivec.sh data_bnf/train data/train exp/bnf_net exp_ivec/ivector_swbd1 exp/make_bnf/log exp/bnf" 28 | echo "main options (for others, see top of script file)" 29 | echo " --config <config-file> # config containing options" 30 | echo " --nj <nj> # number of parallel jobs" 31 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 32 | exit 1; 33 | fi 34 | 35 | data=$1 36 | srcdata=$2 37 | netdir=$3 38 | ivdir=$4 39 | logdir=$5 40 | feadir=$6 41 | 42 | sdata=$srcdata/split$nj; 43 | splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 44 | name=`basename $data` 45 | 46 | mkdir -p $data $logdir $feadir 47 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 48 | 49 | for f in $netdir/bnf.nnet; do 50 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 51 | done 52 | 53 | ## Set up the features 54 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 55 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 56 | 57 | if $is_spk_mode; then 58 | ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- | nnet-forward $netdir/ivec.nnet ark:- ark:- |" 59 | else 60 | ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- | nnet-forward $netdir/ivec.nnet ark:- ark:- |" 61 | fi 62 | 63 | $cmd JOB=1:$nj $logdir/add_feat_$name.JOB.log \ 64 | add-feats "$feats" "$ivfeats" ark,scp:$feadir/add_feat.$name.JOB.ark,$feadir/add_feat.$name.JOB.scp || exit 1; 65 | ## 66 | 67 | # prepare the dir 68 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 69 | 70 | # get the absolute pathname 71 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 72 | 73 | echo "$0: making BNF scp and ark." 74 | $cmd JOB=1:$nj $logdir/make_bnf_$name.JOB.log \ 75 | nnet-forward --apply-log=false $netdir/bnf.nnet scp:$feadir/add_feat.$name.JOB.scp \ 76 | ark,scp:$feadir/feats_bnf_$name.JOB.ark,$feadir/feats_bnf_$name.JOB.scp || exit 1; 77 | 78 | 79 | N0=$(cat $srcdata/feats.scp | wc -l) 80 | N1=$(cat $feadir/feats_bnf_$name.*.scp | wc -l) 81 | if [[ "$N0" != "$N1" ]]; then 82 | echo "$0: error occurred while generating BNF for $name (Original:$N0 BNF:$N1)" 83 | exit 1; 84 | fi 85 | 86 | # Concatenate the per-job feats.scp into the BNF data dir 87 | for n in `seq 1 $nj`; do 88 | cat $feadir/feats_bnf_$name.$n.scp >> $data/feats.scp 89 | done 90 | 91 | echo "$0: done making BNF" 92 | 93 | exit 0; 94 | -------------------------------------------------------------------------------- /steps_pdnn/sat/make_feat_with_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Make adapted input features for the SAT-DNN model: the original DNN inputs plus 7 | # i-vector-based linear shifts. You should already have the iVecNN network in netdir. 8 | 9 | ## Begin configuration section 10 | stage=0 11 | nj=16 12 | cmd=run.pl 13 | num_threads=1 14 | 15 | splice_opts= 16 | norm_vars= 17 | add_deltas= 18 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 19 | 20 | ## End configuration section 21 | 22 | echo "$0 $@" # Print the command line for logging 23 | 24 | [ -f ./path.sh ] && .
./path.sh; # source the path. 25 | . parse_options.sh || exit 1; 26 | 27 | if [ $# != 6 ]; then 28 | echo "Wrong #arguments ($#, expected 6)" 29 | echo "Usage: steps_pdnn/sat/make_feat_with_ivec.sh [options] <tgt-data-dir> <src-data-dir> <nnet-dir> <ivector-dir> <log-dir> <feat-dir>" 30 | echo "main options (for others, see top of script file)" 31 | echo " --stage <stage> # stage to start from" 32 | echo " --nj <nj> # number of parallel jobs" 33 | echo " --cmd <cmd> # command used to run jobs in parallel" 34 | echo " --ivec-type (speaker|utterance|frame) # the type of the i-vectors" 35 | echo " --splice-opts <opts> # frame-splicing options" 36 | echo " --norm-vars (true|false) # whether to normalize variance in CMVN" 37 | echo " --add-deltas (true|false) # whether to append delta features" 38 | echo " --num-threads <n> # number of threads to use, default 1." 39 | exit 1; 40 | fi 41 | 42 | data=$1 43 | srcdata=$2 44 | netdir=$3 45 | ivdir=$4 46 | logdir=$5 47 | feadir=$6 48 | 49 | name=`basename $data` 50 | sdata=$srcdata/split$nj; 51 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 52 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 53 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 54 | 55 | mkdir -p $data $logdir $feadir 56 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 57 | 58 | for f in $netdir/dnn.nnet.adapt; do 59 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 60 | done 61 | 62 | ## Set up the features 63 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 64 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 65 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 66 | 67 | # Setup the additional ivector features 68 | case $ivec_type in 69 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 70 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 71 | frame) ivfeats="ark:copy-feats scp:$ivdir/ivector.scp ark:- |";; 72 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 73 | esac 74 | ivfeats="$ivfeats nnet-forward $netdir/dnn.nnet.adapt ark:- ark:- |" 75 | 76 | # prepare the dir 77 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 78 | 79 | # get the absolute pathname 80 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 81 | 82 | # Add the linear feature shifts to the original DNN inputs 83 | echo "$0: making feature + [ivector shift] scp and ark."
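# (The add-feats step below adds the iVecNN output to every frame of the spliced input stream, i.e. a per-frame linear shift; the iVecNN output dimension therefore has to match the spliced feature dimension.)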
84 | $cmd JOB=1:$nj $logdir/add_feat_$name.JOB.log \ 85 | add-feats "$feats" "$ivfeats" ark,scp:$feadir/add_feat.$name.JOB.ark,$feadir/add_feat.$name.JOB.scp || exit 1; 86 | 87 | N0=$(cat $srcdata/feats.scp | wc -l) 88 | N1=$(cat $feadir/add_feat.$name.*.scp | wc -l) 89 | if [[ "$N0" != "$N1" ]]; then 90 | echo "$0: error occurred while generating feature + [ivector shift] for $name (Original:$N0 Now:$N1)" 91 | exit 1; 92 | fi 93 | 94 | # Concatenate the per-job feats.scp into the output data dir 95 | for n in `seq 1 $nj`; do 96 | cat $feadir/add_feat.$name.$n.scp >> $data/feats.scp 97 | done 98 | 99 | echo "$0: done making feature + [ivector shift]" 100 | 101 | exit 0; 102 | -------------------------------------------------------------------------------- /steps_pdnn/tmp/decode_dnn_lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the LHUC-adapted DNN model. The [srcdir] in this script should be the same as dir in 7 | # build_nnet_pfile.sh. Also, the DNN model must already have been trained and put in srcdir. 8 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 4 ]; then 37 | echo "Wrong #arguments ($#, expected 4)" 38 | echo "Usage: steps_pdnn/tmp/decode_dnn_lhuc.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/tmp/decode_dnn_lhuc.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 55 | 56 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 57 | sdata=$data/split$nj; 58 | 59 | thread_string= 60 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 61 | 62 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 63 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 64 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 65 | 66 | mkdir -p $dir/log 67 | split_data.sh $data $nj || exit 1; 68 | echo $nj > $dir/num_jobs 69 | 70 | # Some checks. Note: we don't need $srcdir/tree but we expect 71 | # it should exist, given the current structure of the scripts. 72 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 73 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 74 | done 75 | 76 | # Generate state counts; will be used as prior 77 | $cmd $dir/log/class_count.log \ 78 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 79 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 80 | 81 | ## Set up the features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 85 | ## 86 | finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $dir/dnn.nnet.JOB ark:- ark:- |" 87 | 88 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 89 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 90 | 91 | # Copy the source model so that scoring can find it 92 | cp $alidir/final.mdl $srcdir 93 | 94 | if ! $skip_scoring ; then 95 | [ ! -x local/score.sh ] && \ 96 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 97 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 98 | fi 99 | 100 | exit 0; 101 | -------------------------------------------------------------------------------- /steps_pdnn/tmp/make_nnet_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Generate DNN input features and also create text-formatted alignment files. 6 | # It's used when PDNN directly reads Kaldi .ark and .ali files for training. 7 | 8 | ## Begin configuration section. 9 | stage=1 10 | nj=4 11 | cmd=run.pl 12 | 13 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 14 | add_deltas=false 15 | norm_vars=false # when doing cmvn, whether to normalize variance 16 | 17 | ## End configuration options. 18 | 19 | echo "$0 $@" # Print the command line for logging 20 | 21 | [ -f path.sh ] && . ./path.sh # source the path. 22 | . parse_options.sh || exit 1; 23 | 24 | if [ $# != 5 ]; then 25 | echo "Wrong #arguments ($#, expected 5)" 26 | echo "usage: steps_pdnn/tmp/make_nnet_data.sh <tgt-data-dir> <src-data-dir> <feat-dir> <ali-dir> <exp-dir>" 27 | echo "e.g.: make_nnet_data.sh data/nnet_input data/train exp/feat exp/_log exp/" 28 | echo "main options (for others, see top of script file)" 29 | echo " --stage <stage> # stage to start from" 30 | echo " --nj <nj> # number of parallel jobs" 31 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
32 | exit 1; 33 | fi 34 | 35 | data=$1 36 | srcdata=$2 37 | feadir=$3 38 | alidir=$4 39 | dir=$5 40 | 41 | # get the absolute pathname 42 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 43 | 44 | name=`basename $data` 45 | sdata=$srcdata/split$nj 46 | mkdir -p $dir/log $data $feadir 47 | 48 | echo $splice_opts > $dir/splice_opts; echo $add_deltas > $dir/add_deltas; echo $norm_vars > $dir/norm_vars 49 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 50 | 51 | # prepare the dir 52 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 53 | 54 | ## First dump the network input into local files 55 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 56 | if $add_deltas; then 57 | $cmd JOB=1:$nj $dir/log/nnet_input.$name.JOB.log \ 58 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 59 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 60 | splice-feats $splice_opts ark:- ark:- \| \ 61 | add-deltas ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 62 | 63 | else 64 | $cmd JOB=1:$nj $dir/log/nnet_input.$name.JOB.log \ 65 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 66 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 67 | splice-feats $splice_opts ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 68 | fi 69 | 70 | #$cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log \ 71 | # gzip $feadir/nnet_input.$name.JOB.ark || exit 1; 72 | 73 | N0=$(cat $srcdata/feats.scp | wc -l) 74 | N1=$(cat $feadir/nnet_input.$name.*.scp | wc -l) 75 | if [[ "$N0" != "$N1" ]]; then 76 | echo "$0: error occurred while generating features for $name (Original:$N0 New:$N1)" 77 | exit 1; 78 | fi 79 | 80 | # Concatenate the per-job feats.scp into the output data dir 81 | for n in `seq 1 $nj`; do 82 | cat $feadir/nnet_input.$name.$n.scp >> $data/feats.scp 83 | done 84 | 85 | $cmd $dir/log/ali2post.$name.log \ 86 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz|" "ark,t:|gzip -c >$dir/$name.ali.gz" || exit 1; 87 | 88 | echo "$0: done making features" 89 | 90 | exit 0; 91 | --------------------------------------------------------------------------------
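A note on the decoding recipe shared by the decode_*.sh scripts above (a sketch for orientation only; the <angle-bracket> names stand for the scripts' arguments and are not literal paths). Each script first estimates state priors P(s) from the alignment counts stored in class.counts, and nnet-forward then turns the DNN posteriors p(s|x) into scaled log-likelihoods via log p(x|s) = log p(s|x) - log P(s) + const, which latgen-faster-mapped consumes. For one job, the expanded pipeline is roughly:
apply-cmvn --norm-vars=false --utt2spk=ark:<sdata>/1/utt2spk scp:<sdata>/1/cmvn.scp scp:<sdata>/1/feats.scp ark:- | splice-feats --left-context=4 --right-context=4 ark:- ark:- | nnet-forward --class-frame-counts=<decode-dir>/class.counts --apply-log=true --no-softmax=false <srcdir>/dnn.nnet ark:- ark:- | latgen-faster-mapped --acoustic-scale=0.1 --beam=15.0 --lattice-beam=7.0 --word-symbol-table=<graph-dir>/words.txt <ali-dir>/final.mdl <graph-dir>/HCLG.fst ark:- "ark:|gzip -c > <decode-dir>/lat.1.gz"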