├── .gitignore
├── LICENSE
├── README.md
├── run_hkust
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-dnn-fbank+pitch.sh
│   └── run-dnn.sh
├── run_swbd
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-dnn-fbank.sh
│   └── run-dnn.sh
├── run_swbd_110h
│   ├── RESULTS
│   ├── run-bnf-fbank-tandem.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn.sh
│   ├── sat
│   │   ├── run-dnn-fbank-sat.sh
│   │   ├── run-dnn-sat.sh
│   │   └── run-ivec-extract.sh
│   └── tmp
│       ├── run-bnf-fbank-tandem-sat.sh
│       └── run-bnf-tandem-sat.sh
├── run_tedlium
│   ├── .nfs0000001c872b1d8100008f9b
│   ├── RESULTS
│   ├── run-bnf-tandem-fbank.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn-maxout.sh
│   ├── run-dnn.sh
│   └── tmp
│       ├── run-bnf-tandem-fbank-sat.sh
│       ├── run-cnn-sat.sh
│       ├── run-dnn-fbank-sat-dnnalign.sh
│       ├── run-dnn-fbank-sat.sh
│       ├── run-dnn-lhuc-bkup.sh
│       ├── run-dnn-lhuc.sh
│       └── run-dnn-sat-lhuc.sh
├── run_timit
│   ├── RESULTS
│   ├── kaldi_io
│   │   └── run-dnn.sh
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   ├── run-dnn-maxout.sh
│   └── run-dnn.sh
├── run_wsj
│   ├── RESULTS
│   ├── run-bnf-tandem.sh
│   ├── run-cnn.sh
│   ├── run-dnn-fbank.sh
│   └── run-dnn.sh
└── steps_pdnn
    ├── align_nnet.sh
    ├── build_avnnet_pfile.sh
    ├── build_nnet_pfile.sh
    ├── decode_avdnn.sh
    ├── decode_dnn.sh
    ├── make_bnf_feat.sh
    ├── make_conv_feat.sh
    ├── make_denlats_nnet.sh
    ├── sat
    │   ├── build_nnet_pfile_ivec.sh
    │   ├── decode_dnn_concat.sh
    │   ├── decode_dnn_ivec.sh
    │   ├── make_bnf_feat_ivec.sh
    │   └── make_feat_with_ivec.sh
    └── tmp
        ├── decode_dnn_lhuc.sh
        └── make_nnet_data.sh

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
make_post_nnet_bkup.sh
make_post_nnet.sh
make_seq_data.sh
run-dnn-mpe.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Kaldi+PDNN
=========

Kaldi+PDNN builds state-of-the-art DNN-based ASR systems using the [Kaldi](http://kaldi.sourceforge.net/) and [PDNN](http://www.cs.cmu.edu/~ymiao/pdnntk.html) toolkits. Check the [project webpage](http://www.cs.cmu.edu/~ymiao/kaldipdnn.html) for more information.

--------------------------------------------------------------------------------
/run_hkust/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# the SAT model
%WER 44.50 [ 74529 / 167480, 8902 ins, 13372 del, 52255 sub ] exp/tri5a/decode/cer_13

# run-dnn.sh
%WER 36.77 [ 61582 / 167480, 7783 ins, 11586 del, 42213 sub ] exp_pdnn/dnn/decode/cer_10

# run-bnf-tandem.sh
%WER 37.24 [ 62374 / 167480, 6921 ins, 12198 del, 43255 sub ] exp_pdnn/bnf_tandem/tri6a/decode/cer_26
%WER 35.84 [ 60021 / 167480, 5949 ins, 11217 del, 42855 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it1/cer_22
%WER 35.45 [ 59373 / 167480, 5428 ins, 11971 del, 41974 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it2/cer_22
%WER 35.42 [ 59326 / 167480, 5199 ins, 12598 del, 41529 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it3/cer_22
%WER 35.49 [ 59435 / 167480, 5017 ins, 13097 del, 41321 sub ] exp_pdnn/bnf_tandem/tri6a_mmi_b0.1/decode_it4/cer_22

# run-dnn-fbank+pitch.sh
%WER 36.67 [ 61420 / 167480, 8207 ins, 10761 del, 42452 sub ] exp_pdnn/dnn_fbank_pitch/decode/cer_9


# The number of states in the SAT model exp/tri5a looks too small,
# so we built a larger SAT model with
#   steps/train_sat.sh \
#     6000 120000 data/train data/lang exp/tri5a_ali exp/tri6a || exit 1;
# the SAT model exp/tri6a
%WER 44.16 [ 73964 / 167480, 9945 ins, 12499 del, 51520 sub ] exp/tri6a/decode/cer_13

# run-dnn.sh : with the larger model we get some gains on the DNN. Other PDNN systems are likely to be boosted as well.
%WER 35.60 [ 59621 / 167480, 7667 ins, 11147 del, 40807 sub ] exp_pdnn/dnn_6a/decode/cer_11
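The tri6a note above only records the train_sat.sh call. For context, a minimal sketch of the full rebuild under standard Kaldi conventions follows; the alignment step, lang/graph directories, job counts, and decode config here are assumptions, not taken from the recipe:

# realign with tri5a, train the larger SAT model, then decode the dev set (assumed setup)
steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" data/train data/lang exp/tri5a exp/tri5a_ali || exit 1;
steps/train_sat.sh 6000 120000 data/train data/lang exp/tri5a_ali exp/tri6a || exit 1;
utils/mkgraph.sh data/lang_test exp/tri6a exp/tri6a/graph || exit 1;
steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \
  exp/tri6a/graph data/dev exp/tri6a/decode || exit 1;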
--------------------------------------------------------------------------------
/run_hkust/run-dnn-fbank+pitch.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that trains the DNN system over the fbank+pitch features. It
# is to be run after run.sh. Before running this, you should have already built
# the initial GMM model. This script requires a GPU card, and also the "pdnn"
# toolkit to train the DNN. The input filterbank features are mean- and
# variance-normalized.

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn_fbank_pitch
do_ptr=true          # whether to do pre-training
delete_pfile=true    # whether to delete pfiles after DNN training

gmmdir=exp/tri5a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi
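# (A hedged aside, not part of the original recipe: the import test above only
# proves Theano is installed. To confirm it will actually use the GPU rather
# than silently falling back to CPU, one can run
#   THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \
#     $pythonCMD -c 'import theano; print(theano.config.device)'
# which should print the gpu device name when the GPU backend is usable.)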
mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo ---------------------------------------------------------------------
echo "Creating DNN training and validation data (pfiles)"
echo ---------------------------------------------------------------------
# Alignment on the training data
if [ ! -d ${gmmdir}_ali ]; then
  echo "Generate alignment on train"
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train data/lang $gmmdir ${gmmdir}_ali || exit 1
fi

# Generate the fbank features. The fbanks are 40-dimensional on each frame
echo "--num-mel-bins=40" > conf/fbank.conf
echo "--sample-frequency=8000" >> conf/fbank.conf
mkdir -p $working_dir/data_fbank
for set in train dev; do
  if [ ! -d $working_dir/data_fbank/$set ]; then
    cp -r data/$set $working_dir/data_fbank/$set
    ( cd $working_dir/data_fbank/$set; rm -rf {cmvn,feats}.scp split*; )
    steps/make_fbank.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_fbank/$set $working_dir/_log $working_dir/_fbank || exit 1;
  fi
done

# Generate the pitch features. The pitches are 3-dimensional on each frame
echo "--sample-frequency=8000" > conf/pitch.conf
mkdir -p $working_dir/data_pitch
for set in train dev; do
  if [ ! -d $working_dir/data_pitch/$set ]; then
    cp -r data/$set $working_dir/data_pitch/$set
    ( cd $working_dir/data_pitch/$set; rm -rf {cmvn,feats}.scp split*; )
    steps/make_pitch_kaldi.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_pitch/$set $working_dir/_log $working_dir/_pitch || exit 1;
  fi
done

# Combine fbank and pitch together
mkdir -p $working_dir/data
for set in train dev; do
  if [ ! -d $working_dir/data/$set ]; then
    steps/append_feats.sh --cmd "$train_cmd" --nj 16 \
      $working_dir/data_fbank/$set $working_dir/data_pitch/$set \
      $working_dir/data/$set $working_dir/_log $working_dir/_append || exit 1;
    # We need to compute CMVN stats on the appended features
    steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_append || exit 1;
  fi
done
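# (Sanity check, not part of the original recipe: each appended frame should
# carry 40 fbank + 3 pitch = 43 dimensions, so the +/-5 splicing below yields
# 43 * 11 = 473 network inputs. The per-frame dimension can be verified with:
#   feat-to-dim scp:$working_dir/data/train/feats.scp -   # expect 43
# )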
# By default, inputs include 11 frames (+/-5) of 43-dimensional appended features, with 473 dimensions.
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --every-nth-frame 1 --norm-vars true \
    --do-split true --pfile-unit-size 30 --cv-ratio 0.05 \
    --splice-opts "--left-context=5 --right-context=5" --input-dim 473 \
    $working_dir/data/train ${gmmdir}_ali $working_dir || exit 1
  ( cd $working_dir; rm concat.pfile; )
  touch $working_dir/train.pfile.done
fi

echo ---------------------------------------------------------------------
echo "Starting DNN training"
echo ---------------------------------------------------------------------
feat_dim=$(cat $working_dir/train.pfile |head |grep num_features| awk '{print $2}') || exit 1;

if $do_ptr && [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_SdA.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --first-reconstruct-activation "tanh" \
      --wdir $working_dir --output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --output-format kaldi --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
  $delete_pfile && rm -rf $working_dir/*.pfile
fi

echo ---------------------------------------------------------------------
echo "Decode the final system"
echo ---------------------------------------------------------------------
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph
  # Here norm-vars has to be the same as in steps_pdnn/build_nnet_pfile.sh
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" --norm-vars true \
    $graph_dir $working_dir/data/dev ${gmmdir}_ali $working_dir/decode || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
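When the run finishes, each tested LM weight leaves a cer_N file under the decode directory; these are where the RESULTS numbers above come from. Assuming the standard utils/best_wer.sh helper from the Kaldi egs is available, the best operating point can be read off with:

grep WER exp_pdnn/dnn_fbank_pitch/decode/cer_* | utils/best_wer.sh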
--------------------------------------------------------------------------------
/run_hkust/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that trains the DNN system. It is to be run after run.sh.
# Before running this, you should have already built the initial GMM model. This
# script requires a GPU, and also the "pdnn" toolkit to train the DNN.

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn
do_ptr=true           # whether to do pre-training
delete_pfile=false    # whether to delete pfiles after DNN training

gmmdir=exp/tri5a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo ---------------------------------------------------------------------
echo "Generate alignment and prepare fMLLR features"
echo ---------------------------------------------------------------------
# Alignment on the training data
if [ ! -d ${gmmdir}_ali ]; then
  echo "Generate alignment on train"
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train data/lang $gmmdir ${gmmdir}_ali || exit 1
fi

# Dump fMLLR features. We generate "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  echo "Save fmllr features of train"
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali \
    $working_dir/data/train data/train $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/dev ]; then
  echo "Save fmllr features of dev"
  steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode \
    $working_dir/data/dev data/dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/dev $working_dir/_log $working_dir/_fmllr || exit 1;
fi
echo ---------------------------------------------------------------------
echo "Create DNN training and validation pfiles"
echo ---------------------------------------------------------------------

# By default, DNN inputs include: spliced 11 frames (+/-5) of fMLLR with 440 dimensions
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --every-nth-frame 1 --norm-vars false \
    --do-split true --pfile-unit-size 50 --cv-ratio 0.05 \
    --splice-opts "--left-context=5 --right-context=5" --input-dim 440 \
    $working_dir/data/train ${gmmdir}_ali $working_dir || exit 1
  ( cd $working_dir; rm concat.pfile; )
  touch $working_dir/train.pfile.done
fi

echo ---------------------------------------------------------------------
echo "Start DNN training"
echo ---------------------------------------------------------------------
feat_dim=$(cat $working_dir/train.pfile |head |grep num_features| awk '{print $2}') || exit 1;

if $do_ptr && [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_SdA.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --first-reconstruct-activation "tanh" \
      --wdir $working_dir --output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN.py --train-data "$working_dir/train.pfile,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --output-format kaldi --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
  $delete_pfile && rm -rf $working_dir/*.pfile
fi

echo ---------------------------------------------------------------------
echo "Decode the final system"
echo ---------------------------------------------------------------------
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" --norm-vars false \
    $graph_dir $working_dir/data/dev ${gmmdir}_ali $working_dir/decode || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
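Since the script runs parse_options.sh after its variable definitions, the defaults above can be overridden on the command line. For instance, a hypothetical invocation that trains on top of the larger tri6a model (as in the exp_pdnn/dnn_6a result in run_hkust/RESULTS) on a specific GPU device:

./run-dnn.sh --gmmdir exp/tri6a --working-dir exp_pdnn/dnn_6a --gpu gpu0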
--------------------------------------------------------------------------------
/run_swbd/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# exp/tri4b : the SAT model
%WER 22.6 | 1831 21395 | 79.9 13.9 6.3 2.5 22.6 60.5 | exp/tri4b/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 30.2 | 4459 42989 | 73.4 18.9 7.6 3.6 30.2 66.8 | exp/tri4b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.filt.sys

# results of pdnn systems

# run-dnn.sh
%WER 15.4 | 1831 21395 | 86.2 8.9 4.8 1.7 15.4 52.7 | exp_pdnn/dnn/decode_eval2000_sw1_tg/score_12/eval2000.ctm.swbd.filt.sys
%WER 21.4 | 4459 42989 | 81.0 12.9 6.1 2.4 21.4 58.8 | exp_pdnn/dnn/decode_eval2000_sw1_tg/score_11/eval2000.ctm.filt.sys

# run-bnf-tandem.sh
%WER 15.9 | 1831 21395 | 85.7 9.5 4.8 1.6 15.9 53.7 | exp_pdnn/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_31/eval2000.ctm.swbd.filt.sys
%WER 22.5 | 4459 42989 | 79.9 13.7 6.4 2.5 22.5 60.9 | exp_pdnn/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_27/eval2000.ctm.filt.sys

%WER 15.3 | 1831 21395 | 86.4 9.3 4.3 1.7 15.3 52.1 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_24/eval2000.ctm.swbd.filt.sys
%WER 15.1 | 1831 21395 | 86.5 9.2 4.4 1.6 15.1 52.3 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_24/eval2000.ctm.swbd.filt.sys
%WER 15.0 | 1831 21395 | 86.4 8.9 4.7 1.5 15.0 52.3 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.swbd.filt.sys
%WER 15.2 | 1831 21395 | 86.5 9.1 4.5 1.6 15.2 52.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_24/eval2000.ctm.swbd.filt.sys
%WER 21.8 | 4459 42989 | 80.6 13.1 6.3 2.3 21.8 59.7 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_24/eval2000.ctm.filt.sys
%WER 21.7 | 4459 42989 | 80.5 12.9 6.6 2.2 21.7 59.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_24/eval2000.ctm.filt.sys
%WER 21.7 | 4459 42989 | 80.4 12.7 6.9 2.1 21.7 59.9 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_24/eval2000.ctm.filt.sys
%WER 21.9 | 4459 42989 | 80.2 12.6 7.2 2.1 21.9 60.4 | exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it4/score_24/eval2000.ctm.filt.sys


# run-dnn-fbank.sh

--------------------------------------------------------------------------------
/run_swbd/run-dnn-fbank.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN system over the filterbank features. It
# is to be run after run.sh. Before running this, you should have already built
# the initial GMM model. This script requires a GPU card, and also the "pdnn"
# toolkit to train the DNN. The input filterbank features are mean- and
# variance-normalized.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn_fbank
gmmdir=exp/tri4b

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_nodup data/lang $gmmdir ${gmmdir}_ali_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Generate the fbank features. We generate the 40-dimensional fbanks on each frame
echo "--num-mel-bins=40" > conf/fbank.conf
echo "--sample-frequency=8000" >> conf/fbank.conf
mkdir -p $working_dir/data
if [ ! -d $working_dir/data/train ]; then
  cp -r data/train_nodup $working_dir/data/train
  ( cd $working_dir/data/train; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 24 $working_dir/data/train $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/train || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/train $working_dir/_log $working_dir/_fbank || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  cp -r data/train_dev $working_dir/data/valid
  ( cd $working_dir/data/valid; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 12 $working_dir/data/valid $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/valid || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/valid $working_dir/_log $working_dir/_fbank || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  cp -r data/eval2000 $working_dir/data/eval2000
  ( cd $working_dir/data/eval2000; rm -rf {cmvn,feats}.scp split*; )
  steps/make_fbank.sh --cmd "$train_cmd" --nj 12 $working_dir/data/eval2000 $working_dir/_log $working_dir/_fbank || exit 1;
  utils/fix_data_dir.sh $working_dir/data/eval2000 || exit 1;
  steps/compute_cmvn_stats.sh $working_dir/data/eval2000 $working_dir/_log $working_dir/_fbank || exit 1;
fi
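# (An illustrative aside, not part of the recipe: unlike the "fake" stats used
# in the fMLLR setup, these CMVN stats are real, and build_nnet_pfile.sh below
# is called with --norm-vars true. To inspect exactly what the network will
# consume, the normalization can be applied by hand:
#   apply-cmvn --norm-vars=true --utt2spk=ark:$working_dir/data/train/utt2spk \
#     scp:$working_dir/data/train/cmvn.scp scp:$working_dir/data/train/feats.scp ark:- | \
#     feat-to-dim ark:- -   # expect 40
# )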
echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, inputs include 11 frames of filterbanks
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars true --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars true --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 7 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi
if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 7 \
      --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"

--------------------------------------------------------------------------------
/run_swbd/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN model over fMLLR features. It is to be
# run after run.sh. Before running this, you should have already built the initial
# GMM model. This script requires a GPU, and also the "pdnn" toolkit to train
# the DNN.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn/dnn
gmmdir=exp/tri4b

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log
! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;

echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_nodup data/lang $gmmdir ${gmmdir}_ali_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Dump fMLLR features. "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_nodup \
    $working_dir/data/train data/train_nodup $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_dev \
    $working_dir/data/valid data/train_dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/valid $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode_eval2000_sw1_tg \
    $working_dir/data/eval2000 data/eval2000 $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/eval2000 $working_dir/_log $working_dir/_fmllr || exit 1;
fi

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, DNN inputs include 11 frames of fMLLR
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 7 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:2048:2048:2048:2048:2048:2048:2048:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 7 \
      --lrate "D:0.08:0.5:0.2,0.2:8" \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
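For a rough sense of the model this trains: with 11 spliced frames of 40-dimensional fMLLR features (440 inputs), seven 2048-unit hidden layers, and a hypothetical pdf count of 8000 (the real value comes from gmm-info above), the weight count works out to

echo $(( 440*2048 + 6*2048*2048 + 2048*8000 ))   # => 42450944, i.e. about 42M weights (biases excluded)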
--------------------------------------------------------------------------------
/run_swbd_110h/RESULTS:
--------------------------------------------------------------------------------
#!/bin/bash

# exp/tri4a : the SAT model
%WER 25.1 | 1831 21395 | 77.5 15.5 7.0 2.6 25.1 62.8 | exp/tri4a/decode_eval2000_sw1_tg/score_16/eval2000.ctm.swbd.filt.sys
%WER 32.7 | 4459 42989 | 70.9 20.7 8.5 3.5 32.7 69.1 | exp/tri4a/decode_eval2000_sw1_tg/score_14/eval2000.ctm.filt.sys

# results of pdnn systems

# run-dnn.sh
%WER 19.2 | 1831 21395 | 83.0 11.5 5.5 2.2 19.2 57.8 | exp_pdnn_110h/dnn/decode_eval2000_sw1_tg/score_10/eval2000.ctm.swbd.filt.sys
%WER 25.6 | 4459 42989 | 77.2 15.8 7.0 2.8 25.6 63.2 | exp_pdnn_110h/dnn/decode_eval2000_sw1_tg/score_10/eval2000.ctm.filt.sys

# run-bnf-tandem.sh
%WER 18.7 | 1831 21395 | 83.0 11.2 5.7 1.8 18.7 56.8 | exp_pdnn_110h/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_29/eval2000.ctm.swbd.filt.sys
%WER 25.5 | 4459 42989 | 76.9 15.5 7.6 2.4 25.5 62.9 | exp_pdnn_110h/bnf_tandem/tri5a/decode_eval2000_sw1_tg/score_27/eval2000.ctm.filt.sys

%WER 18.1 | 1831 21395 | 83.5 10.9 5.6 1.6 18.1 55.3 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_27/eval2000.ctm.swbd.filt.sys
%WER 18.0 | 1831 21395 | 83.5 10.8 5.6 1.6 18.0 55.3 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_26/eval2000.ctm.swbd.filt.sys
%WER 18.0 | 1831 21395 | 83.6 10.7 5.7 1.6 18.0 55.5 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.swbd.filt.sys
%WER 25.1 | 4459 42989 | 77.1 14.9 8.1 2.1 25.1 62.4 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it1/score_26/eval2000.ctm.filt.sys
%WER 25.0 | 4459 42989 | 77.0 14.6 8.5 2.0 25.0 62.4 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it2/score_26/eval2000.ctm.filt.sys
%WER 25.2 | 4459 42989 | 76.8 14.4 8.8 2.0 25.2 62.5 | exp_pdnn_110h/bnf_tandem/tri5a_mmi_b0.1/decode_eval2000_sw1_tg_it3/score_26/eval2000.ctm.filt.sys

# run-dnn-fbank.sh
%WER 21.7 | 1831 21395 | 80.8 13.2 6.0 2.5 21.7 60.0 | exp_pdnn_110h/dnn_fbank/decode_eval2000_sw1_tg/score_10/eval2000.ctm.swbd.filt.sys
%WER 28.2 | 4459 42989 | 74.7 17.4 8.0 2.9 28.2 65.2 | exp_pdnn_110h/dnn_fbank/decode_eval2000_sw1_tg/score_10/eval2000.ctm.filt.sys

# run-cnn.sh.

--------------------------------------------------------------------------------
/run_swbd_110h/run-dnn.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0
# This is the script that trains the DNN model over fMLLR features. It is to be
# run after run.sh. Before running this, you should have already built the initial
# GMM model. This script requires a GPU, and also the "pdnn" toolkit to train
# the DNN.

# For more information regarding the recipes and results, visit the website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn
gmmdir=exp/tri4a

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co https://github.com/yajiemiao/pdnn/trunk pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

! gmm-info $gmmdir/final.mdl >&/dev/null && \
  echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1;

num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;


echo =====================================================================
echo "               Alignment & Feature Preparation                      "
echo =====================================================================
# Alignment on the training and validation data
if [ ! -d ${gmmdir}_ali_100k_nodup ]; then
  steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \
    data/train_100k_nodup data/lang $gmmdir ${gmmdir}_ali_100k_nodup || exit 1
fi
if [ ! -d ${gmmdir}_ali_dev ]; then
  steps/align_fmllr.sh --nj 12 --cmd "$train_cmd" \
    data/train_dev data/lang $gmmdir ${gmmdir}_ali_dev || exit 1
fi

# Dump fMLLR features. "fake" cmvn stats (zero means and unit variances) which apply no normalization
if [ ! -d $working_dir/data/train ]; then
  steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_100k_nodup \
    $working_dir/data/train data/train_100k_nodup $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/train $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/valid ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir ${gmmdir}_ali_dev \
    $working_dir/data/valid data/train_dev $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/valid $working_dir/_log $working_dir/_fmllr || exit 1;
fi
if [ ! -d $working_dir/data/eval2000 ]; then
  steps/nnet/make_fmllr_feats.sh --nj 12 --cmd "$train_cmd" \
    --transform-dir $gmmdir/decode_eval2000_sw1_tg \
    $working_dir/data/eval2000 data/eval2000 $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1
  steps/compute_cmvn_stats.sh --fake \
    $working_dir/data/eval2000 $working_dir/_log $working_dir/_fmllr || exit 1;
fi

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
# By default, DNN inputs include 11 frames of fMLLR
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \
    --norm-vars false --splice-opts "--left-context=5 --right-context=5" \
    $working_dir/data/valid ${gmmdir}_ali_dev $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "               DNN Pre-training & Fine-tuning                       "
echo =====================================================================
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;

# We use SDA because it's faster than RBM
if [ ! -f $working_dir/dnn.ptr.done ]; then
  echo "SDA Pre-training"
  $cmd $working_dir/log/dnn.ptr.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --1stlayer-reconstruct-activation "tanh" \
      --wdir $working_dir --param-output-file $working_dir/dnn.ptr \
      --ptr-layer-number 6 --epoch-number 5 || exit 1;
  touch $working_dir/dnn.ptr.done
fi

# To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:4"
if [ ! -f $working_dir/dnn.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/dnn.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \
      --lrate "D:0.08:0.5:0.2,0.2:8" --param-output-file $working_dir/nnet.param \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/dnn.fine.done
fi

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/decode_dnn.sh --nj 24 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
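The comment before the fine-tuning block mentions a dropout variant. Spelled out against the run_DNN.py call above (all other flags unchanged), the change would be:

# add this flag (one factor per hidden layer):
#   --dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2
# and replace the learning-rate schedule:
#   --lrate "D:0.08:0.5:0.2,0.2:8"   -->   --lrate "D:0.8:0.5:0.2,0.2:4"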
--------------------------------------------------------------------------------
/run_swbd_110h/sat/run-dnn-fbank-sat.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that performs speaker adaptive training (SAT) of the
# DNN model which has been trained on the filterbank features. It is to be
# run after run-dnn-fbank.sh.

# Yajie Miao, Hao Zhang, Florian Metze. "Towards Speaker Adaptive Training
# of Deep Neural Network Acoustic Models". Interspeech 2014.

# You need two additional commands to execute this recipe: get-spkvec-feat
# and add-feats. Download the following two source files and put them under
# src/featbin. Compiling them will then give you the required commands.

# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/get-spkvec-feat.cc
# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/add-feats.cc
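# (A hedged note on building those binaries: assuming a compiled Kaldi source
# tree, one way is to drop the two .cc files into src/featbin, add
# get-spkvec-feat and add-feats to the BINFILES list in src/featbin/Makefile,
# and rebuild that directory, e.g.:
#   cd $KALDI_ROOT/src/featbin && make
# where $KALDI_ROOT is the root of your Kaldi checkout.)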
# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn_fbank_sat
initdnn_dir=exp_pdnn_110h/dnn_fbank   # the directory of the initial DNN model

gmmdir=exp/tri4a   # GMM model directory

# I-vectors for the training and decoding speakers. There should be an ivector.scp
# file in each of these two directories.
train_ivec=exp_ivec/ivectors_swbd1
decode_ivec=exp_ivec/ivectors_eval2000

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

# Check whether i-vectors have been generated
for f in $train_ivec/ivector.scp $decode_ivec/ivector.scp; do
  [ ! -f $f ] && echo "Error i-vectors for $f have NOT been extracted. Check/Run run_swbd_110h/sat/run-ivec-extract.sh." && exit 1;
done
# Check whether the initial DNN has been trained
if [ ! -f $initdnn_dir/nnet.param ]; then
  echo "Error the initial DNN $initdnn_dir/nnet.param has NOT been trained" && exit 1;
fi

# Prepare dataset; copy related files from the initial DNN directory
ln -s $PWD/$initdnn_dir/data $working_dir/data || exit 1;
cp $initdnn_dir/{splice_opts,norm_vars,add_deltas} $working_dir || exit 1;
splice_opts=`cat $working_dir/splice_opts 2>/dev/null`   # frame-splicing options.
norm_vars=`cat $working_dir/norm_vars 2>/dev/null`       # variance normalization?
add_deltas=`cat $working_dir/add_deltas 2>/dev/null`     # add deltas?

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================

if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $train_ivec $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
if [ ! -f $working_dir/valid.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/valid ${gmmdir}_ali_dev $train_ivec $working_dir || exit 1
  touch $working_dir/valid.pfile.done
fi

echo =====================================================================
echo "                       SAT-DNN Fine-tuning                          "
echo =====================================================================
num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1;
ivec_dim=`feat-to-dim scp:$train_ivec/ivector.scp ark,t:- | head -1 | awk '{print $2}'` || exit 1;
feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1;
feat_dim=$[$feat_dim-$ivec_dim]

# NOTE: the definition of "--si-nnet-spec" here has to be the same as "--nnet-spec" in run-dnn-fbank.sh
if [ ! -f $working_dir/sat.fine.done ]; then
  echo "Fine-tuning DNN"
  $cmd $working_dir/log/sat.fine.log \
    export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \
    export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \
    $pythonCMD pdnn/run_DNN_SAT.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \
      --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \
      --si-nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \
      --adapt-nnet-spec "$ivec_dim:512:512:512" --init-model $initdnn_dir/nnet.param \
      --lrate "D:0.08:0.5:0.05,0.05:0" --param-output-file $working_dir/nnet.param \
      --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1;
  touch $working_dir/sat.fine.done
fi

# Remove the last line " *** ***" of dnn.nnet.adapt, because the output layer of the adaptation network
# uses the linear activation function
( cd $working_dir; head -n -1 dnn.nnet.adapt > dnn.nnet.adapt.tmp; mv dnn.nnet.adapt.tmp dnn.nnet.adapt; )

echo =====================================================================
echo "                            Decoding                                "
echo =====================================================================
if [ ! -f $working_dir/decode.done ]; then
  cp $gmmdir/final.mdl $working_dir || exit 1;  # copy final.mdl for scoring
  graph_dir=$gmmdir/graph_sw1_tg
  steps_pdnn/sat/decode_dnn_ivec.sh --nj 24 --scoring-opts "--min-lmwt 8 --max-lmwt 16" --cmd "$decode_cmd" --ivec-type speaker \
    $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $decode_ivec $working_dir/decode_eval2000_sw1_tg || exit 1;
  touch $working_dir/decode.done
fi

echo "Finish !!"
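To make the feat_dim arithmetic above concrete: with the 40-dimensional fbanks of run-dnn-fbank.sh spliced +/-5 and the 100-dimensional i-vectors produced by run-ivec-extract.sh, each pfile row carries 40 * 11 + 100 = 540 values, so feat_dim works out back to 540 - 100 = 440 and ivec_dim to 100; that is, --si-nnet-spec starts at 440 and --adapt-nnet-spec at 100.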
--------------------------------------------------------------------------------
/run_swbd_110h/sat/run-dnn-sat.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Apache 2.0
# This is the script that performs speaker adaptive training (SAT) of the
# DNN model which has been trained on the fMLLR features. It is to be run
# after run-dnn.sh.

# Yajie Miao, Hao Zhang, Florian Metze. "Towards Speaker Adaptive Training
# of Deep Neural Network Acoustic Models". Interspeech 2014.

# You need two additional commands to execute this recipe: get-spkvec-feat
# and add-feats. Download the following two source files and put them under
# src/featbin. Compiling them will then give you the required commands.

# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/get-spkvec-feat.cc
# http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/add-feats.cc

# For more information regarding the recipes and results, visit our website
# http://www.cs.cmu.edu/~ymiao/kaldipdnn

working_dir=exp_pdnn_110h/dnn_sat
initdnn_dir=exp_pdnn_110h/dnn   # the directory of the initial DNN model

gmmdir=exp/tri4a   # GMM model directory

# I-vectors for the training and decoding speakers. There should be an ivector.scp
# file in each of these two directories.
train_ivec=exp_ivec/ivectors_swbd1
decode_ivec=exp_ivec/ivectors_eval2000

# Specify the gpu device to be used
gpu=gpu

cmd=run.pl
. cmd.sh
[ -f path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

# At this point you may want to make sure the directory $working_dir is
# somewhere with a lot of space, preferably on the local GPU-containing machine.
if [ ! -d pdnn ]; then
  echo "Checking out PDNN code."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/pdnn pdnn
fi

if [ ! -d steps_pdnn ]; then
  echo "Checking out steps_pdnn scripts."
  svn co svn://svn.code.sf.net/p/kaldipdnn/code-0/trunk/steps_pdnn steps_pdnn
fi

if ! nvidia-smi; then
  echo "The command nvidia-smi was not found: this probably means you don't have a GPU."
  echo "(Note: this script might still work, it would just be slower.)"
fi

# The hope here is that Theano has been installed either to python or to python2.6
pythonCMD=python
if ! python -c 'import theano;'; then
  if ! python2.6 -c 'import theano;'; then
    echo "Theano does not seem to be installed on your machine. Not continuing."
    echo "(Note: this script might still work, it would just be slower.)"
    exit 1;
  else
    pythonCMD=python2.6
  fi
fi

mkdir -p $working_dir/log

# Check whether i-vectors have been generated
for f in $train_ivec/ivector.scp $decode_ivec/ivector.scp; do
  [ ! -f $f ] && echo "Error i-vectors for $f have NOT been extracted. Check/Run run_swbd_110h/sat/run-ivec-extract.sh." && exit 1;
done
# Check whether the initial DNN has been trained
[ ! -f $initdnn_dir/nnet.param ] && echo "Error the initial DNN $initdnn_dir/nnet.param has NOT been trained" && exit 1;

# Prepare dataset; copy related files from the initial DNN directory
ln -s $PWD/$initdnn_dir/data $working_dir/data || exit 1;
cp $initdnn_dir/{splice_opts,norm_vars,add_deltas} $working_dir || exit 1;
splice_opts=`cat $working_dir/splice_opts 2>/dev/null`   # frame-splicing options.
norm_vars=`cat $working_dir/norm_vars 2>/dev/null`       # variance normalization?
add_deltas=`cat $working_dir/add_deltas 2>/dev/null`     # add deltas?

echo =====================================================================
echo "               Training and Cross-Validation Pfiles                 "
echo =====================================================================
if [ ! -f $working_dir/train.pfile.done ]; then
  steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \
    --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \
    --ivec-type speaker \
    $working_dir/data/train ${gmmdir}_ali_100k_nodup $train_ivec $working_dir || exit 1
  touch $working_dir/train.pfile.done
fi
-f $working_dir/valid.pfile.done ]; then 95 | steps_pdnn/sat/build_nnet_pfile_ivec.sh --cmd "$train_cmd" --every-nth-frame 1 --do-concat false \ 96 | --norm-vars $norm_vars --splice-opts "$splice_opts" --add-deltas $add_deltas \ 97 | --ivec-type speaker \ 98 | $working_dir/data/valid ${gmmdir}_ali_dev $train_ivec $working_dir || exit 1 99 | touch $working_dir/valid.pfile.done 100 | fi 101 | 102 | echo ===================================================================== 103 | echo " SAT-DNN Fine-tuning " 104 | echo ===================================================================== 105 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 106 | ivec_dim=`feat-to-dim scp:$train_ivec/ivector.scp ark,t:- | head -1 | awk '{print $2}'` || exit 1; 107 | feat_dim=$(gunzip -c $working_dir/train.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 108 | feat_dim=$[$feat_dim-$ivec_dim] 109 | 110 | # NOTE: the definition of "--si-nnet-spec" here has to be the same as "--nnet-spec" in run-dnn.sh 111 | if [ ! -f $working_dir/sat.fine.done ]; then 112 | echo "Fine-tuning DNN" 113 | $cmd $working_dir/log/sat.fine.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/run_DNN_SAT.py --train-data "$working_dir/train.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --valid-data "$working_dir/valid.pfile.*.gz,partition=600m,random=true,stream=true" \ 118 | --si-nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 119 | --adapt-nnet-spec "$ivec_dim:512:512:512" --init-model $initdnn_dir/nnet.param \ 120 | --lrate "D:0.08:0.5:0.05,0.05:0" --param-output-file $working_dir/nnet.param \ 121 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 122 | touch $working_dir/sat.fine.done 123 | fi 124 | 125 | # Remove the last line " *** ***" of dnn.nnet.adapt, because the output layer of the adaptation network 126 | # uses the linear activation function 127 | ( cd $working_dir; head -n -1 dnn.nnet.adapt > dnn.nnet.adapt.tmp; mv dnn.nnet.adapt.tmp dnn.nnet.adapt; ) 128 | 129 | echo ===================================================================== 130 | echo " Decoding " 131 | echo ===================================================================== 132 | if [ ! -f $working_dir/decode.done ]; then 133 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 134 | graph_dir=$gmmdir/graph_sw1_tg 135 | steps_pdnn/sat/decode_dnn_ivec.sh --nj 24 --scoring-opts "--min-lmwt 8 --max-lmwt 16" --cmd "$decode_cmd" --ivec-type speaker \ 136 | $graph_dir $working_dir/data/eval2000 ${gmmdir}_ali_100k_nodup $decode_ivec $working_dir/decode_eval2000_sw1_tg || exit 1; 137 | touch $working_dir/decode.done 138 | fi 139 | 140 | echo "Finish !!" 141 | -------------------------------------------------------------------------------- /run_swbd_110h/sat/run-ivec-extract.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Apache 2.0 4 | # This is the script that trains an i-vector extractor on the entire set of 5 | # swbd1. The i-vectors will be used in the run-dnn-fbank-sat.sh and run-dnn 6 | # -sat.sh recipes. 7 | 8 | # For more information regarding the recipes and results, visit our website 9 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 10 | 11 | cmd=run.pl 12 | . cmd.sh 13 | [ -f path.sh ] && . ./path.sh 14 | . parse_options.sh || exit 1; 15 | 16 | # Link the scripts from the sre recipe to here 17 | if [ ! -d sid ]; then 18 | ln -s ../../sre08/v1/sid ./ 19 | fi 20 | mkdir -p data_ivec mfcc_ivec exp_ivec 21 | 22 | echo --------------------------------------------------------------------- 23 | echo "Train the i-vector extractor with the entire SWBD set (310 hours)" 24 | echo --------------------------------------------------------------------- 25 | 26 | # MFCC config borrowed from egs/sre08/v1 27 | echo "--sample-frequency=8000" > conf/mfcc.conf.ivec 28 | echo "--frame-length=20" >> conf/mfcc.conf.ivec 29 | echo "--low-freq=20" >> conf/mfcc.conf.ivec 30 | echo "--high-freq=3700" >> conf/mfcc.conf.ivec 31 | echo "--num-ceps=20" >> conf/mfcc.conf.ivec 32 | # Config for VAD (voice activity detection) borrowed from egs/sre08/v1 33 | echo "--vad-energy-threshold=5.5" > conf/vad.conf 34 | echo "--vad-energy-mean-scale=0.5" >> conf/vad.conf 35 | 36 | if [ ! -d data_ivec/swbd1 ]; then 37 | echo "Save features for swbd1" 38 | mkdir -p data_ivec/swbd1; cat data/train/wav.scp | awk '{gsub("^sw0","",$1); print $1 " " $2;}' > data_ivec/swbd1/wav.scp 39 | ( cd data_ivec/swbd1; cat wav.scp | awk '{print $1 " " $1}' > utt2spk; cp utt2spk spk2utt; ) 40 | 41 | steps/make_mfcc.sh --mfcc-config conf/mfcc.conf.ivec --nj 24 --cmd "$train_cmd" \ 42 | data_ivec/swbd1 exp_ivec/make_mfcc mfcc_ivec || exit 1; 43 | sid/compute_vad_decision.sh --nj 24 --cmd "$train_cmd" \ 44 | data_ivec/swbd1 exp_ivec/make_mfcc mfcc_ivec || exit 1; 45 | utils/fix_data_dir.sh data_ivec/swbd1 || exit 1; 46 | fi 47 | 48 | if [ ! -d data_ivec/eval2000 ]; then 49 | echo "Save features for eval2000" 50 | mkdir -p data_ivec/eval2000; cp data/eval2000/wav.scp data_ivec/eval2000 51 | ( cd data_ivec/eval2000; cat wav.scp | awk '{print $1 " " $1}' > utt2spk; cp utt2spk spk2utt; ) 52 | steps/make_mfcc.sh --mfcc-config conf/mfcc.conf.ivec --nj 24 --cmd "$train_cmd" \ 53 | data_ivec/eval2000 exp_ivec/make_mfcc mfcc_ivec || exit 1; 54 | sid/compute_vad_decision.sh --nj 24 --cmd "$train_cmd" \ 55 | data_ivec/eval2000 exp_ivec/make_mfcc mfcc_ivec || exit 1; 56 | utils/fix_data_dir.sh data_ivec/eval2000 || exit 1; 57 | fi 58 | 59 | # Train the diagonal and full UBMs 60 | if [ ! -f exp_ivec/diag_ubm_2048/final.dubm ]; then 61 | echo "Train the diagonal UBM" 62 | sid/train_diag_ubm.sh --parallel-opts "" --nj 24 --cmd "$train_cmd" \ 63 | data_ivec/swbd1 2048 exp_ivec/diag_ubm_2048 || exit 1; 64 | fi 65 | if [ ! -f exp_ivec/full_ubm_2048/final.ubm ]; then 66 | echo "Train the full UBM" 67 | sid/train_full_ubm.sh --nj 24 --cmd "$train_cmd" \ 68 | data_ivec/swbd1 exp_ivec/diag_ubm_2048 exp_ivec/full_ubm_2048 || exit 1; 69 | fi 70 | 71 | # Train the i-vector extractor 72 | if [ ! -f exp_ivec/extractor_2048/final.ie ]; then 73 | echo "Train the i-vector extractor" 74 | sid/train_ivector_extractor.sh --nj 24 --num-threads 1 --num-processes 1 \ 75 | --cmd "$train_cmd" --ivector-dim 100 --num-iters 10 \ 76 | exp_ivec/full_ubm_2048/final.ubm data_ivec/swbd1 exp_ivec/extractor_2048 77 | fi 78 | 79 | # Generate i-vectors on the training and testing (decoding) sets 80 | if [ ! -f exp_ivec/ivectors_swbd1/ivector.scp ]; then 81 | echo "Extract i-vectors for the swbd1 speakers" 82 | sid/extract_ivectors.sh --cmd "$train_cmd" --nj 24 \ 83 | exp_ivec/extractor_2048 data_ivec/swbd1 exp_ivec/ivectors_swbd1 84 | fi 85 | if [ !
-f exp_ivec/ivectors_eval2000/ivector.scp ]; then 86 | echo "Extract i-vectors for the eval2000 speakers" 87 | sid/extract_ivectors.sh --cmd "$train_cmd" --nj 24 \ 88 | exp_ivec/extractor_2048 data_ivec/eval2000 exp_ivec/ivectors_eval2000 89 | fi 90 | 91 | echo "Finish! Now you can safely delete data_ivec and mfcc_ivec." 92 | -------------------------------------------------------------------------------- /run_tedlium/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # results of tri3, the SAT system 4 | %WER 27.4 | 507 17792 | 77.9 16.6 5.5 5.4 27.4 96.1 | -0.176 | exp/tri3/decode_dev/score_18/ctm.filt.filt.sys 5 | %WER 24.7 | 1155 27512 | 79.6 16.3 4.1 4.3 24.7 93.2 | -0.053 | exp/tri3/decode_test/score_15/ctm.filt.filt.sys 6 | 7 | # -------------------------------------------------------------------------------------------------------------- 8 | # run-dnn.sh : DNN Models over fMLLR features 9 | # -------------------------------------------------------------------------------------------------------------- 10 | %WER 23.3 | 507 17792 | 81.9 13.9 4.2 5.2 23.3 94.1 | -0.117 | exp_pdnn/dnn/decode_dev/score_10/ctm.filt.filt.sys 11 | %WER 20.4 | 1155 27512 | 83.1 13.0 3.9 3.5 20.4 90.7 | 0.020 | exp_pdnn/dnn/decode_test/score_11/ctm.filt.filt.sys 12 | # when set "--momentum 0.9" during SdA pre-training 13 | %WER 23.4 | 507 17792 | 81.7 13.9 4.4 5.1 23.4 93.9 | -0.094 | exp_pdnn/dnn/decode_dev/score_10/ctm.filt.filt.sys 14 | %WER 20.2 | 1155 27512 | 83.2 12.9 3.9 3.4 20.2 91.1 | 0.003 | exp_pdnn/dnn/decode_test/score_11/ctm.filt.filt.sys 15 | 16 | # -------------------------------------------------------------------------------------------------------------- 17 | # run-dnn-maxout.sh : Deep Maxout Networks over fMLLR features 18 | # -------------------------------------------------------------------------------------------------------------- 19 | %WER 22.9 | 507 17792 | 82.2 13.4 4.4 5.2 22.9 94.1 | -0.148 | exp_pdnn/dnn_maxout/decode_dev/score_11/ctm.filt.filt.sys 20 | %WER 19.7 | 1155 27512 | 83.6 12.6 3.8 3.3 19.7 90.1 | -0.032 | exp_pdnn/dnn_maxout/decode_test/score_12/ctm.filt.filt.sys 21 | 22 | # -------------------------------------------------------------------------------------------------------------- 23 | # run-bnf-tandem.sh : Tandem Systems with BNFs Trained on fMLLRs 24 | # -------------------------------------------------------------------------------------------------------------- 25 | # LDA+MLLT 26 | %WER 23.4 | 507 17792 | 81.9 13.6 4.5 5.3 23.4 93.7 | -0.185 | exp_pdnn/bnf_tandem/tri4/decode_dev/score_29/ctm.filt.filt.sys 27 | %WER 20.4 | 1155 27512 | 83.5 13.1 3.4 4.0 20.4 90.1 | -0.075 | exp_pdnn/bnf_tandem/tri4/decode_test/score_30/ctm.filt.filt.sys 28 | # MMI 29 | %WER 22.7 | 507 17792 | 82.2 13.2 4.6 4.9 22.7 93.7 | -0.157 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it1/score_29/ctm.filt.filt.sys 30 | %WER 22.4 | 507 17792 | 82.4 13.2 4.3 4.8 22.4 93.7 | -0.164 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it2/score_26/ctm.filt.filt.sys 31 | %WER 22.0 | 507 17792 | 82.5 13.0 4.5 4.6 22.0 93.7 | -0.150 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it3/score_26/ctm.filt.filt.sys 32 | %WER 22.1 | 507 17792 | 82.4 12.9 4.7 4.5 22.1 94.3 | -0.146 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it4/score_26/ctm.filt.filt.sys 33 | %WER 19.8 | 1155 27512 | 83.9 12.6 3.4 3.8 19.8 89.6 | -0.050 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it1/score_28/ctm.filt.filt.sys 34 | %WER 19.5 | 1155 27512 | 84.0 12.3 3.7 
3.5 19.5 89.4 | -0.029 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it2/score_28/ctm.filt.filt.sys 35 | %WER 19.3 | 1155 27512 | 84.2 12.2 3.6 3.5 19.3 88.9 | -0.018 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it3/score_26/ctm.filt.filt.sys 36 | %WER 19.4 | 1155 27512 | 84.0 12.2 3.9 3.4 19.4 89.7 | 0.004 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it4/score_26/ctm.filt.filt.sys 37 | # SGMM 38 | %WER 23.0 | 507 17792 | 81.5 13.4 5.1 4.5 23.0 94.1 | -0.183 | exp_pdnn/bnf_tandem/sgmm5a/decode_dev/score_20/ctm.filt.filt.sys 39 | %WER 20.6 | 1155 27512 | 82.7 13.1 4.2 3.3 20.6 90.7 | -0.070 | exp_pdnn/bnf_tandem/sgmm5a/decode_test/score_20/ctm.filt.filt.sys 40 | 41 | # -------------------------------------------------------------------------------------------------------------- 42 | # run-dnn-fbank.sh : DNN Models over filterbank features 43 | # -------------------------------------------------------------------------------------------------------------- 44 | %WER 24.5 | 507 17792 | 80.8 14.8 4.4 5.4 24.5 96.1 | -0.007 | exp_pdnn/dnn_fbank/decode_dev/score_10/ctm.filt.filt.sys 45 | %WER 21.4 | 1155 27512 | 82.6 13.8 3.7 3.9 21.4 91.7 | 0.068 | exp_pdnn/dnn_fbank/decode_test/score_10/ctm.filt.filt.sys 46 | 47 | # -------------------------------------------------------------------------------------------------------------- 48 | # run-cnn.sh : CNN Models over filterbank features 49 | # -------------------------------------------------------------------------------------------------------------- 50 | %WER 22.7 | 507 17792 | 82.7 13.6 3.7 5.4 22.7 93.7 | -0.174 | exp_pdnn/cnn/decode_dev/score_10/ctm.filt.filt.sys 51 | %WER 19.7 | 1155 27512 | 83.9 12.6 3.4 3.7 19.7 90.0 | -0.014 | exp_pdnn/cnn/decode_test/score_11/ctm.filt.filt.sys 52 | -------------------------------------------------------------------------------- /run_tedlium/run-dnn-maxout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains Maxout Network models over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # We implement the maxout activation function based on Kaldi revision 4960. 10 | # Please follow these steps: 11 | # 1. Go to /path/to/kaldi/src/nnet and *backup* nnet-component.h, nnet-component.cc, nnet-activation.h 12 | # 2. Download these 3 files from here: 13 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.h 14 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.cc 15 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-activation.h 16 | # 3. Recompile Kaldi 17 | 18 | # For more information regarding the recipes and results, visit the website 19 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 20 | 21 | working_dir=exp_pdnn/dnn_maxout 22 | gmmdir=exp/tri3 23 | 24 | # Specify the gpu device to be used 25 | gpu=gpu 26 | 27 | cmd=run.pl 28 | . cmd.sh 29 | [ -f path.sh ] && . ./path.sh 30 | . parse_options.sh || exit 1; 31 | 32 | # At this point you may want to make sure the directory $working_dir is 33 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 34 | if [ ! -d pdnn ]; then 35 | echo "Checking out PDNN code." 36 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 37 | fi 38 | 39 | if [ ! -d steps_pdnn ]; then 40 | echo "Checking out steps_pdnn scripts." 41 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 42 | fi 43 | 44 | if ! nvidia-smi; then 45 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | fi 48 | 49 | # The hope here is that Theano has been installed either to python or to python2.6 50 | pythonCMD=python 51 | if ! python -c 'import theano;'; then 52 | if ! python2.6 -c 'import theano;'; then 53 | echo "Theano does not seem to be installed on your machine. Not continuing." 54 | echo "(Please install Theano first, then re-run this script.)" 55 | exit 1; 56 | else 57 | pythonCMD=python2.6 58 | fi 59 | fi 60 | 61 | mkdir -p $working_dir/log 62 | 63 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 64 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 65 | 66 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 67 | 68 | echo ===================================================================== 69 | echo " Data Split & Alignment & Feature Preparation " 70 | echo ===================================================================== 71 | # Split training data into training and cross-validation sets for DNN 72 | if [ ! -d data/train_tr95 ]; then 73 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 74 | fi 75 | # Alignment on the training and validation data. 76 | for set in tr95 cv05; do 77 | if [ ! -d ${gmmdir}_ali_$set ]; then 78 | steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \ 79 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 80 | fi 81 | done 82 | # Dump fMLLR features. "Fake" cmvn stats (zero mean, unit variance) are applied. 83 | for set in tr95 cv05; do 84 | if [ ! -d $working_dir/data/train_$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \ 86 | --transform-dir ${gmmdir}_ali_$set \ 87 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | for set in dev test; do 93 | if [ ! -d $working_dir/data/$set ]; then 94 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 95 | --transform-dir $gmmdir/decode_$set \ 96 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 97 | steps/compute_cmvn_stats.sh --fake \ 98 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 99 | fi 100 | done 101 | 102 | echo ===================================================================== 103 | echo " Training and Cross-Validation Pfiles " 104 | echo ===================================================================== 105 | # By default, DNN inputs include 11 frames of fMLLR 106 | for set in tr95 cv05; do 107 | if [ !
-f $working_dir/${set}.pfile.done ]; then 108 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 109 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 110 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 111 | touch $working_dir/${set}.pfile.done 112 | fi 113 | done 114 | 115 | echo ===================================================================== 116 | echo " DNN Pre-training & Fine-tuning " 117 | echo ===================================================================== 118 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 119 | 120 | if [ ! -f $working_dir/dnn.fine.done ]; then 121 | echo "Fine-tuning DNN" 122 | $cmd $working_dir/log/dnn.fine.log \ 123 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 124 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 125 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 126 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 127 | --nnet-spec "$feat_dim:650:650:650:650:650:650:$num_pdfs" \ 128 | --activation "maxout:3" \ 129 | --lrate "D:0.008:0.5:0.2,0.2:8" \ 130 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 131 | touch $working_dir/dnn.fine.done 132 | fi 133 | 134 | echo ===================================================================== 135 | echo " Decoding " 136 | echo ===================================================================== 137 | if [ ! -f $working_dir/decode.done ]; then 138 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 139 | graph_dir=$gmmdir/graph 140 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 141 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 142 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 143 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 144 | touch $working_dir/decode.done 145 | fi 146 | # Decoding with our own pruned trigram LM. 147 | if [ ! -f $working_dir/decode.bd.done ] && [ -d $gmmdir/graph_bd_tgpr ]; then 148 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 149 | graph_dir=$gmmdir/graph_bd_tgpr 150 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 151 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr || exit 1; 152 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 153 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr || exit 1; 154 | touch $working_dir/decode.bd.done 155 | fi 156 | 157 | echo "Finish !!" 158 | -------------------------------------------------------------------------------- /run_tedlium/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 
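#
# A quick sanity-check sketch (an editorial addition, not part of the original
# recipe): with the "--left-context=5 --right-context=5" splicing used below,
# the DNN input dimension is (5+1+5) * base feature dim, e.g. 11 * 40 = 440
# for typical 40-dimensional fMLLR features. Once the pfiles are built, the
# value can be double-checked against the pfile header, the same field the
# feat_dim line below reads:
#   gunzip -c $working_dir/train_tr95.pfile.1.gz | head | grep num_features
#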
8 | 9 | # For more information regarding the recipes and results, visit the website 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Please install Theano first, then re-run this script.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into training and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data. 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 24 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | # Dump fMLLR features. "Fake" cmvn stats (zero mean, unit variance) are applied. 74 | for set in tr95 cv05; do 75 | if [ ! -d $working_dir/data/train_$set ]; then 76 | steps/nnet/make_fmllr_feats.sh --nj 24 --cmd "$train_cmd" \ 77 | --transform-dir ${gmmdir}_ali_$set \ 78 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 79 | steps/compute_cmvn_stats.sh --fake \ 80 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 81 | fi 82 | done 83 | for set in dev test; do 84 | if [ !
-d $working_dir/data/$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 86 | --transform-dir $gmmdir/decode_$set \ 87 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | 93 | echo ===================================================================== 94 | echo " Training and Cross-Validation Pfiles " 95 | echo ===================================================================== 96 | # By default, DNN inputs include 11 frames of fMLLR 97 | for set in tr95 cv05; do 98 | if [ ! -f $working_dir/${set}.pfile.done ]; then 99 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 100 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 101 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 102 | touch $working_dir/${set}.pfile.done 103 | fi 104 | done 105 | 106 | echo ===================================================================== 107 | echo " DNN Pre-training & Fine-tuning " 108 | echo ===================================================================== 109 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 110 | 111 | if [ ! -f $working_dir/dnn.ptr.done ]; then 112 | echo "SDA Pre-training" 113 | $cmd $working_dir/log/dnn.ptr.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 118 | --1stlayer-reconstruct-activation "tanh" \ 119 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 120 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 121 | touch $working_dir/dnn.ptr.done 122 | fi 123 | 124 | if [ ! -f $working_dir/dnn.fine.done ]; then 125 | echo "Fine-tuning DNN" 126 | $cmd $working_dir/log/dnn.fine.log \ 127 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 128 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 129 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 130 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 131 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 132 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 133 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 134 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 135 | touch $working_dir/dnn.fine.done 136 | fi 137 | 138 | echo ===================================================================== 139 | echo " Decoding " 140 | echo ===================================================================== 141 | if [ ! 
-f $working_dir/decode.done ]; then 142 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 143 | graph_dir=$gmmdir/graph 144 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 145 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 146 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 147 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 148 | touch $working_dir/decode.done 149 | fi 150 | # Decoding with our own LM. This trigram LM is trained over TED talk transcripts and is pruned. 151 | if [ ! -f $working_dir/decode.bd.done ] && [ -d $gmmdir/graph_bd_tgpr ]; then 152 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 153 | graph_dir=$gmmdir/graph_bd_tgpr 154 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 155 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr || exit 1; 156 | steps_pdnn/decode_dnn.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 157 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr || exit 1; 158 | touch $working_dir/decode.bd.done 159 | fi 160 | 161 | echo "Finish !!" 162 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-lhuc-bkup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | stage=1 19 | 20 | cmd=run.pl 21 | . cmd.sh 22 | [ -f path.sh ] && . ./path.sh 23 | . parse_options.sh || exit 1; 24 | 25 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 26 | 27 | echo ===================================================================== 28 | echo " Prepare Adaptation Data & Alignment " 29 | echo ===================================================================== 30 | 31 | firstpass=$working_dir/decode_dev_bd_tgpr 32 | dir=$working_dir/decode_dev_bd_tgpr_lhuc_V3 33 | mkdir -p $dir/log 34 | 35 | nj=8 36 | echo $nj > $dir/num_jobs 37 | 38 | if [ $stage -eq 1 ]; then 39 | 40 | $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 41 | lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c $firstpass/lat.JOB.gz|" ark:- \| \ 42 | lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 43 | rm -rf $dir/tra.* 44 | 45 | cp $gmmdir/final.mdl $dir 46 | for set in dev; do 47 | if [ ! -f $dir/${set}.pfile.done ]; then 48 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 49 | --norm-vars true --splice-opts "--left-context=5 --right-context=5" \ 50 | $working_dir/data/$set $dir $dir || exit 1 51 | touch $dir/${set}.pfile.done 52 | fi 53 | done 54 | 55 | fi 56 | 57 | echo ===================================================================== 58 | echo " DNN Pre-training & Fine-tuning " 59 | echo ===================================================================== 60 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 61 | 62 | #if [ ! -f $decode_dir/dnn.fine.done ]; then 63 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 64 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 65 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 66 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 67 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 68 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 69 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 70 | # --lrate "C:0.8:3" \ 71 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 72 | # touch $working_dir/dnn.fine.done 73 | #fi 74 | 75 | echo ===================================================================== 76 | echo " Decoding " 77 | echo ===================================================================== 78 | if [ $stage -eq 2 ]; then 79 | graph_dir=$gmmdir/graph_bd_tgpr 80 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 81 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $dir || exit 1; 82 | fi 83 | 84 | echo "Finish !!" 85 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | stage=1 19 | inv_acoustic_scale=10 20 | 21 | cmd=run.pl 22 | . cmd.sh 23 | [ -f path.sh ] && . ./path.sh 24 | . parse_options.sh || exit 1; 25 | 26 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 27 | 28 | echo ===================================================================== 29 | echo " Prepare Adaptation Data & Alignment " 30 | echo ===================================================================== 31 | 32 | if [ $stage -eq 1 ]; then 33 | 34 | for set in dev test; do 35 | dir=$working_dir/decode_${set}_bd_tgpr_lhuc 36 | mkdir -p $dir/log 37 | 38 | case $set in 39 | dev) nj=8;; 40 | test) nj=11;; 41 | *) echo "$0: invalid set name $set" && exit 1; 42 | esac 43 | 44 | echo $nj > $dir/num_jobs 45 | 46 | $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 47 | lattice-scale --inv-acoustic-scale=$inv_acoustic_scale "ark:gunzip -c $working_dir/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 48 | lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 49 | 50 | rm -rf $dir/tra.* 51 | 52 | cp $gmmdir/final.mdl $dir 53 | if [ ! -f $dir/${set}.pfile.done ]; then 54 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 55 | --norm-vars false --splice-opts "--left-context=5 --right-context=5" \ 56 | $working_dir/data/$set $dir $dir || exit 1 57 | touch $dir/${set}.pfile.done 58 | fi 59 | done 60 | fi 61 | 62 | echo ===================================================================== 63 | echo " DNN Pre-training & Fine-tuning " 64 | echo ===================================================================== 65 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 66 | 67 | #if [ ! -f $decode_dir/dnn.fine.done ]; then 68 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 69 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 70 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 71 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 72 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 73 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 74 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 75 | # --lrate "C:0.8:3" \ 76 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 77 | # touch $working_dir/dnn.fine.done 78 | #fi 79 | 80 | echo ===================================================================== 81 | echo " Decoding " 82 | echo ===================================================================== 83 | if [ $stage -eq 2 ]; then 84 | graph_dir=$gmmdir/graph_bd_tgpr 85 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 86 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 87 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 88 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 89 | fi 90 | 91 | echo "Finish !!" 92 | -------------------------------------------------------------------------------- /run_tedlium/tmp/run-dnn-sat-lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains a DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are mean- and 8 | # variance-normalized. 9 | 10 | # For more information regarding the recipes and results, visit the website 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank_sat 14 | gmmdir=exp/tri3 15 | dnndir=exp_pdnn/dnn_fbank 16 | 17 | # Specify the gpu device to be used 18 | gpu=gpu 19 | stage=1 20 | 21 | cmd=run.pl 22 | . cmd.sh 23 | [ -f path.sh ] && . ./path.sh 24 | . parse_options.sh || exit 1; 25 | 26 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 27 | 28 | echo ===================================================================== 29 | echo " Prepare Adaptation Data & Alignment " 30 | echo ===================================================================== 31 | 32 | #ivec_dir="/data/ASR5/babel/ymiao/Install/kaldi-latest/egs/sre08/v1/exp_tedlium_V2/ivectors_devtest" 33 | ivec_dir="/data/ASR5/babel/ymiao/Install/kaldi-latest/egs/sre08/v1/exp_tedlium_bnf/ivectors_devtest" 34 | 35 | if [ $stage -eq 1 ]; then 36 | 37 | for set in dev test; do 38 | dir=$working_dir/decode_${set}_bd_tgpr_lhuc 39 | mkdir -p $dir/log 40 | 41 | case $set in 42 | dev) nj=8;; 43 | test) nj=11;; 44 | *) echo "$0: invalid set name $set" && exit 1; 45 | esac 46 | 47 | echo $nj > $dir/num_jobs 48 | 49 | steps_pdnn/sat/make_feat_with_ivec.sh --nj $nj --cmd "$train_cmd" --ivec-type speaker \ 50 | $working_dir/data_ivec/$set $working_dir/data/$set $working_dir $ivec_dir $working_dir/_log $working_dir/_ivec || exit 1; 51 | steps/compute_cmvn_stats.sh --fake \ 52 | $working_dir/data_ivec/$set $working_dir/_log $working_dir/_ivec || exit 1; 53 | 54 | # $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 55 | # lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c $working_dir/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 56 | # lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 57 | 58 | # $cmd JOB=1:$nj $dir/log/best_path.JOB.log \ 59 | # lattice-scale --inv-acoustic-scale=10 "ark:gunzip -c exp_pdnn/dnn_fbank/decode_${set}_bd_tgpr/lat.JOB.gz|" ark:- \| \ 60 | # lattice-best-path ark:- ark,t:$dir/tra.JOB "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 61 | 62 | rm -rf $dir/tra.* 63 | 64 | cp $gmmdir/final.mdl $dir 65 | if [ ! -f $dir/${set}.pfile.done ]; then 66 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --do-concat false \ 67 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 68 | $working_dir/data_ivec/$set $dir $dir || exit 1 69 | touch $dir/${set}.pfile.done 70 | fi 71 | done 72 | fi 73 | 74 | echo ===================================================================== 75 | echo " DNN Pre-training & Fine-tuning " 76 | echo ===================================================================== 77 | #feat_dim=$(gunzip -c $dir/dev.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 78 | 79 | #if [ !
-f $decode_dir/dnn.fine.done ]; then 80 | # $train_cmd JOB=1:$nj $decode_dir/log/dnn.fine.JOB.log \ 81 | # export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn_lhuc/ \; \ 82 | # export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 83 | # python pdnn_lhuc/cmds/run_DNN.py --train-data "$decode_dir/data.pfile.JOB.gz,partition=2000m,random=true,stream=true" \ 84 | # --valid-data "$decode_dir/data.pfile.JOB.gz,partition=600m,random=true,stream=true" \ 85 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 86 | # --ptr-file $working_dir/nnet.param --ptr-layer-number 7 \ 87 | # --lrate "C:0.8:3" \ 88 | # --wdir $decode_dir --kaldi-output-file $decode_dir/dnn.nnet.JOB || exit 1; 89 | # touch $working_dir/dnn.fine.done 90 | #fi 91 | 92 | echo ===================================================================== 93 | echo " Decoding " 94 | echo ===================================================================== 95 | 96 | if [ $stage -eq 2 ]; then 97 | graph_dir=$gmmdir/graph_bd_tgpr 98 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 99 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 100 | $graph_dir $working_dir/data_ivec/dev ${dnndir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 101 | steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 102 | --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 103 | $graph_dir $working_dir/data_ivec/test ${dnndir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 104 | 105 | # steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 8 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 106 | # --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 107 | # $graph_dir $working_dir/data_ivec/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev_bd_tgpr_lhuc || exit 1; 108 | # steps_pdnn/tmp/decode_dnn_lhuc.sh --nj 11 --scoring-opts "--min-lmwt 8 --max-lmwt 12" --cmd "$decode_cmd" \ 109 | # --norm-vars false --splice-opts "--left-context=0 --right-context=0" \ 110 | # $graph_dir $working_dir/data_ivec/test ${gmmdir}_ali_tr95 $working_dir/decode_test_bd_tgpr_lhuc || exit 1; 111 | 112 | fi 113 | 114 | echo "Finish !!" 
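# Usage sketch (our reading of the stage logic above, not an invocation taken
# from the original scripts): the stage tests use "-eq", so the two stages are
# mutually exclusive and the script is meant to be run twice, with
# parse_options.sh mapping --stage onto the "stage" variable defined at the top:
#   bash run_tedlium/tmp/run-dnn-sat-lhuc.sh --stage 1   # dump i-vector features and adaptation pfiles
#   bash run_tedlium/tmp/run-dnn-sat-lhuc.sh --stage 2   # LHUC decoding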
115 | -------------------------------------------------------------------------------- /run_timit/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # -------------------------------------------------------------------------------------------------------------- 4 | # run-dnn.sh : DNN Models over fMLLR features 5 | # -------------------------------------------------------------------------------------------------------------- 6 | %WER 18.8 | 400 15057 | 83.4 11.4 5.1 2.3 18.8 99.8 | -0.592 | exp_pdnn/dnn/decode_dev/score_5/ctm_39phn.filt.sys 7 | %WER 20.2 | 192 7215 | 82.9 11.9 5.2 3.1 20.2 100.0 | -0.917 | exp_pdnn/dnn/decode_test/score_4/ctm_39phn.filt.sys 8 | # with dropout applied 9 | %WER 18.0 | 400 15057 | 84.3 11.2 4.5 2.3 18.0 99.3 | -0.706 | exp_pdnn/dnn_dropout/decode_dev/score_5/ctm_39phn.filt.sys 10 | %WER 19.6 | 192 7215 | 83.3 11.9 4.8 3.0 19.6 99.5 | -1.070 | exp_pdnn/dnn_dropout/decode_test/score_4/ctm_39phn.filt.sys 11 | 12 | # -------------------------------------------------------------------------------------------------------------- 13 | # run-dnn-maxout.sh : Deep Maxout Networks over fMLLR features 14 | # -------------------------------------------------------------------------------------------------------------- 15 | %WER 17.5 | 400 15057 | 85.0 10.8 4.2 2.4 17.5 99.8 | -0.859 | exp_pdnn/dnn_maxout/decode_dev/score_5/ctm_39phn.filt.sys 16 | %WER 19.0 | 192 7215 | 83.4 11.9 4.7 2.4 19.0 99.5 | -0.894 | exp_pdnn/dnn_maxout/decode_test/score_5/ctm_39phn.filt.sys 17 | # with dropout applied 18 | %WER 16.7 | 400 15057 | 85.1 10.3 4.5 1.9 16.7 99.3 | -0.515 | exp_pdnn/dnn_maxout+dropout/decode_dev/score_7/ctm_39phn.filt.sys 19 | %WER 18.0 | 192 7215 | 84.5 11.1 4.3 2.5 18.0 99.0 | -1.070 | exp_pdnn/dnn_maxout+dropout/decode_test/score_5/ctm_39phn.filt.sys 20 | # by setting momentum to 0.9, you can improve dnn by ~1.0% and maxout by ~0.5% 21 | 22 | # -------------------------------------------------------------------------------------------------------------- 23 | # run-bnf-tandem.sh : Tandem Systems with BNFs Trained on fMLLRs 24 | # -------------------------------------------------------------------------------------------------------------- 25 | # LDA+MLLT 26 | %WER 16.8 | 400 15057 | 85.5 11.2 3.3 2.4 16.8 99.3 | -1.195 | exp_pdnn/bnf_tandem/tri4/decode_dev/score_11/ctm_39phn.filt.sys 27 | %WER 18.5 | 192 7215 | 84.1 12.1 3.9 2.6 18.5 99.5 | -1.223 | exp_pdnn/bnf_tandem/tri4/decode_test/score_12/ctm_39phn.filt.sys 28 | # MMI 29 | %WER 16.7 | 400 15057 | 85.8 11.0 3.2 2.5 16.7 99.0 | -1.188 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_dev_it1/score_11/ctm_39phn.filt.sys 30 | %WER 18.4 | 192 7215 | 84.3 12.0 3.7 2.8 18.4 99.5 | -1.191 | exp_pdnn/bnf_tandem/tri4_mmi_b0.1/decode_test_it1/score_12/ctm_39phn.filt.sys 31 | # SGMM 32 | %WER 16.3 | 400 15057 | 85.7 10.8 3.5 2.0 16.3 99.0 | -1.114 | exp_pdnn/bnf_tandem/sgmm5a/decode_dev/score_9/ctm_39phn.filt.sys 33 | %WER 17.8 | 192 7215 | 84.2 11.4 4.4 2.0 17.8 100.0 | -0.978 | exp_pdnn/bnf_tandem/sgmm5a/decode_test/score_12/ctm_39phn.filt.sys 34 | # MMI-SGMM 35 | %WER 16.5 | 400 15057 | 86.0 10.9 3.2 2.5 16.5 98.8 | -1.119 | exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_dev_it1/score_10/ctm_39phn.filt.sys 36 | %WER 17.9 | 192 7215 | 85.1 11.6 3.3 3.0 17.9 99.0 | -1.303 | exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_test_it1/score_8/ctm_39phn.filt.sys 37 | 38 | # -------------------------------------------------------------------------------------------------------------- 
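# A sketch for regenerating the numbers below (assuming the standard Kaldi
# sclite scoring layout; utils/best_wer.sh ships with Kaldi):
#   for d in exp_pdnn/*/decode*; do
#     grep Sum $d/score_*/ctm_39phn.filt.sys 2>/dev/null | utils/best_wer.sh
#   done
# --------------------------------------------------------------------------------------------------------------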
39 | # run-dnn-fbank.sh : DNN Models over filterbank features 40 | # -------------------------------------------------------------------------------------------------------------- 41 | %WER 22.5 | 400 15057 | 80.1 14.3 5.6 2.6 22.5 99.8 | -0.340 | exp_pdnn/dnn_fbank/decode_dev/score_5/ctm_39phn.filt.sys 42 | %WER 24.0 | 192 7215 | 78.7 15.1 6.1 2.8 24.0 100.0 | -0.244 | exp_pdnn/dnn_fbank/decode_test/score_5/ctm_39phn.filt.sys 43 | # if momentum increased to 0.9 for faster convergence 44 | %WER 20.2 | 400 15057 | 82.4 12.9 4.7 2.6 20.2 99.5 | -0.424 | exp_pdnn/dnn_fbank/decode_dev/score_5/ctm_39phn.filt.sys 45 | %WER 21.6 | 192 7215 | 81.2 13.7 5.1 2.8 21.6 100.0 | -0.397 | exp_pdnn/dnn_fbank/decode_test/score_5/ctm_39phn.filt.sys 46 | 47 | # -------------------------------------------------------------------------------------------------------------- 48 | # run-cnn.sh : CNN Models over filterbank features 49 | # -------------------------------------------------------------------------------------------------------------- 50 | %WER 19.0 | 400 15057 | 83.2 12.0 4.8 2.2 19.0 99.3 | -0.392 | exp_pdnn/cnn/decode_dev/score_6/ctm_39phn.filt.sys 51 | %WER 19.7 | 192 7215 | 82.6 12.5 4.9 2.3 19.7 99.0 | -0.530 | exp_pdnn/cnn/decode_test/score_5/ctm_39phn.filt.sys 52 | -------------------------------------------------------------------------------- /run_timit/kaldi_io/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains DNN model by reading the Kaldi feature (.scp) and align- 5 | # ment (.ali) files directly. It is to be run after run.sh. Before running 6 | # this, you should already build the initial GMM model. This script requires 7 | # a GPU, and also the "pdnn" toolkit to train the DNN. 8 | 9 | # For more informaiton regarding the recipes and results, visit the webiste 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn_kaldi_io 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! 
gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into traing and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | 74 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 75 | for set in tr95 cv05; do 76 | if [ ! -d $working_dir/data/train_$set ]; then 77 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 78 | --transform-dir ${gmmdir}_ali_$set \ 79 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 80 | steps/compute_cmvn_stats.sh --fake \ 81 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 82 | fi 83 | done 84 | for set in dev test; do 85 | if [ ! -d $working_dir/data/$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 87 | --transform-dir $gmmdir/decode_$set \ 88 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | 94 | echo ===================================================================== 95 | echo " Training and Cross-Validation Pfiles " 96 | echo ===================================================================== 97 | # By default, DNN inputs include 11 frames of fMLLR 98 | for set in tr95 cv05; do 99 | if [ ! -f $working_dir/${set}.netdata.done ]; then 100 | steps_pdnn/make_nnet_data.sh --nj 10 --cmd "$train_cmd" --norm-vars false \ 101 | --splice-opts "--left-context=5 --right-context=5" \ 102 | $working_dir/data_nnet/train_$set $working_dir/data/train_$set \ 103 | $working_dir/_nnet_input ${gmmdir}_ali_$set $working_dir || exit 1 104 | touch $working_dir/${set}.netdata.done 105 | fi 106 | done 107 | # Shuffle the scp list 108 | cat $working_dir/data_nnet/train_tr95/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $working_dir/train_tr95.scp 109 | cat $working_dir/data_nnet/train_cv05/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $working_dir/train_cv05.scp 110 | 111 | echo ===================================================================== 112 | echo " DNN Pre-training & Fine-tuning " 113 | echo ===================================================================== 114 | 115 | if [ ! 
-f $working_dir/dnn.ptr.done ]; then 116 | echo "RBM Pre-training" 117 | $cmd $working_dir/log/dnn.ptr.log \ 118 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 119 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 120 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train_tr95.scp,partition=1000m,random=true,stream=false" \ 121 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 122 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 123 | touch $working_dir/dnn.ptr.done 124 | fi 125 | 126 | if [ ! -f $working_dir/dnn.fine.done ]; then 127 | echo "Fine-tuning DNN" 128 | $cmd $working_dir/log/dnn.fine.log \ 129 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 130 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 131 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.scp,label=train_tr95.ali.gz,partition=1000m,random=true,stream=false" \ 132 | --valid-data "$working_dir/train_cv05.scp,label=train_cv05.ali.gz,partition=200m,random=true,stream=false" \ 133 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 134 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 135 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 136 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 137 | touch $working_dir/dnn.fine.done 138 | fi 139 | 140 | echo ===================================================================== 141 | echo " Decoding " 142 | echo ===================================================================== 143 | if [ ! -f $working_dir/decode.done ]; then 144 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 145 | graph_dir=$gmmdir/graph 146 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 147 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 148 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 149 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 150 | 151 | touch $working_dir/decode.done 152 | fi 153 | 154 | echo "Finish !!" 155 | -------------------------------------------------------------------------------- /run_timit/run-cnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains CNN model over the filterbank features. It is to be run 5 | # after run.sh. Before running this, you should already build the initial GMM 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # CNN. The input filterbank features are with mean and variance normalization. 8 | 9 | # The input features and CNN architecture follow the IBM configuration: 10 | # Hagen Soltau, George Saon, and Tara N. Sainath. Joint Training of Convolu- 11 | # tional and non-Convolutional Neural Networks 12 | 13 | # For more informaiton regarding the recipes and results, visit the webiste 14 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 15 | 16 | working_dir=exp_pdnn/cnn 17 | gmmdir=exp/tri3 18 | 19 | # Specify the gpu device to be used 20 | gpu=gpu 21 | 22 | cmd=run.pl 23 | . cmd.sh 24 | [ -f path.sh ] && . ./path.sh 25 | . parse_options.sh || exit 1; 26 | 27 | # At this point you may want to make sure the directory $working_dir is 28 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 29 | if [ ! 
-d pdnn ]; then 30 | echo "Checking out PDNN code." 31 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 32 | fi 33 | 34 | if [ ! -d steps_pdnn ]; then 35 | echo "Checking out steps_pdnn scripts." 36 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 37 | fi 38 | 39 | if ! nvidia-smi; then 40 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 41 | echo "(Note: this script might still work, it would just be slower.)" 42 | fi 43 | 44 | # The hope here is that Theano has been installed either to python or to python2.6 45 | pythonCMD=python 46 | if ! python -c 'import theano;'; then 47 | if ! python2.6 -c 'import theano;'; then 48 | echo "Theano does not seem to be installed on your machine. Not continuing." 49 | echo "(Note: this script might still work, it would just be slower.)" 50 | exit 1; 51 | else 52 | pythonCMD=python2.6 53 | fi 54 | fi 55 | 56 | mkdir -p $working_dir/log 57 | 58 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 59 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 60 | 61 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 62 | 63 | echo ===================================================================== 64 | echo " Data Split & Alignment & Feature Preparation " 65 | echo ===================================================================== 66 | # Split training data into traing and cross-validation sets for DNN 67 | if [ ! -d data/train_tr95 ]; then 68 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 69 | fi 70 | # Alignment on the training and validation data 71 | for set in tr95 cv05; do 72 | if [ ! -d ${gmmdir}_ali_$set ]; then 73 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 74 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 75 | fi 76 | done 77 | 78 | # Generate the fbank features: 40-dimensional fbanks on each frame 79 | echo "--num-mel-bins=40" > conf/fbank.conf 80 | mkdir -p $working_dir/data 81 | for set in train_tr95 train_cv05 dev test; do 82 | if [ ! -d $working_dir/data/$set ]; then 83 | cp -r data/$set $working_dir/data/$set 84 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 85 | steps/make_fbank.sh --cmd "$train_cmd" --nj 16 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 86 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 87 | fi 88 | done 89 | 90 | echo ===================================================================== 91 | echo " Training and Cross-Validation Pfiles " 92 | echo ===================================================================== 93 | # By default, CNN inputs include 11 frames of filterbanks, and with delta 94 | # and double-deltas. 95 | for set in tr95 cv05; do 96 | if [ ! 
-f $working_dir/${set}.pfile.done ]; then 97 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true --add-deltas true \ 98 | --splice-opts "--left-context=5 --right-context=5" \ 99 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 100 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 101 | touch $working_dir/${set}.pfile.done 102 | fi 103 | done 104 | # Rename pfiles to keep consistency 105 | ( cd $working_dir; 106 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 107 | ) 108 | 109 | echo ===================================================================== 110 | echo " CNN Fine-tuning " 111 | echo ===================================================================== 112 | # The CNN is configured so that it has (approximately) the same number of trainable parameters 113 | # as the DNN (e.g., the DNN in run-dnn-fbank.sh). Also, we adopt "--momentum 0.9" because the 114 | # CNN over filterbanks seems to converge slowly; the larger momentum speeds up convergence. 115 | if [ ! -f $working_dir/cnn.fine.done ]; then 116 | echo "Fine-tuning CNN" 117 | $cmd $working_dir/log/cnn.fine.log \ 118 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 119 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 120 | $pythonCMD pdnn/cmds/run_CNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 121 | --valid-data "$working_dir/valid.pfile.gz,partition=400m,random=true,stream=false" \ 122 | --conv-nnet-spec "3x11x40:147,9x9,p1x3:147,3x4,p1x1,f" \ 123 | --nnet-spec "1024:1024:1024:$num_pdfs" \ 124 | --lrate "D:0.08:0.5:0.2,0.2:4" --momentum 0.9 \ 125 | --wdir $working_dir --param-output-file $working_dir/nnet.param \ 126 | --cfg-output-file $working_dir/nnet.cfg --kaldi-output-file $working_dir/dnn.nnet || exit 1; 127 | touch $working_dir/cnn.fine.done 128 | fi 129 | 130 | echo ===================================================================== 131 | echo " Dump Convolution-Layer Activation " 132 | echo ===================================================================== 133 | mkdir -p $working_dir/data_conv 134 | for set in dev test; do 135 | if [ ! -d $working_dir/data_conv/$set ]; then 136 | steps_pdnn/make_conv_feat.sh --nj 12 --cmd "$decode_cmd" \ 137 | $working_dir/data_conv/$set $working_dir/data/$set $working_dir $working_dir/nnet.param \ 138 | $working_dir/nnet.cfg $working_dir/_log $working_dir/_conv || exit 1; 139 | # Generate *fake* CMVN stats here. 140 | steps/compute_cmvn_stats.sh --fake \ 141 | $working_dir/data_conv/$set $working_dir/_log $working_dir/_conv || exit 1; 142 | fi 143 | done 144 | 145 | echo ===================================================================== 146 | echo " Decoding " 147 | echo ===================================================================== 148 | # In decoding, we take the convolution-layer activations as inputs and the 149 | # fully-connected layers as the DNN model. So we set --norm-vars, --add-deltas 150 | # and --splice-opts accordingly. 151 | if [ !
-f $working_dir/decode.done ]; then 152 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 153 | graph_dir=$gmmdir/graph 154 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 155 | --norm-vars false --add-deltas false --splice-opts "--left-context=0 --right-context=0" \ 156 | $graph_dir $working_dir/data_conv/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 157 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 158 | --norm-vars false --add-deltas false --splice-opts "--left-context=0 --right-context=0" \ 159 | $graph_dir $working_dir/data_conv/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 160 | touch $working_dir/decode.done 161 | fi 162 | 163 | echo "Finish !!" 164 | -------------------------------------------------------------------------------- /run_timit/run-dnn-fbank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are with mean and 8 | # variance normalization. 9 | 10 | # For more informaiton regarding the recipes and results, visit the webiste 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri3 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | 19 | cmd=run.pl 20 | . cmd.sh 21 | [ -f path.sh ] && . ./path.sh 22 | . parse_options.sh || exit 1; 23 | 24 | # At this point you may want to make sure the directory $working_dir is 25 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 26 | if [ ! -d pdnn ]; then 27 | echo "Checking out PDNN code." 28 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 29 | fi 30 | 31 | if [ ! -d steps_pdnn ]; then 32 | echo "Checking out steps_pdnn scripts." 33 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 34 | fi 35 | 36 | if ! nvidia-smi; then 37 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 38 | echo "(Note: this script might still work, it would just be slower.)" 39 | fi 40 | 41 | # The hope here is that Theano has been installed either to python or to python2.6 42 | pythonCMD=python 43 | if ! python -c 'import theano;'; then 44 | if ! python2.6 -c 'import theano;'; then 45 | echo "Theano does not seem to be installed on your machine. Not continuing." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | exit 1; 48 | else 49 | pythonCMD=python2.6 50 | fi 51 | fi 52 | 53 | mkdir -p $working_dir/log 54 | 55 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 56 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 57 | 58 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 59 | 60 | echo ===================================================================== 61 | echo " Data Split & Alignment & Feature Preparation " 62 | echo ===================================================================== 63 | # Split training data into traing and cross-validation sets for DNN 64 | if [ ! 
-d data/train_tr95 ]; then 65 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 66 | fi 67 | # Alignment on the training and validation data 68 | for set in tr95 cv05; do 69 | if [ ! -d ${gmmdir}_ali_$set ]; then 70 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 71 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 72 | fi 73 | done 74 | 75 | # Generate the fbank features: 40-dimensional fbanks on each frame 76 | echo "--num-mel-bins=40" > conf/fbank.conf 77 | mkdir -p $working_dir/data 78 | for set in train_tr95 train_cv05 dev test; do 79 | if [ ! -d $working_dir/data/$set ]; then 80 | cp -r data/$set $working_dir/data/$set 81 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 82 | steps/make_fbank.sh --cmd "$train_cmd" --nj 16 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 83 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 84 | fi 85 | done 86 | 87 | echo ===================================================================== 88 | echo " Training and Cross-Validation Pfiles " 89 | echo ===================================================================== 90 | # By default, DNN inputs include 11 frames of filterbanks 91 | for set in tr95 cv05; do 92 | if [ ! -f $working_dir/${set}.pfile.done ]; then 93 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true \ 94 | --splice-opts "--left-context=5 --right-context=5" \ 95 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 96 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 97 | touch $working_dir/${set}.pfile.done 98 | fi 99 | done 100 | # Rename pfiles to keep consistency 101 | ( cd $working_dir; 102 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 103 | ) 104 | 105 | echo ===================================================================== 106 | echo " DNN Pre-training & Fine-tuning " 107 | echo ===================================================================== 108 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 109 | 110 | if [ ! -f $working_dir/dnn.ptr.done ]; then 111 | echo "RBM Pre-training" 112 | $cmd $working_dir/log/dnn.ptr.log \ 113 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 114 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 115 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 116 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 117 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 118 | touch $working_dir/dnn.ptr.done 119 | fi 120 | 121 | # For SDA pre-training 122 | #$pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 123 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 124 | # --1stlayer-reconstruct-activation "tanh" \ 125 | # --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 126 | # --ptr-layer-number 4 --epoch-number 5 || exit 1; 127 | 128 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:8" 129 | # Check run_timit/RESULTS for the results 130 | 131 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 132 | echo "Fine-tuning DNN" 133 | $cmd $working_dir/log/dnn.fine.log \ 134 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 135 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 136 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 137 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 138 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 139 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 140 | --lrate "D:0.08:0.5:0.2,0.2:8" --momentum 0.9 \ 141 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 142 | touch $working_dir/dnn.fine.done 143 | fi 144 | 145 | echo ===================================================================== 146 | echo " Decoding " 147 | echo ===================================================================== 148 | if [ ! -f $working_dir/decode.done ]; then 149 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 150 | graph_dir=$gmmdir/graph 151 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 152 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 153 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 154 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 155 | touch $working_dir/decode.done 156 | fi 157 | 158 | echo "Finish !!" 159 | -------------------------------------------------------------------------------- /run_timit/run-dnn-maxout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This script trains maxout network models over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should have already built the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # We implement the maxout activation function based on Kaldi "revision 4960". 10 | # To set it up, follow these steps: 11 | # 1. Go to /path/to/kaldi/src/nnet and *backup* nnet-component.h, nnet-component.cc, nnet-activation.h 12 | # 2. Download these 3 files from here: 13 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.h 14 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-component.cc 15 | # http://www.cs.cmu.edu/~ymiao/codes/kaldipdnn/nnet-activation.h 16 | # 3. Recompile Kaldi 17 | 18 | # For more information regarding the recipes and results, visit the website 19 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 20 | 21 | working_dir=exp_pdnn/dnn_maxout 22 | gmmdir=exp/tri3 23 | 24 | # Specify the gpu device to be used 25 | gpu=gpu 26 | 27 | cmd=run.pl 28 | . cmd.sh 29 | [ -f path.sh ] && . ./path.sh 30 | . parse_options.sh || exit 1; 31 | 32 | # At this point you may want to make sure the directory $working_dir is 33 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 34 | if [ ! -d pdnn ]; then 35 | echo "Checking out PDNN code." 36 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 37 | fi 38 | 39 | if [ ! -d steps_pdnn ]; then 40 | echo "Checking out steps_pdnn scripts." 41 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 42 | fi 43 | 44 | if !
nvidia-smi; then 45 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | fi 48 | 49 | # The hope here is that Theano has been installed either to python or to python2.6 50 | pythonCMD=python 51 | if ! python -c 'import theano;'; then 52 | if ! python2.6 -c 'import theano;'; then 53 | echo "Theano does not seem to be installed on your machine. Not continuing." 54 | echo "(Note: this script might still work, it would just be slower.)" 55 | exit 1; 56 | else 57 | pythonCMD=python2.6 58 | fi 59 | fi 60 | 61 | mkdir -p $working_dir/log 62 | 63 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 64 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 65 | 66 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 67 | 68 | echo ===================================================================== 69 | echo " Data Split & Alignment & Feature Preparation " 70 | echo ===================================================================== 71 | # Split training data into traing and cross-validation sets for DNN 72 | if [ ! -d data/train_tr95 ]; then 73 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 74 | fi 75 | # Alignment on the training and validation data 76 | for set in tr95 cv05; do 77 | if [ ! -d ${gmmdir}_ali_$set ]; then 78 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 79 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 80 | fi 81 | done 82 | 83 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 84 | for set in tr95 cv05; do 85 | if [ ! -d $working_dir/data/train_$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 87 | --transform-dir ${gmmdir}_ali_$set \ 88 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | for set in dev test; do 94 | if [ ! -d $working_dir/data/$set ]; then 95 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 96 | --transform-dir $gmmdir/decode_$set \ 97 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 98 | steps/compute_cmvn_stats.sh --fake \ 99 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 100 | fi 101 | done 102 | 103 | echo ===================================================================== 104 | echo " Training and Cross-Validation Pfiles " 105 | echo ===================================================================== 106 | # By default, DNN inputs include 11 frames of fMLLR 107 | for set in tr95 cv05; do 108 | if [ ! 
-f $working_dir/${set}.pfile.done ]; then 109 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false \ 110 | --splice-opts "--left-context=5 --right-context=5" \ 111 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 112 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 113 | touch $working_dir/${set}.pfile.done 114 | fi 115 | done 116 | # Rename pfiles to keep consistency 117 | ( cd $working_dir; 118 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 119 | ) 120 | 121 | echo ===================================================================== 122 | echo " DNN Pre-training & Fine-tuning " 123 | echo ===================================================================== 124 | # Here we use maxout networks. When using maxout, we need to reduce the learning rate. To apply dropout, 125 | # add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.1:0.5:0.2,0.2:8" 126 | # Check run_timit/RESULTS for the results 127 | 128 | # The network structure is set in the way that this maxout network has approximately the same number of 129 | # parameters as the DNN model in run-dnn.sh 130 | 131 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 132 | 133 | if [ ! -f $working_dir/dnn.fine.done ]; then 134 | echo "Fine-tuning DNN" 135 | $cmd $working_dir/log/dnn.fine.log \ 136 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 137 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 138 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 139 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 140 | --nnet-spec "$feat_dim:625:625:625:625:$num_pdfs" \ 141 | --activation "maxout:3" \ 142 | --lrate "D:0.008:0.5:0.2,0.2:8" \ 143 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 144 | touch $working_dir/dnn.fine.done 145 | fi 146 | 147 | echo ===================================================================== 148 | echo " Decoding " 149 | echo ===================================================================== 150 | if [ ! -f $working_dir/decode.done ]; then 151 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 152 | graph_dir=$gmmdir/graph 153 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 154 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 155 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 156 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 157 | touch $working_dir/decode.done 158 | fi 159 | 160 | echo "Finish !!" 161 | -------------------------------------------------------------------------------- /run_timit/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 
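#
# A note on the "--lrate" strings used throughout these recipes. This is our
# (hedged) reading of PDNN's schedule syntax, not an authoritative spec:
#   "D:0.08:0.5:0.2,0.2:8" appears to mean
#     D        exponential-decay ("newbob"-style) schedule
#     0.08     starting learning rate
#     0.5      factor applied to the rate once halving starts
#     0.2,0.2  halving starts / training stops when the validation-error
#              improvement between epochs falls below these thresholds
#     8        earliest epoch at which halving may begin
# For example, a more patient schedule could look like:
#   --lrate "D:0.08:0.5:0.05,0.05:15"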
8 | 9 | # For more information regarding the recipes and results, visit the website 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri3 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into training and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data 67 | for set in tr95 cv05; do 68 | if [ ! -d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 16 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | 74 | # Dump fMLLR features. "Fake" CMVN stats (zero means and unit variances) are applied. 75 | for set in tr95 cv05; do 76 | if [ ! -d $working_dir/data/train_$set ]; then 77 | steps/nnet/make_fmllr_feats.sh --nj 16 --cmd "$train_cmd" \ 78 | --transform-dir ${gmmdir}_ali_$set \ 79 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 80 | steps/compute_cmvn_stats.sh --fake \ 81 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 82 | fi 83 | done 84 | for set in dev test; do 85 | if [ !
-d $working_dir/data/$set ]; then 86 | steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ 87 | --transform-dir $gmmdir/decode_$set \ 88 | $working_dir/data/$set data/$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 89 | steps/compute_cmvn_stats.sh --fake \ 90 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 91 | fi 92 | done 93 | 94 | echo ===================================================================== 95 | echo " Training and Cross-Validation Pfiles " 96 | echo ===================================================================== 97 | # By default, DNN inputs include 11 frames of fMLLR 98 | for set in tr95 cv05; do 99 | if [ ! -f $working_dir/${set}.pfile.done ]; then 100 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false \ 101 | --splice-opts "--left-context=5 --right-context=5" \ 102 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 103 | ( cd $working_dir; mv concat.pfile ${set}.pfile; gzip ${set}.pfile; ) 104 | touch $working_dir/${set}.pfile.done 105 | fi 106 | done 107 | # Rename pfiles to keep consistency 108 | ( cd $working_dir; 109 | ln -s tr95.pfile.gz train.pfile.gz; ln -s cv05.pfile.gz valid.pfile.gz 110 | ) 111 | 112 | echo ===================================================================== 113 | echo " DNN Pre-training & Fine-tuning " 114 | echo ===================================================================== 115 | feat_dim=$(gunzip -c $working_dir/train.pfile.gz |head |grep num_features| awk '{print $2}') || exit 1; 116 | 117 | if [ ! -f $working_dir/dnn.ptr.done ]; then 118 | echo "RBM Pre-training" 119 | $cmd $working_dir/log/dnn.ptr.log \ 120 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 121 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 122 | $pythonCMD pdnn/cmds/run_RBM.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 123 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" --wdir $working_dir \ 124 | --ptr-layer-number 4 --param-output-file $working_dir/dnn.ptr || exit 1; 125 | touch $working_dir/dnn.ptr.done 126 | fi 127 | 128 | # For SDA pre-training 129 | #$pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 130 | # --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 131 | # --1stlayer-reconstruct-activation "tanh" \ 132 | # --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 133 | # --ptr-layer-number 4 --epoch-number 5 || exit 1; 134 | 135 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:8" 136 | # Check run_timit/RESULTS for the results 137 | 138 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 139 | echo "Fine-tuning DNN" 140 | $cmd $working_dir/log/dnn.fine.log \ 141 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 142 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 143 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train.pfile.gz,partition=1000m,random=true,stream=false" \ 144 | --valid-data "$working_dir/valid.pfile.gz,partition=200m,random=true,stream=false" \ 145 | --nnet-spec "$feat_dim:1024:1024:1024:1024:$num_pdfs" \ 146 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 4 \ 147 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 148 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 149 | touch $working_dir/dnn.fine.done 150 | fi 151 | 152 | echo ===================================================================== 153 | echo " Decoding " 154 | echo ===================================================================== 155 | if [ ! -f $working_dir/decode.done ]; then 156 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 157 | graph_dir=$gmmdir/graph 158 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 159 | $graph_dir $working_dir/data/dev ${gmmdir}_ali_tr95 $working_dir/decode_dev || exit 1; 160 | steps_pdnn/decode_dnn.sh --nj 12 --scoring-opts "--min-lmwt 1 --max-lmwt 8" --cmd "$decode_cmd" \ 161 | $graph_dir $working_dir/data/test ${gmmdir}_ali_tr95 $working_dir/decode_test || exit 1; 162 | touch $working_dir/decode.done 163 | fi 164 | 165 | echo "Finish !!" 166 | -------------------------------------------------------------------------------- /run_wsj/RESULTS: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # results of tri4b, the SAT system 4 | %WER 9.39 [ 773 / 8234, 103 ins, 108 del, 562 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_16 5 | %WER 6.15 [ 347 / 5643, 66 ins, 25 del, 256 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_14 6 | 7 | # below are results of the pdnn recipes 8 | 9 | # run-dnn.sh 10 | %WER 7.18 [ 591 / 8234, 74 ins, 92 del, 425 sub ] exp_pdnn/dnn/decode_bd_tgpr_dev93/wer_11 11 | %WER 4.08 [ 230 / 5643, 29 ins, 21 del, 180 sub ] exp_pdnn/dnn/decode_bd_tgpr_eval92/wer_11 12 | 13 | # run-bnf-tandem.sh -- apply mean normalization over BNF; the default config 14 | %WER 7.66 [ 631 / 8234, 86 ins, 94 del, 451 sub ] exp_pdnn/bnf_tandem/tri5a/decode_bd_tgpr_dev93/wer_32 15 | %WER 4.61 [ 260 / 5643, 41 ins, 18 del, 201 sub ] exp_pdnn/bnf_tandem/tri5a/decode_bd_tgpr_eval92/wer_33 16 | %WER 6.72 [ 553 / 8234, 55 ins, 95 del, 403 sub ] exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_bd_tgpr_dev93_it4/wer_30 17 | %WER 3.81 [ 215 / 5643, 22 ins, 13 del, 180 sub ] exp_pdnn/bnf_tandem/tri5a_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_32 18 | %WER 7.10 [ 585 / 8234, 66 ins, 102 del, 417 sub ] exp_pdnn/bnf_tandem/sgmm5a/decode_bd_tgpr_dev93/wer_26 19 | %WER 4.27 [ 241 / 5643, 30 ins, 17 del, 194 sub ] exp_pdnn/bnf_tandem/sgmm5a/decode_bd_tgpr_eval92/wer_26 20 | %WER 6.64 [ 547 / 8234, 69 ins, 85 del, 393 sub ] exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_bd_tgpr_dev93_it4/wer_20 21 | %WER 3.69 [ 208 / 5643, 27 ins, 10 del, 171 sub ] exp_pdnn/bnf_tandem/sgmm5a_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_20 22 | 23 | # run-dnn-fbank.sh 24 | %WER 7.38 [ 608 / 8234, 80 ins, 90 del, 438 sub ] exp_pdnn/dnn_fbank/decode_bd_tgpr_dev93/wer_10 25 | %WER 4.27 [ 241 / 5643, 36 ins, 14 del, 191 sub ] exp_pdnn/dnn_fbank/decode_bd_tgpr_eval92/wer_9 26 | 27 | # run-cnn.sh 28 | 29 | 30 | 
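# How to read the lines above: Kaldi scoring prints
#   %WER <err%> [ <errors> / <ref-words>, <ins> ins, <del> del, <sub> sub ] <decode-dir>/wer_<LMWT>
# where <LMWT> is the language-model weight that gave the lowest error rate.
# A quick way to re-collect such a summary (a sketch; adjust the glob to your
# own experiment directories):
#   for d in exp_pdnn/*/decode*; do grep WER $d/wer_* | utils/best_wer.sh; done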
-------------------------------------------------------------------------------- /run_wsj/run-dnn-fbank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN system over the filterbank features. It 5 | # is to be run after run.sh. Before running this, you should already build 6 | # the initial GMM model. This script requires a GPU card, and also the "pdnn" 7 | # toolkit to train the DNN. The input filterbank features are with mean and 8 | # variance normalization. 9 | 10 | # For more informaiton regarding the recipes and results, visit the webiste 11 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 12 | 13 | working_dir=exp_pdnn/dnn_fbank 14 | gmmdir=exp/tri4b 15 | 16 | # Specify the gpu device to be used 17 | gpu=gpu 18 | 19 | cmd=run.pl 20 | . cmd.sh 21 | [ -f path.sh ] && . ./path.sh 22 | . parse_options.sh || exit 1; 23 | 24 | # At this point you may want to make sure the directory $working_dir is 25 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 26 | if [ ! -d pdnn ]; then 27 | echo "Checking out PDNN code." 28 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 29 | fi 30 | 31 | if [ ! -d steps_pdnn ]; then 32 | echo "Checking out steps_pdnn scripts." 33 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 34 | fi 35 | 36 | if ! nvidia-smi; then 37 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 38 | echo "(Note: this script might still work, it would just be slower.)" 39 | fi 40 | 41 | # The hope here is that Theano has been installed either to python or to python2.6 42 | pythonCMD=python 43 | if ! python -c 'import theano;'; then 44 | if ! python2.6 -c 'import theano;'; then 45 | echo "Theano does not seem to be installed on your machine. Not continuing." 46 | echo "(Note: this script might still work, it would just be slower.)" 47 | exit 1; 48 | else 49 | pythonCMD=python2.6 50 | fi 51 | fi 52 | 53 | mkdir -p $working_dir/log 54 | 55 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 56 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 57 | 58 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 59 | 60 | echo ===================================================================== 61 | echo " Data Split & Alignment & Feature Preparation " 62 | echo ===================================================================== 63 | # Split training data into traing and cross-validation sets for DNN 64 | if [ ! -d data/train_tr95 ]; then 65 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train_si284 data/train_tr95 data/train_cv05 || exit 1 66 | fi 67 | # Alignment on the training and validation data 68 | for set in tr95 cv05; do 69 | if [ ! -d ${gmmdir}_ali_$set ]; then 70 | steps/align_fmllr.sh --nj 14 --cmd "$train_cmd" \ 71 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 72 | fi 73 | done 74 | 75 | # Generate the fbank features: 40-dimensional fbanks on each frame 76 | echo "--num-mel-bins=40" > conf/fbank.conf 77 | mkdir -p $working_dir/data 78 | for set in train_tr95 train_cv05; do 79 | if [ ! 
-d $working_dir/data/$set ]; then 80 | cp -r data/$set $working_dir/data/$set 81 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 82 | steps/make_fbank.sh --cmd "$train_cmd" --nj 14 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 83 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 84 | fi 85 | done 86 | 87 | for set in dev93 eval92; do 88 | if [ ! -d $working_dir/data/$set ]; then 89 | cp -r data/test_$set $working_dir/data/$set 90 | ( cd $working_dir/data/$set; rm -rf {cmvn,feats}.scp split*; ) 91 | steps/make_fbank.sh --cmd "$train_cmd" --nj 8 $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 92 | steps/compute_cmvn_stats.sh $working_dir/data/$set $working_dir/_log $working_dir/_fbank || exit 1; 93 | fi 94 | done 95 | 96 | echo ===================================================================== 97 | echo " Training and Cross-Validation Pfiles " 98 | echo ===================================================================== 99 | # By default, DNN inputs include 11 frames of filterbanks 100 | for set in tr95 cv05; do 101 | if [ ! -f $working_dir/${set}.pfile.done ]; then 102 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars true --do-concat false \ 103 | --splice-opts "--left-context=5 --right-context=5" \ 104 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 105 | touch $working_dir/${set}.pfile.done 106 | fi 107 | done 108 | 109 | echo ===================================================================== 110 | echo " DNN Pre-training & Fine-tuning " 111 | echo ===================================================================== 112 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 113 | 114 | if [ ! -f $working_dir/dnn.ptr.done ]; then 115 | echo "SDA Pre-training" 116 | $cmd $working_dir/log/dnn.ptr.log \ 117 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 118 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 119 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=false" \ 120 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 121 | --1stlayer-reconstruct-activation "tanh" \ 122 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 123 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 124 | touch $working_dir/dnn.ptr.done 125 | fi 126 | 127 | if [ ! -f $working_dir/dnn.fine.done ]; then 128 | echo "Fine-tuning DNN" 129 | $cmd $working_dir/log/dnn.fine.log \ 130 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 131 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 132 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 133 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 134 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 135 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 136 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 137 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 138 | touch $working_dir/dnn.fine.done 139 | fi 140 | 141 | echo ===================================================================== 142 | echo " Decoding " 143 | echo ===================================================================== 144 | if [ ! 
-f $working_dir/decode.done ]; then 145 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 146 | graph_dir=$gmmdir/graph_bd_tgpr 147 | steps_pdnn/decode_dnn.sh --nj 10 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 148 | $graph_dir $working_dir/data/dev93 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_dev93 || exit 1; 149 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 150 | $graph_dir $working_dir/data/eval92 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_eval92 || exit 1; 151 | touch $working_dir/decode.done 152 | fi 153 | 154 | echo "Finish !!" 155 | -------------------------------------------------------------------------------- /run_wsj/run-dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University Apache 2.0 4 | # This is the script that trains DNN model over fMLLR features. It is to be 5 | # run after run.sh. Before running this, you should already build the initial 6 | # GMM model. This script requires a GPU, and also the "pdnn" toolkit to train 7 | # the DNN. 8 | 9 | # For more informaiton regarding the recipes and results, visit the webiste 10 | # http://www.cs.cmu.edu/~ymiao/kaldipdnn 11 | 12 | working_dir=exp_pdnn/dnn 13 | gmmdir=exp/tri4b 14 | 15 | # Specify the gpu device to be used 16 | gpu=gpu 17 | 18 | cmd=run.pl 19 | . cmd.sh 20 | [ -f path.sh ] && . ./path.sh 21 | . parse_options.sh || exit 1; 22 | 23 | # At this point you may want to make sure the directory $working_dir is 24 | # somewhere with a lot of space, preferably on the local GPU-containing machine. 25 | if [ ! -d pdnn ]; then 26 | echo "Checking out PDNN code." 27 | svn co https://github.com/yajiemiao/pdnn/trunk pdnn 28 | fi 29 | 30 | if [ ! -d steps_pdnn ]; then 31 | echo "Checking out steps_pdnn scripts." 32 | svn co https://github.com/yajiemiao/kaldipdnn/trunk/steps_pdnn steps_pdnn 33 | fi 34 | 35 | if ! nvidia-smi; then 36 | echo "The command nvidia-smi was not found: this probably means you don't have a GPU." 37 | echo "(Note: this script might still work, it would just be slower.)" 38 | fi 39 | 40 | # The hope here is that Theano has been installed either to python or to python2.6 41 | pythonCMD=python 42 | if ! python -c 'import theano;'; then 43 | if ! python2.6 -c 'import theano;'; then 44 | echo "Theano does not seem to be installed on your machine. Not continuing." 45 | echo "(Note: this script might still work, it would just be slower.)" 46 | exit 1; 47 | else 48 | pythonCMD=python2.6 49 | fi 50 | fi 51 | 52 | mkdir -p $working_dir/log 53 | 54 | ! gmm-info $gmmdir/final.mdl >&/dev/null && \ 55 | echo "Error getting GMM info from $gmmdir/final.mdl" && exit 1; 56 | 57 | num_pdfs=`gmm-info $gmmdir/final.mdl | grep pdfs | awk '{print $NF}'` || exit 1; 58 | 59 | echo ===================================================================== 60 | echo " Data Split & Alignment & Feature Preparation " 61 | echo ===================================================================== 62 | # Split training data into traing and cross-validation sets for DNN 63 | if [ ! -d data/train_tr95 ]; then 64 | utils/subset_data_dir_tr_cv.sh --cv-spk-percent 5 data/train_si284 data/train_tr95 data/train_cv05 || exit 1 65 | fi 66 | # Alignment on the training and validation data. We set --nj to 14 because data/train_cv05 has 14 speakers. 67 | for set in tr95 cv05; do 68 | if [ ! 
-d ${gmmdir}_ali_$set ]; then 69 | steps/align_fmllr.sh --nj 14 --cmd "$train_cmd" \ 70 | data/train_$set data/lang $gmmdir ${gmmdir}_ali_$set || exit 1 71 | fi 72 | done 73 | # Dump fMLLR features. "Fake" cmvn states (0 means and 1 variance) are applied. 74 | for set in tr95 cv05; do 75 | if [ ! -d $working_dir/data/train_$set ]; then 76 | steps/nnet/make_fmllr_feats.sh --nj 14 --cmd "$train_cmd" \ 77 | --transform-dir ${gmmdir}_ali_$set \ 78 | $working_dir/data/train_$set data/train_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 79 | steps/compute_cmvn_stats.sh --fake \ 80 | $working_dir/data/train_$set $working_dir/_log $working_dir/_fmllr || exit 1; 81 | fi 82 | done 83 | for set in dev93 eval92; do 84 | if [ ! -d $working_dir/data/$set ]; then 85 | steps/nnet/make_fmllr_feats.sh --nj 8 --cmd "$train_cmd" \ 86 | --transform-dir $gmmdir/decode_bd_tgpr_$set \ 87 | $working_dir/data/$set data/test_$set $gmmdir $working_dir/_log $working_dir/_fmllr || exit 1 88 | steps/compute_cmvn_stats.sh --fake \ 89 | $working_dir/data/$set $working_dir/_log $working_dir/_fmllr || exit 1; 90 | fi 91 | done 92 | 93 | echo ===================================================================== 94 | echo " Training and Cross-Validation Pfiles " 95 | echo ===================================================================== 96 | # By default, DNN inputs include 11 frames of fMLLR 97 | for set in tr95 cv05; do 98 | if [ ! -f $working_dir/${set}.pfile.done ]; then 99 | steps_pdnn/build_nnet_pfile.sh --cmd "$train_cmd" --norm-vars false --do-concat false \ 100 | --splice-opts "--left-context=5 --right-context=5" \ 101 | $working_dir/data/train_$set ${gmmdir}_ali_$set $working_dir || exit 1 102 | touch $working_dir/${set}.pfile.done 103 | fi 104 | done 105 | 106 | echo ===================================================================== 107 | echo " DNN Pre-training & Fine-tuning " 108 | echo ===================================================================== 109 | feat_dim=$(gunzip -c $working_dir/train_tr95.pfile.1.gz |head |grep num_features| awk '{print $2}') || exit 1; 110 | 111 | if [ ! -f $working_dir/dnn.ptr.done ]; then 112 | echo "SDA Pre-training" 113 | $cmd $working_dir/log/dnn.ptr.log \ 114 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 115 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 116 | $pythonCMD pdnn/cmds/run_SdA.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 117 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 118 | --1stlayer-reconstruct-activation "tanh" \ 119 | --wdir $working_dir --param-output-file $working_dir/dnn.ptr \ 120 | --ptr-layer-number 6 --epoch-number 5 || exit 1; 121 | touch $working_dir/dnn.ptr.done 122 | fi 123 | 124 | # To apply dropout, add "--dropout-factor 0.2,0.2,0.2,0.2,0.2,0.2" and change the value of "--lrate" to "D:0.8:0.5:0.2,0.2:4" 125 | if [ ! 
-f $working_dir/dnn.fine.done ]; then 126 | echo "Fine-tuning DNN" 127 | $cmd $working_dir/log/dnn.fine.log \ 128 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 129 | export THEANO_FLAGS=mode=FAST_RUN,device=$gpu,floatX=float32 \; \ 130 | $pythonCMD pdnn/cmds/run_DNN.py --train-data "$working_dir/train_tr95.pfile.*.gz,partition=2000m,random=true,stream=true" \ 131 | --valid-data "$working_dir/train_cv05.pfile.*.gz,partition=600m,random=true,stream=true" \ 132 | --nnet-spec "$feat_dim:1024:1024:1024:1024:1024:1024:$num_pdfs" \ 133 | --ptr-file $working_dir/dnn.ptr --ptr-layer-number 6 \ 134 | --lrate "D:0.08:0.5:0.2,0.2:8" \ 135 | --wdir $working_dir --kaldi-output-file $working_dir/dnn.nnet || exit 1; 136 | touch $working_dir/dnn.fine.done 137 | fi 138 | 139 | echo ===================================================================== 140 | echo " Decoding " 141 | echo ===================================================================== 142 | if [ ! -f $working_dir/decode.done ]; then 143 | cp $gmmdir/final.mdl $working_dir || exit 1; # copy final.mdl for scoring 144 | graph_dir=$gmmdir/graph_bd_tgpr 145 | steps_pdnn/decode_dnn.sh --nj 10 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 146 | $graph_dir $working_dir/data/dev93 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_dev93 || exit 1; 147 | steps_pdnn/decode_dnn.sh --nj 8 --scoring-opts "--min-lmwt 7 --max-lmwt 18" --cmd "$decode_cmd" \ 148 | $graph_dir $working_dir/data/eval92 ${gmmdir}_ali_tr95 $working_dir/decode_bd_tgpr_eval92 || exit 1; 149 | touch $working_dir/decode.done 150 | fi 151 | 152 | echo "Finish !!" 153 | -------------------------------------------------------------------------------- /steps_pdnn/align_nnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012-2013 Brno University of Technology (Author: Karel Vesely) 3 | # Apache 2.0 4 | 5 | # Aligns 'data' to sequences of transition-ids using a neural-network-based acoustic model. 6 | # Optionally produces alignments in lattice format; this is handy for getting word alignments. 7 | 8 | # Begin configuration section. 9 | nj=4 10 | cmd=run.pl 11 | stage=0 12 | # Begin configuration. 13 | scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" 14 | beam=10 15 | retry_beam=40 16 | 17 | splice_opts= 18 | norm_vars= 19 | add_deltas= 20 | 21 | align_to_lats=false # optionally produce alignment in lattice format 22 | lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice_beam=10" 23 | lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1" 24 | 25 | use_gpu="no" # yes|no|optionally 26 | # End configuration options. 27 | 28 | [ $# -gt 0 ] && echo "$0 $@" # Print the command line for logging 29 | 30 | [ -f path.sh ] && . ./path.sh # source the path. 31 | . parse_options.sh || exit 1; 32 | 33 | if [ $# != 4 ]; then 34 | echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>" 35 | echo "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali" 36 | echo "main options (for others, see top of script file)" 37 | echo " --config <config-file> # config containing options" 38 | echo " --nj <nj> # number of parallel jobs" 39 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 40 | exit 1; 41 | fi 42 | 43 | data=$1 44 | lang=$2 45 | srcdir=$3 46 | dir=$4 47 | 48 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
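# For example, if the nnet inputs were spliced with "--left-context=5
# --right-context=5" at training time, build_nnet_pfile.sh wrote that string to
# $srcdir/splice_opts; reading it back here (and add_deltas/norm_vars below)
# guarantees alignment uses exactly the same feature pipeline as training.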
49 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 50 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 51 | 52 | oov=`cat $lang/oov.int` || exit 1; 53 | mkdir -p $dir/log 54 | echo $nj > $dir/num_jobs 55 | sdata=$data/split$nj 56 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 57 | 58 | cp $srcdir/{tree,final.mdl} $dir || exit 1; 59 | 60 | # Select default locations to model files 61 | nnet=$srcdir/final.nnet; 62 | model=$dir/final.mdl 63 | class_frame_counts=$srcdir/train_class.counts 64 | 65 | # Check that files exist 66 | for f in $sdata/1/feats.scp $sdata/1/text $lang/L.fst $nnet $model $class_frame_counts; do 67 | [ ! -f $f ] && echo "$0: missing file $f" && exit 1; 68 | done 69 | 70 | # PREPARE FEATURE EXTRACTION PIPELINE 71 | # Create the feature stream: 72 | ## Set up the features 73 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 74 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 75 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 76 | 77 | # Finally add feature_transform and the MLP 78 | feats="$feats nnet-forward --no-softmax=true --class-frame-counts=$class_frame_counts $nnet ark:- ark:- |" 79 | 80 | echo "$0: aligning data '$data' using nnet/model '$srcdir', putting alignments in '$dir'" 81 | # Map oovs in reference transcription 82 | tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; 83 | # We could just use align-mapped in the next line, but it's less efficient as it compiles the 84 | # training graphs one by one. 85 | if [ $stage -le 0 ]; then 86 | $cmd JOB=1:$nj $dir/log/align.JOB.log \ 87 | compile-train-graphs $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ 88 | align-compiled-mapped $scale_opts --beam=$beam --retry-beam=$retry_beam $dir/final.mdl ark:- \ 89 | "$feats" "ark,t:|gzip -c >$dir/ali.JOB.gz" || exit 1; 90 | fi 91 | 92 | # Optionally align to lattice format (handy to get word alignment) 93 | if [ "$align_to_lats" == "true" ]; then 94 | echo "$0: aligning also to lattices '$dir/lat.*.gz'" 95 | $cmd JOB=1:$nj $dir/log/align_lat.JOB.log \ 96 | compile-train-graphs $lat_graph_scale $dir/tree $dir/final.mdl $lang/L.fst "$tra" ark:- \| \ 97 | latgen-faster-mapped $lat_decode_opts --word-symbol-table=$lang/words.txt $dir/final.mdl ark:- \ 98 | "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; 99 | fi 100 | 101 | echo "$0: done aligning data." 102 | -------------------------------------------------------------------------------- /steps_pdnn/build_avnnet_pfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # 2015 Fei Tao 4 | # Apache 2.0 5 | 6 | # Create pfiles for deep neural network training. We assume that the training alignment 7 | # is ready and features (either fbanks and fMLLRs) have been generated. 8 | # Refer to the following comments for configurations. 9 | 10 | ## Begin configuration section. 11 | stage=1 12 | every_nth_frame=1 # for subsampling. 
13 | nj=4 14 | cmd=run.pl 15 | 16 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 17 | add_deltas=false 18 | norm_vars=false # when doing cmvn, whether to normalize variance 19 | 20 | do_concat=true # whether to concatenate the individual pfiles into a single one 21 | 22 | # Config for splitting pfile into training and valid set; not used for SWBD 23 | do_split=false # whether to do pfile splitting 24 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 25 | cv_ratio=0.05 # the ratio of CV data 26 | 27 | shuffle_scp=false # whether the feature scp is shuffled 28 | 29 | ## End configuration options. 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f path.sh ] && . ./path.sh # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 5 ]; then 37 | echo "Wrong #arguments ($#, expected 5)" 38 | echo "usage: steps_pdnn/build_avnnet_pfile.sh <data-dir> <ali-dir> <working-dir> <feat1-dim> <feat2-dim>" 39 | echo "e.g.: steps_pdnn/build_avnnet_pfile.sh data/train exp/tri4_ali exp/tri4_pfile 39 30" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # starts from which stage" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 44 | exit 1; 45 | fi 46 | 47 | data=$1 48 | alidir=$2 49 | dir=$3 50 | feat1dim=$4 # the first modality's feature dimension 51 | feat2dim=$5 # the second modality's feature dimension 52 | feat1ind=$(($feat1dim - 1)) 53 | feat2ind=$(($feat2dim - 1)) 54 | 55 | name=`basename $data` 56 | nj=`cat $alidir/num_jobs` || exit 1; 57 | sdata=$data/split$nj 58 | 59 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 60 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 61 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 62 | if [ -f $try_path/pfile_create ]; then 63 | PPATH=$try_path 64 | else 65 | echo "You do not have pfile_create (part of pfile-utils) on your path," 66 | echo "and it is not accessible in the normal place e.g.
$try_path/pfile_create" 67 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 68 | exit 1 69 | fi 70 | else 71 | PPATH=$(dirname `which pfile_create`) 72 | fi 73 | export PATH=$PATH:$PPATH 74 | 75 | mkdir -p $dir/log 76 | echo $splice_opts > $dir/splice_opts 77 | echo $norm_vars > $dir/norm_vars 78 | echo $add_deltas > $dir/add_deltas 79 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 80 | 81 | ## Setup features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | if $shuffle_scp; then 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | " 86 | audiofeats="$feats select-feats 0-$feat1ind ark:- ark:- |" 87 | videofeats="$feats select-feats $feat1dim-$feat2ind ark:- ark:- |" 88 | audiosplice="$audiofeats splice-feats $splice_opts ark:- ark:- |" 89 | videosplice="$videofeats splice-feats $splice_opts ark:- ark:- |" 90 | feats="paste-feats ark:$audiosplice ark:$videosplice ark:-" 91 | fi 92 | # Add first and second-order deltas if needed 93 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 94 | 95 | ## Get the dimension of the features 96 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 97 | feat-to-dim "$feats subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 98 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 99 | echo "$0: network inputs have the dimension of $feat_dim" 100 | 101 | if [ $stage -le 2 ]; then 102 | $cmd JOB=1:$nj $dir/log/build_pfile.$name.JOB.log \ 103 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 104 | "$feats" "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 105 | # Concatenate the pfiles into one 106 | all_pfiles="" 107 | for n in `seq 1 $nj`; do 108 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 109 | done 110 | if $do_concat; then 111 | $cmd $dir/log/pfile_cat.log \ 112 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 113 | rm -rf $dir/$name.pfile.* 114 | else 115 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 116 | fi 117 | fi 118 | 119 | if [ $stage -le 3 ] && $do_split; then 120 | echo "Split data into training and cross-validation" 121 | mkdir -p $dir/concat 122 | # Chop the whole pfile into small units 123 | $cmd $dir/log/pfile_burst.log \ 124 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 125 | fi 126 | 127 | if [ $stage -le 4 ] && $do_split; then 128 | # Split the units accoring to cv_ratio 129 | $cmd $dir/log/pfile_rconcat.log \ 130 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 131 | rm -r $dir/concat 132 | echo "## Info of the training pfile: ##" 133 | $PPATH/pfile_info $dir/train.pfile 134 | echo "## Info of the cross-validation pfile: ##" 135 | $PPATH/pfile_info $dir/valid.pfile 136 | fi 137 | 138 | echo "$0: done creating pfiles." 
139 | 140 | exit 0; 141 | -------------------------------------------------------------------------------- /steps_pdnn/build_nnet_pfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Create pfiles for deep neural network training. We assume that the training alignment 6 | # is ready and features (either fbanks or fMLLRs) have been generated. 7 | # Refer to the following comments for configurations. 8 | 9 | ## Begin configuration section. 10 | stage=1 11 | every_nth_frame=1 # for subsampling. 12 | nj=4 13 | cmd=run.pl 14 | 15 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 16 | add_deltas=false 17 | norm_vars=false # when doing cmvn, whether to normalize variance 18 | 19 | do_concat=true # whether to concatenate the individual pfiles into a single one 20 | 21 | # Config for splitting pfile into training and valid set; not used for SWBD 22 | do_split=false # whether to do pfile splitting 23 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 24 | cv_ratio=0.05 # the ratio of CV data 25 | 26 | shuffle_scp=false # whether the feature scp is shuffled 27 | 28 | ## End configuration options. 29 | 30 | echo "$0 $@" # Print the command line for logging 31 | 32 | [ -f path.sh ] && . ./path.sh # source the path. 33 | . parse_options.sh || exit 1; 34 | 35 | if [ $# != 3 ]; then 36 | echo "Wrong #arguments ($#, expected 3)" 37 | echo "usage: steps_pdnn/build_nnet_pfile.sh <data-dir> <ali-dir> <working-dir>" 38 | echo "e.g.: steps_pdnn/build_nnet_pfile.sh data/train exp/tri4_ali exp/tri4_pfile" 39 | echo "main options (for others, see top of script file)" 40 | echo " --stage <stage> # starts from which stage" 41 | echo " --nj <nj> # number of parallel jobs" 42 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 43 | exit 1; 44 | fi 45 | 46 | data=$1 47 | alidir=$2 48 | dir=$3 49 | 50 | name=`basename $data` 51 | nj=`cat $alidir/num_jobs` || exit 1; 52 | sdata=$data/split$nj 53 | 54 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 55 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 56 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 57 | if [ -f $try_path/pfile_create ]; then 58 | PPATH=$try_path 59 | else 60 | echo "You do not have pfile_create (part of pfile-utils) on your path," 61 | echo "and it is not accessible in the normal place e.g.
$try_path/pfile_create" 62 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 63 | exit 1 64 | fi 65 | else 66 | PPATH=$(dirname `which pfile_create`) 67 | fi 68 | export PATH=$PATH:$PPATH 69 | 70 | mkdir -p $dir/log 71 | echo $splice_opts > $dir/splice_opts 72 | echo $norm_vars > $dir/norm_vars 73 | echo $add_deltas > $dir/add_deltas 74 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 75 | 76 | ## Setup features 77 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 78 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 79 | if $shuffle_scp; then 80 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | splice-feats $splice_opts ark:- ark:- |" 81 | fi 82 | # Add first and second-order deltas if needed 83 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 84 | 85 | ## Get the dimension of the features 86 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 87 | feat-to-dim "$feats subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 88 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 89 | echo "$0: network inputs have the dimension of $feat_dim" 90 | 91 | if [ $stage -le 2 ]; then 92 | $cmd JOB=1:$nj $dir/log/build_pfile.$name.JOB.log \ 93 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 94 | "$feats" "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 95 | # Concatenate the pfiles into one 96 | all_pfiles="" 97 | for n in `seq 1 $nj`; do 98 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 99 | done 100 | if $do_concat; then 101 | $cmd $dir/log/pfile_cat.log \ 102 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 103 | rm -rf $dir/$name.pfile.* 104 | else 105 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 106 | fi 107 | fi 108 | 109 | if [ $stage -le 3 ] && $do_split; then 110 | echo "Split data into training and cross-validation" 111 | mkdir -p $dir/concat 112 | # Chop the whole pfile into small units 113 | $cmd $dir/log/pfile_burst.log \ 114 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 115 | fi 116 | 117 | if [ $stage -le 4 ] && $do_split; then 118 | # Split the units accoring to cv_ratio 119 | $cmd $dir/log/pfile_rconcat.log \ 120 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 121 | rm -r $dir/concat 122 | echo "## Info of the training pfile: ##" 123 | $PPATH/pfile_info $dir/train.pfile 124 | echo "## Info of the cross-validation pfile: ##" 125 | $PPATH/pfile_info $dir/valid.pfile 126 | fi 127 | 128 | echo "$0: done creating pfiles." 129 | 130 | exit 0; 131 | -------------------------------------------------------------------------------- /steps_pdnn/decode_avdnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # 2015 Fei Tao 5 | # Apache 2.0 6 | 7 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in 8 | # build_nnet_pfile.sh. Also, the DNN model has been trained and put in srcdir. 
9 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 10 | 11 | ## Begin configuration section 12 | stage=0 13 | nj=16 14 | cmd=run.pl 15 | num_threads=1 16 | 17 | max_active=7000 # max-active 18 | beam=15.0 # beam used 19 | latbeam=7.0 # beam used in getting lattices 20 | acwt=0.1 # acoustic weight used in getting lattices 21 | max_arcs=-1 22 | 23 | skip_scoring=false # whether to skip WER scoring 24 | scoring_opts= 25 | 26 | splice_opts= 27 | norm_vars= 28 | add_deltas= 29 | 30 | ## End configuration section 31 | 32 | echo "$0 $@" # Print the command line for logging 33 | 34 | [ -f ./path.sh ] && . ./path.sh; # source the path. 35 | . parse_options.sh || exit 1; 36 | 37 | if [ $# != 5 ]; then 38 | echo "Wrong #arguments ($#, expected 5)" 39 | echo "Usage: steps_pdnn/decode_avdnn.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir> <scoring-script-dir>" 40 | echo " e.g.: steps_pdnn/decode_avdnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode KaldiScripts" 41 | echo "main options (for others, see top of script file)" 42 | echo " --stage <stage> # stage to start from" 43 | echo " --nj <nj> # number of parallel jobs" 44 | echo " --cmd <cmd> # command used to run jobs in parallel" 45 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 46 | echo " --num-threads <n> # number of threads to use, default 1." 47 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 48 | echo " --scoring-opts <opts> # options to the scoring script" 49 | exit 1; 50 | fi 51 | 52 | graphdir=$1 53 | data=$2 54 | alidir=$3 55 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 56 | scoredir=$5 # directory of the scoring script 57 | 58 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 59 | sdata=$data/split$nj; 60 | 61 | thread_string= 62 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 63 | 64 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 65 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 66 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 67 | 68 | mkdir -p $dir/log 69 | split_data.sh $data $nj || exit 1; 70 | echo $nj > $dir/num_jobs 71 | 72 | # Some checks. Note: we don't need $srcdir/tree but we expect 73 | # it should exist, given the current structure of the scripts. 74 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 75 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 76 | done 77 | 78 | # Generate state counts; will be used as prior 79 | $cmd $dir/log/class_count.log \ 80 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 81 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 82 | 83 | ## Set up the features 84 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 86 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 87 | midfeats="$feats nnet-forward --no-softmax=true $srcdir/dnn.nnet.bran ark:- ark:-|" 88 | finalfeats="$midfeats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet.main ark:- ark:- |" 89 | #finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet ark:- ark:- |" 90 | 91 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 92 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 93 | 94 | # Copy the source model so that scoring can find it 95 | cp $alidir/final.mdl $srcdir 96 | 97 | if ! $skip_scoring ; then 98 | [ ! -x $scoredir/iskld_score_kaldi.sh ] && \ 99 | echo "$0: not scoring because $scoredir/iskld_score_kaldi.sh does not exist or is not executable." && exit 1; 100 | $scoredir/iskld_score_kaldi.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 101 | fi 102 | 103 | exit 0; 104 | -------------------------------------------------------------------------------- /steps_pdnn/decode_dnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in 7 | # build_nnet_pfile.sh. Also, the DNN model must already have been trained and put in srcdir. 8 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 4 ]; then 37 | echo "Wrong #arguments ($#, expected 4)" 38 | echo "Usage: steps_pdnn/decode_dnn.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/decode_dnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g.
'-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 55 | 56 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 57 | sdata=$data/split$nj; 58 | 59 | thread_string= 60 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 61 | 62 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 63 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 64 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 65 | 66 | mkdir -p $dir/log 67 | split_data.sh $data $nj || exit 1; 68 | echo $nj > $dir/num_jobs 69 | 70 | # Some checks. Note: we don't need $srcdir/tree but we expect 71 | # it should exist, given the current structure of the scripts. 72 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 73 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 74 | done 75 | 76 | # Generate state counts; will be used as prior 77 | $cmd $dir/log/class_count.log \ 78 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 79 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 80 | 81 | ## Set up the features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 85 | ## 86 | finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet ark:- ark:- |" 87 | 88 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 89 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 90 | 91 | # Copy the source model so that scoring can find it 92 | cp $alidir/final.mdl $srcdir 93 | 94 | if ! $skip_scoring ; then 95 | [ ! -x local/score.sh ] && \ 96 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 97 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 98 | fi 99 | 100 | exit 0; 101 | -------------------------------------------------------------------------------- /steps_pdnn/make_bnf_feat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Copyright 2014 Xiaohui Zhang Johns Hopkins University 4 | # Apache 2.0 5 | 6 | # Make the BNF front-end with the trained neural network 7 | 8 | # Begin configuration section. 9 | stage=1 10 | nj=8 11 | cmd=run.pl 12 | 13 | splice_opts= 14 | norm_vars= 15 | add_deltas= 16 | # End configuration options. 17 | 18 | echo "$0 $@" # Print the command line for logging 19 | 20 | [ -f path.sh ] && . ./path.sh # source the path. 21 | .
parse_options.sh || exit 1; 22 | 23 | if [ $# != 5 ]; then 24 | echo "usage: steps_pdnn/make_bnf_feat.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <log-dir> <bnf-feat-dir>" 25 | echo "e.g.: steps_pdnn/make_bnf_feat.sh data_bnf/train data/train exp/bnf_net exp/make_bnf/log exp/bnf" 26 | echo "main options (for others, see top of script file)" 27 | echo " --config <config-file> # config containing options" 28 | echo " --nj <nj> # number of parallel jobs" 29 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 30 | exit 1; 31 | fi 32 | 33 | data=$1 34 | srcdata=$2 35 | netdir=$3 36 | logdir=$4 37 | feadir=$5 38 | 39 | sdata=$srcdata/split$nj; 40 | name=`basename $data` 41 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 42 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 43 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 44 | 45 | mkdir -p $data $logdir $feadir 46 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 47 | 48 | for f in $netdir/bnf.nnet; do 49 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 50 | done 51 | 52 | ## Set up the features 53 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 54 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 55 | ## 56 | 57 | # prepare the dir 58 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 59 | 60 | # get the absolute pathname 61 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 62 | 63 | echo "$0: making BNF scp and ark." 64 | $cmd JOB=1:$nj $logdir/make_bnf_$name.JOB.log \ 65 | nnet-forward --apply-log=false $netdir/bnf.nnet "$feats" \ 66 | ark,scp:$feadir/feats_bnf_$name.JOB.ark,$feadir/feats_bnf_$name.JOB.scp || exit 1; 67 | 68 | 69 | N0=$(cat $srcdata/feats.scp | wc -l) 70 | N1=$(cat $feadir/feats_bnf_$name.*.scp | wc -l) 71 | if [[ "$N0" != "$N1" ]]; then 72 | echo "$0: error occurred while generating BNF for $name (Original:$N0 BNF:$N1)" 73 | exit 1; 74 | fi 75 | 76 | # Concatenate the per-job feats.scp into the BNF data dir 77 | for n in `seq 1 $nj`; do 78 | cat $feadir/feats_bnf_$name.$n.scp >> $data/feats.scp 79 | done 80 | 81 | echo "$0: done making BNF" 82 | 83 | exit 0; 84 | -------------------------------------------------------------------------------- /steps_pdnn/make_conv_feat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Generate activations from the convolution layers of a CNN and save them in 6 | # Kaldi format. 7 | 8 | ## Begin configuration section. 9 | stage=1 10 | nj=4 11 | cmd=run.pl 12 | 13 | splice_opts= 14 | norm_vars= 15 | add_deltas= 16 | 17 | layer_index=1 18 | 19 | ## End configuration options. 20 | 21 | echo "$0 $@" # Print the command line for logging 22 | 23 | [ -f path.sh ] && . ./path.sh # source the path. 24 | .
parse_options.sh || exit 1; 25 | 26 | if [ $# != 7 ]; then 27 | echo "Wrong #arguments ($#, expected 7)" 28 | echo "usage: steps_pdnn/make_conv_feat.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <cnn-param-file>" 29 | echo " <cnn-cfg-file> <log-dir> <conv-feat-dir>" 30 | echo "e.g.: steps_pdnn/make_conv_feat.sh data_conv/train data/train exp/cnn " 31 | echo "exp/cnn/nnet.param exp/cnn/nnet.cfg exp/cnn/_log exp/cnn/_conv" 32 | echo "main options (for others, see top of script file)" 33 | echo " --stage <stage> # stage to start from" 34 | echo " --nj <nj> # number of parallel jobs" 35 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 36 | exit 1; 37 | fi 38 | 39 | data=$1 40 | srcdata=$2 41 | netdir=$3 42 | cnnparam=$4 43 | cnncfg=$5 44 | logdir=$6 45 | feadir=$7 46 | 47 | # get the absolute pathname 48 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 49 | 50 | name=`basename $data` 51 | sdata=$srcdata/split$nj 52 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 53 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 54 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 55 | 56 | mkdir -p $data $logdir $feadir 57 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 58 | 59 | for f in $cnnparam $cnncfg; do 60 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 61 | done 62 | 63 | # prepare the dir 64 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 65 | 66 | ## First dump the network input into local files 67 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 68 | if $add_deltas; then 69 | $cmd JOB=1:$nj $logdir/nnet_input.$name.JOB.log \ 70 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 71 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 72 | splice-feats $splice_opts ark:- ark:- \| \ 73 | add-deltas ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 74 | 75 | else 76 | $cmd JOB=1:$nj $logdir/nnet_input.$name.JOB.log \ 77 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 78 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 79 | splice-feats $splice_opts ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 80 | fi 81 | 82 | # Generate conv-layer activation by calling PDNN 83 | $cmd JOB=1:$nj $logdir/conv_feat.$name.JOB.log \ 84 | export PYTHONPATH=$PYTHONPATH:`pwd`/pdnn/ \; \ 85 | export THEANO_FLAGS=mode=FAST_RUN,device=cpu,floatX=float32 \; \ 86 | python pdnn/cmds2/run_FeatExt_Kaldi.py --in-scp-file $feadir/nnet_input.$name.JOB.scp --out-ark-file $feadir/conv_feats.$name.JOB.ark --nnet-param $cnnparam --nnet-cfg $cnncfg --layer-index $layer_index 87 | 88 | rm $feadir/nnet_input.* 89 | 90 | # Generate the final scp and ark files 91 | $cmd JOB=1:$nj $logdir/copy_feat.$name.JOB.log \ 92 | copy-feats ark:$feadir/conv_feats.$name.JOB.ark ark,scp:$feadir/feats.$name.JOB.ark,$feadir/feats.$name.JOB.scp 93 | rm $feadir/conv_feats.* 94 | 95 | N0=$(cat $srcdata/feats.scp | wc -l) 96 | N1=$(cat $feadir/feats.$name.*.scp | wc -l) 97 | if [[ "$N0" != "$N1" ]]; then 98 | echo "$0: error occurred while generating features for $name (Original:$N0 New:$N1)" 99 | exit 1; 100 | fi 101 | 102 | # Concatenate the per-job feats.scp into the output data dir 103 | for n in `seq 1 $nj`; do 104 | cat $feadir/feats.$name.$n.scp >> $data/feats.scp 105 | done 106 | 107 | echo "$0: done making features" 108 | 109 | exit 0; 110 | 111 |
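A minimal invocation sketch for make_conv_feat.sh, reusing the illustrative paths from its own usage message (it assumes a CNN already trained with one of the run-cnn.sh recipes, so that exp/cnn/nnet.param and exp/cnn/nnet.cfg exist; --layer-index selects which convolution layer's activations to dump):
steps_pdnn/make_conv_feat.sh --nj 8 --layer-index 1 data_conv/train data/train exp/cnn exp/cnn/nnet.param exp/cnn/nnet.cfg exp/cnn/_log exp/cnn/_conv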
-------------------------------------------------------------------------------- /steps_pdnn/make_denlats_nnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012-2013 Karel Vesely, Daniel Povey 3 | # Apache 2.0. 4 | 5 | # Create denominator lattices for MMI/MPE/sMBR training. 6 | # Creates its output in $dir/lat.*.ark,$dir/lat.scp 7 | # The lattices are uncompressed; we need random access for DNN training. 8 | 9 | # Begin configuration section. 10 | nj=4 11 | cmd=run.pl 12 | sub_split=1 13 | beam=13.0 14 | lattice_beam=7.0 15 | acwt=0.1 16 | max_active=5000 17 | nnet= 18 | max_mem=20000000 # This will stop the processes getting too large. 19 | # This is in bytes, but not "real" bytes-- you have to multiply 20 | # by something like 5 or 10 to get real bytes (not sure why so large) 21 | # End configuration section. 22 | use_gpu=no # yes|no|optional 23 | parallel_opts="-pe smp 2" 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | echo "$0 $@" # Print the command line for logging 30 | 31 | [ -f ./path.sh ] && . ./path.sh; # source the path. 32 | . parse_options.sh || exit 1; 33 | 34 | if [ $# != 4 ]; then 35 | echo "Usage: steps/$0 [options] <data-dir> <lang-dir> <src-dir> <exp-dir>" 36 | echo " e.g.: steps/$0 data/train data/lang exp/tri1 exp/tri1_denlats" 37 | echo "Works for plain features (or CMN, delta), forwarded through feature-transform." 38 | echo "" 39 | echo "Main options (for others, see top of script file)" 40 | echo " --config <config-file> # config containing options" 41 | echo " --nj <nj> # number of parallel jobs" 42 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 43 | echo " --sub-split <n-split> # e.g. 40; use this for " 44 | echo " # large databases so your jobs will be smaller and" 45 | echo " # will (individually) finish reasonably soon." 46 | exit 1; 47 | fi 48 | 49 | data=$1 50 | lang=$2 51 | srcdir=$3 52 | dir=$4 53 | 54 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 55 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 56 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 57 | 58 | sdata=$data/split$nj 59 | mkdir -p $dir/log 60 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 61 | echo $nj > $dir/num_jobs 62 | 63 | oov=`cat $lang/oov.int` || exit 1; 64 | 65 | mkdir -p $dir 66 | 67 | cp -r $lang $dir/ 68 | 69 | # Compute grammar FST which corresponds to unigram decoding graph. 70 | new_lang="$dir/"$(basename "$lang") 71 | echo "Making unigram grammar FST in $new_lang" 72 | cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ 73 | awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ 74 | utils/make_unigram_grammar.pl | fstcompile > $new_lang/G.fst \ 75 | || exit 1; 76 | 77 | # mkgraph.sh expects a whole directory "lang", so put everything in one directory... 78 | # it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and 79 | # final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph. 80 | 81 | echo "Compiling decoding graph in $dir/dengraph" 82 | if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then 83 | echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
84 | else 85 | utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; 86 | fi 87 | 88 | 89 | cp $srcdir/{tree,final.mdl} $dir 90 | 91 | # Select default locations to model files 92 | [ -z "$nnet" ] && nnet=$srcdir/final.nnet; 93 | class_frame_counts=$srcdir/train_class.counts 94 | model=$dir/final.mdl 95 | 96 | # Check that files exist 97 | for f in $sdata/1/feats.scp $nnet $model $class_frame_counts; do 98 | [ ! -f $f ] && echo "$0: missing file $f" && exit 1; 99 | done 100 | 101 | # PREPARE FEATURE EXTRACTION PIPELINE 102 | # Create the feature stream: 103 | ## Set up the features 104 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 105 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 106 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 107 | feats="$feats nnet-forward --no-softmax=true --class-frame-counts=$class_frame_counts $nnet ark:- ark:- |" 108 | 109 | 110 | echo "$0: generating denlats from data '$data', putting lattices in '$dir'" 111 | # Generate the lattices 112 | $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \ 113 | latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \ 114 | --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \ 115 | $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; 116 | echo "$0: done generating denominator lattices." 117 | -------------------------------------------------------------------------------- /steps_pdnn/sat/build_nnet_pfile_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Create pfiles for deep neural network training with i-vectors appended to each frame. 6 | # We assume that the training alignment is ready and features (either fbanks or 7 | # fMLLRs) have been generated. 8 | # Refer to the following comments for configurations. 9 | 10 | ## Begin configuration section. 11 | stage=1 12 | every_nth_frame=1 # for subsampling. 13 | nj=4 14 | cmd=run.pl 15 | 16 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 17 | add_deltas=false 18 | norm_vars=false # when doing cmvn, whether to normalize variance 19 | 20 | do_concat=true # whether to concatenate the individual pfiles into a single one 21 | 22 | # Config for splitting pfile into training and valid set; not used for SWBD 23 | pfile_unit_size=40 # the number of utterances of each small unit into which the whole pfile is chopped 24 | do_split=false 25 | cv_ratio=0.05 # the ratio of CV data 26 | 27 | shuffle_scp=false # whether the feature scp is shuffled 28 | 29 | # Config for ivector 30 | ivec_type=speaker # the type of appended features: speaker, utterance, frame 31 | 32 | ## End configuration options. 33 | 34 | echo "$0 $@" # Print the command line for logging 35 | 36 | [ -f path.sh ] && . ./path.sh # source the path. 37 | .
parse_options.sh || exit 1; 38 | 39 | if [ $# != 4 ]; then 40 | echo "Wrong #arguments ($#, expected 4)" 41 | echo "usage: steps_pdnn/sat/build_nnet_pfile_ivec.sh <data-dir> <ali-dir> <ivector-dir> <pfile-dir>" 42 | echo "e.g.: steps_pdnn/sat/build_nnet_pfile_ivec.sh data/train exp/tri4_ali exp_ivec/ivector_swbd1 exp/tri4_pfile" 43 | echo "main options (for others, see top of script file)" 44 | echo " --stage <stage> # stage to start from" 45 | echo " --nj <nj> # number of parallel jobs" 46 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 47 | exit 1; 48 | fi 49 | 50 | data=$1 51 | alidir=$2 52 | ivdir=$3 53 | dir=$4 54 | 55 | name=`basename $data` 56 | nj=`cat $alidir/num_jobs` || exit 1; 57 | sdata=$data/split$nj 58 | 59 | # Check whether ivectors have been generated successfully. 60 | [ ! -f $ivdir/ivector.scp ] && echo "$0: no such file $ivdir/ivector.scp" && exit 1; 61 | 62 | if ! which pfile_create >/dev/null; then # pfile_create not on our path. 63 | [ -z "$KALDI_ROOT" ] && KALDI_ROOT=`pwd`/../../.. # normal case. 64 | try_path=$KALDI_ROOT/tools/pfile_utils-v0_51/bin/ 65 | if [ -f $try_path/pfile_create ]; then 66 | PPATH=$try_path 67 | else 68 | echo "You do not have pfile_create (part of pfile-utils) on your path," 69 | echo "and it is not accessible in the normal place e.g. $try_path/pfile_create" 70 | echo "Try going to KALDI_ROOT/tools/ and running ./install_pfile_utils.sh" 71 | exit 1 72 | fi 73 | else 74 | PPATH=$(dirname `which pfile_create`) 75 | fi 76 | export PATH=$PATH:$PPATH 77 | 78 | mkdir -p $dir/log 79 | #echo $splice_opts > $dir/splice_opts 80 | #echo $norm_vars > $dir/norm_vars 81 | #echo $add_deltas > $dir/add_deltas 82 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 83 | 84 | ## Setup features 85 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 86 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 87 | if $shuffle_scp; then 88 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp \"scp:cat $sdata/JOB/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} |\" ark:- | splice-feats $splice_opts ark:- ark:- |" 89 | fi 90 | # Add first and second-order deltas if needed 91 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 92 | 93 | # Setup the additional ivector features 94 | case $ivec_type in 95 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 96 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 97 | frame) ivfeats="scp:$ivdir/ivector.scp";; 98 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 99 | esac 100 | # On each frame, append the i-vector to the original feature vector 101 | if [ $stage -le 2 ]; then 102 | $cmd JOB=1:$nj $dir/log/append_feat_$name.JOB.log \ 103 | append-feats "$feats" "$ivfeats" ark,scp:$dir/append_feat.$name.JOB.ark,$dir/append_feat.$name.JOB.scp || exit 1; 104 | fi 105 | ## 106 | 107 | ## Get the dimension of the features 108 | $cmd JOB=1:1 $dir/log/get_feat_dim.log \ 109 | feat-to-dim "ark,s,cs:copy-feats scp:$dir/append_feat.$name.1.scp ark:- | subset-feats --n=1 ark:- ark:- |" ark,t:$dir/feat_dim || exit 1; 110 | feat_dim=`cat $dir/feat_dim | awk '{print $NF}'` 111 | echo "$0: network input dimension is $feat_dim" 112 | 113 | if [ $stage -le 3 ]; then 114 | $cmd
JOB=1:$nj $dir/log/build_pfile.JOB.log \ 115 | build-pfile-from-ali --every-nth-frame=$every_nth_frame $alidir/final.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" \ 116 | scp:$dir/append_feat.$name.JOB.scp "|$PPATH/pfile_create -i - -o $dir/$name.pfile.JOB -f $feat_dim -l 1" || exit 1; 117 | # Concatenate the pfiles into one 118 | all_pfiles="" 119 | for n in `seq 1 $nj`; do 120 | all_pfiles="$all_pfiles $dir/$name.pfile.$n" 121 | done 122 | if $do_concat; then 123 | $cmd $dir/log/pfile_cat.log \ 124 | $PPATH/pfile_concat -q $all_pfiles -o $dir/concat.pfile || exit 1; 125 | rm -rf $dir/$name.pfile.* 126 | else 127 | $cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log gzip $dir/$name.pfile.JOB || exit 1; 128 | fi 129 | fi 130 | 131 | if [ $stage -le 4 ] && $do_split; then 132 | echo "Split data into training and cross-validation" 133 | mkdir -p $dir/concat 134 | # Chop the whole pfile into small units 135 | $cmd $dir/log/pfile_burst.log \ 136 | perl steps_pdnn/pfile_burst.pl -i $dir/concat.pfile -o $dir/concat -s $pfile_unit_size || exit 1; 137 | fi 138 | 139 | if [ $stage -le 5 ] && $do_split; then 140 | # Split the units according to cv_ratio 141 | $cmd $dir/log/pfile_rconcat.log \ 142 | perl steps_pdnn/pfile_rconcat.pl -t "$dir" -o $dir/valid.pfile,${cv_ratio} -o $dir/train.pfile $dir/concat/*.pfile || exit 1; 143 | rm -r $dir/concat 144 | echo "## Info of the training pfile: ##" 145 | $PPATH/pfile_info $dir/train.pfile 146 | echo "## Info of the cross-validation pfile: ##" 147 | $PPATH/pfile_info $dir/valid.pfile 148 | fi 149 | 150 | echo "$0: done creating pfiles." 151 | 152 | exit 0; 153 | -------------------------------------------------------------------------------- /steps_pdnn/sat/decode_dnn_concat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the DNN model. The features are a concatenation of the original input 7 | # features and additional features. These additional features can be on the 8 | # speaker, utterance or frame level. 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 29 | 30 | ## End configuration section 31 | 32 | echo "$0 $@" # Print the command line for logging 33 | 34 | [ -f ./path.sh ] && . ./path.sh; # source the path. 35 | . parse_options.sh || exit 1; 36 | 37 | if [ $# != 5 ]; then 38 | echo "Wrong #arguments ($#, expected 5)" 39 | echo "Usage: steps_pdnn/sat/decode_dnn_concat.sh [options] <graph-dir> <data-dir> <ali-dir> <ivector-dir> <decode-dir>" 40 | echo " e.g.: steps_pdnn/sat/decode_dnn_concat.sh exp/tri4/graph data/test exp/tri4_ali exp_ivec/ivector_eval2000 exp/tri4_dnn/decode" 41 | echo "main options (for others, see top of script file)" 42 | echo " --stage <stage> # stage to start from" 43 | echo " --nj <nj> # number of parallel jobs" 44 | echo " --cmd <cmd> # command used to run jobs in parallel" 45 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 46 | echo " --num-threads <n> # number of threads to use, default 1." 47 | echo " --parallel-opts <opts> # e.g.
'-pe smp 4' if you supply --num-threads 4" 48 | echo " --scoring-opts <opts> # options to local/score.sh" 49 | exit 1; 50 | fi 51 | 52 | graphdir=$1 53 | data=$2 54 | alidir=$3 55 | ivdir=$4 56 | dir=`echo $5 | sed 's:/$::g'` # remove any trailing slash. 57 | 58 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 59 | name=`basename $data` 60 | sdata=$data/split$nj; 61 | 62 | thread_string= 63 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 64 | 65 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 66 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 67 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 68 | 69 | mkdir -p $dir/log 70 | split_data.sh $data $nj || exit 1; 71 | echo $nj > $dir/num_jobs 72 | 73 | # Some checks. Note: we don't need $srcdir/tree but we expect 74 | # it should exist, given the current structure of the scripts. 75 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree $ivdir/ivector.scp; do 76 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 77 | done 78 | 79 | # Generate state counts; will be used as prior 80 | $cmd $dir/log/class_count.log \ 81 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 82 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 83 | 84 | ## Set up the features 85 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 86 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 87 | 88 | # Setup the additional ivector features 89 | case $ivec_type in 90 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 91 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 92 | frame) ivfeats="scp:$ivdir/ivector.scp";; 93 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 94 | esac 95 | 96 | # On each frame, append the i-vector to the original feature vector 97 | $cmd JOB=1:$nj $dir/log/append_feat_$name.JOB.log \ 98 | append-feats "$feats" "$ivfeats" ark,scp:$dir/append_feat.$name.JOB.ark,$dir/append_feat.$name.JOB.scp || exit 1; 99 | ## 100 | 101 | finalfeats="ark:nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet scp:$dir/append_feat.$name.JOB.scp ark:- |" 102 | 103 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 104 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 105 | 106 | rm -rf $dir/append_feat.$name.* 107 | 108 | # Copy the source model so that scoring can find it 109 | cp $alidir/final.mdl $srcdir 110 | 111 | if ! $skip_scoring ; then 112 | [ ! -x local/score.sh ] && \ 113 | echo "$0: not scoring because local/score.sh does not exist or is not executable."
&& exit 1; 114 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 115 | fi 116 | 117 | exit 0; 118 | -------------------------------------------------------------------------------- /steps_pdnn/sat/decode_dnn_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the SAT-DNN model. You should already have the canonical DNN model 7 | # and the iVecNN network in srcdir. 8 | 9 | ## Begin configuration section 10 | stage=0 11 | nj=16 12 | cmd=run.pl 13 | num_threads=1 14 | 15 | max_active=7000 # max-active 16 | beam=15.0 # beam used 17 | latbeam=7.0 # beam used in getting lattices 18 | acwt=0.1 # acoustic weight used in getting lattices 19 | max_arcs=-1 20 | 21 | skip_scoring=false # whether to skip WER scoring 22 | scoring_opts= 23 | 24 | splice_opts= 25 | norm_vars= 26 | add_deltas= 27 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 5 ]; then 37 | echo "Wrong #arguments ($#, expected 5)" 38 | echo "Usage: steps_pdnn/sat/decode_dnn_ivec.sh [options] <graph-dir> <data-dir> <ali-dir> <ivector-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/sat/decode_dnn_ivec.sh exp/tri4/graph data/test exp/tri4_ali exp_ivec/ivector_eval2000 exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | ivdir=$4 55 | dir=`echo $5 | sed 's:/$::g'` # remove any trailing slash. 56 | 57 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 58 | name=`basename $data` 59 | sdata=$data/split$nj; 60 | 61 | thread_string= 62 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 63 | 64 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 65 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 66 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 67 | 68 | mkdir -p $dir/log 69 | split_data.sh $data $nj || exit 1; 70 | echo $nj > $dir/num_jobs 71 | 72 | # Some checks. Note: we don't need $srcdir/tree but we expect 73 | # it should exist, given the current structure of the scripts. 74 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree $ivdir/ivector.scp; do 75 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 76 | done 77 | 78 | # Generate state counts; will be used as prior 79 | $cmd $dir/log/class_count.log \ 80 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 81 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 82 | 83 | ## Set up the features 84 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 85 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 86 | 87 | # Setup the additional ivector features 88 | case $ivec_type in 89 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 90 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 91 | frame) ivfeats="ark:copy-feats scp:$ivdir/ivector.scp ark:- |";; 92 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 93 | esac 94 | ivfeats="$ivfeats nnet-forward $srcdir/dnn.nnet.adapt ark:- ark:- |" 95 | 96 | # Add the linear feature shifts to the original DNN inputs 97 | $cmd JOB=1:$nj $dir/log/add_feat_$name.JOB.log \ 98 | add-feats "$feats" "$ivfeats" ark,scp:$dir/add_feat.$name.JOB.ark,$dir/add_feat.$name.JOB.scp || exit 1; 99 | 100 | # Use add-feats-wgt if you want to try the weighted-sum feature fusion 101 | #featbin/add-feats-wgt --feat1wgt=ark:$srcdir/feat.wgt.ark --feat2wgt=ark:$srcdir/ivec.wgt.ark --biaswgt=ark:$srcdir/bias.wgt.ark "$feats" "$ivfeats" ark,scp:$dir/add_feat.$name.JOB.ark,$dir/add_feat.$name.JOB.scp || exit 1; 102 | 103 | ## 104 | 105 | finalfeats="ark:nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $srcdir/dnn.nnet.si scp:$dir/add_feat.$name.JOB.scp ark:- |" 106 | 107 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 108 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 109 | 110 | # Copy the source model so that scoring can find it 111 | cp $alidir/final.mdl $srcdir 112 | 113 | if ! $skip_scoring ; then 114 | [ ! -x local/score.sh ] && \ 115 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 116 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 117 | fi 118 | 119 | exit 0; 120 | -------------------------------------------------------------------------------- /steps_pdnn/sat/make_bnf_feat_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Yajie Miao Carnegie Mellon University 3 | # Copyright 2014 Xiaohui Zhang Johns Hopkins University 4 | # Apache 2.0 5 | 6 | # Make BNF front-end with the SAT-trained neural network 7 | 8 | # Begin configuration section. 9 | stage=1 10 | nj=8 11 | cmd=run.pl 12 | 13 | norm_vars=false # when doing cmvn, whether to normalize variance; has to be consistent with build_nnet_pfile.sh 14 | 15 | # Config for ivector 16 | is_spk_mode=false # whether the i-vectors are per-speaker 17 | 18 | # End configuration options. 19 | 20 | echo "$0 $@" # Print the command line for logging 21 | 22 | [ -f path.sh ] && . ./path.sh # source the path. 23 | .
parse_options.sh || exit 1; 24 | 25 | if [ $# != 6 ]; then 26 | echo "usage: steps_pdnn/sat/make_bnf_feat_ivec.sh <tgt-data-dir> <src-data-dir> <nnet-dir> <ivector-dir> <log-dir> <bnf-feat-dir>" 27 | echo "e.g.: steps_pdnn/sat/make_bnf_feat_ivec.sh data_bnf/train data/train exp/bnf_net exp_ivec/ivector_swbd1 exp/make_bnf/log exp/bnf" 28 | echo "main options (for others, see top of script file)" 29 | echo " --config <config-file> # config containing options" 30 | echo " --nj <nj> # number of parallel jobs" 31 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." 32 | exit 1; 33 | fi 34 | 35 | data=$1 36 | srcdata=$2 37 | netdir=$3 38 | ivdir=$4 39 | logdir=$5 40 | feadir=$6 41 | 42 | sdata=$srcdata/split$nj; 43 | splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 44 | name=`basename $data` 45 | 46 | mkdir -p $data $logdir $feadir 47 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 48 | 49 | for f in $netdir/bnf.nnet; do 50 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 51 | done 52 | 53 | ## Set up the features 54 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars})" 55 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 56 | 57 | if $is_spk_mode; then 58 | ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- | nnet-forward $netdir/ivec.nnet ark:- ark:- |" 59 | else 60 | ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- | nnet-forward $netdir/ivec.nnet ark:- ark:- |" 61 | fi 62 | 63 | $cmd JOB=1:$nj $logdir/add_feat_$name.JOB.log \ 64 | add-feats "$feats" "$ivfeats" ark,scp:$feadir/add_feat.$name.JOB.ark,$feadir/add_feat.$name.JOB.scp || exit 1; 65 | ## 66 | 67 | # prepare the dir 68 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 69 | 70 | # get the absolute pathname 71 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 72 | 73 | echo "$0: making BNF scp and ark." 74 | $cmd JOB=1:$nj $logdir/make_bnf_$name.JOB.log \ 75 | nnet-forward --apply-log=false $netdir/bnf.nnet scp:$feadir/add_feat.$name.JOB.scp \ 76 | ark,scp:$feadir/feats_bnf_$name.JOB.ark,$feadir/feats_bnf_$name.JOB.scp || exit 1; 77 | 78 | 79 | N0=$(cat $srcdata/feats.scp | wc -l) 80 | N1=$(cat $feadir/feats_bnf_$name.*.scp | wc -l) 81 | if [[ "$N0" != "$N1" ]]; then 82 | echo "$0: error occurred while generating BNF for $name (Original:$N0 BNF:$N1)" 83 | exit 1; 84 | fi 85 | 86 | # Concatenate the per-job feats.scp into the BNF data dir 87 | for n in `seq 1 $nj`; do 88 | cat $feadir/feats_bnf_$name.$n.scp >> $data/feats.scp 89 | done 90 | 91 | echo "$0: done making BNF" 92 | 93 | exit 0; 94 | -------------------------------------------------------------------------------- /steps_pdnn/sat/make_feat_with_ivec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Make adapted input features for the SAT-DNN model: the original DNN inputs plus 7 | # i-vector-based linear shifts. You should already have the iVecNN network in netdir. 8 | 9 | ## Begin configuration section 10 | stage=0 11 | nj=16 12 | cmd=run.pl 13 | num_threads=1 14 | 15 | splice_opts= 16 | norm_vars= 17 | add_deltas= 18 | ivec_type=speaker # the type of the i-vectors: speaker, utterance, frame 19 | 20 | ## End configuration section 21 | 22 | echo "$0 $@" # Print the command line for logging 23 | 24 | [ -f ./path.sh ] && .
./path.sh; # source the path. 25 | . parse_options.sh || exit 1; 26 | 27 | if [ $# != 6 ]; then 28 | echo "Wrong #arguments ($#, expected 6)" 29 | echo "Usage: steps_pdnn/sat/make_feat_with_ivec.sh [options] <tgt-data-dir> <src-data-dir> <nnet-dir> <ivector-dir> <log-dir> <feat-dir>" 30 | echo "main options (for others, see top of script file)" 31 | echo " --stage <stage> # stage to start from" 32 | echo " --nj <nj> # number of parallel jobs" 33 | echo " --cmd <cmd> # command used to run jobs in parallel" 34 | echo " --ivec-type (speaker|utterance|frame) # the type of the i-vectors" 35 | echo " --splice-opts <opts> # frame-splicing options" 36 | echo " --norm-vars (true|false) # whether to normalize variance in CMVN" 37 | echo " --add-deltas (true|false) # whether to append delta features" 38 | echo " --num-threads <n> # number of threads to use, default 1." 39 | exit 1; 40 | fi 41 | 42 | data=$1 43 | srcdata=$2 44 | netdir=$3 45 | ivdir=$4 46 | logdir=$5 47 | feadir=$6 48 | 49 | name=`basename $data` 50 | sdata=$srcdata/split$nj; 51 | [ -z "$splice_opts" ] && splice_opts=`cat $netdir/splice_opts 2>/dev/null` # frame-splicing options. 52 | [ -z "$add_deltas" ] && add_deltas=`cat $netdir/add_deltas 2>/dev/null` 53 | [ -z "$norm_vars" ] && norm_vars=`cat $netdir/norm_vars 2>/dev/null` 54 | 55 | mkdir -p $data $logdir $feadir 56 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 57 | 58 | for f in $netdir/dnn.nnet.adapt; do 59 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 60 | done 61 | 62 | ## Set up the features 63 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 64 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 65 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 66 | 67 | # Setup the additional ivector features 68 | case $ivec_type in 69 | speaker) ivfeats="ark,s,cs:get-spkvec-feat --utt2spk=ark:$sdata/JOB/utt2spk scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 70 | utterance) ivfeats="ark,s,cs:get-spkvec-feat scp:$ivdir/ivector.scp scp:$sdata/JOB/feats.scp ark:- |";; 71 | frame) ivfeats="ark:copy-feats scp:$ivdir/ivector.scp ark:- |";; 72 | *) echo "$0: invalid ivector type $ivec_type" && exit 1; 73 | esac 74 | ivfeats="$ivfeats nnet-forward $netdir/dnn.nnet.adapt ark:- ark:- |" 75 | 76 | # prepare the dir 77 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 78 | 79 | # get the absolute pathname 80 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 81 | 82 | # Add the linear feature shifts to the original DNN inputs 83 | echo "$0: making feature + [ivector shift] scp and ark."
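# (The add-feats step below adds the iVecNN output to every frame of the spliced input stream, i.e. a per-frame linear shift; the iVecNN output dimension therefore has to match the spliced feature dimension.)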
84 | $cmd JOB=1:$nj $logdir/add_feat_$name.JOB.log \ 85 | add-feats "$feats" "$ivfeats" ark,scp:$feadir/add_feat.$name.JOB.ark,$feadir/add_feat.$name.JOB.scp || exit 1; 86 | 87 | N0=$(cat $srcdata/feats.scp | wc -l) 88 | N1=$(cat $feadir/add_feat.$name.*.scp | wc -l) 89 | if [[ "$N0" != "$N1" ]]; then 90 | echo "$0: error occurred while generating feature + [ivector shift] for $name (Original:$N0 Now:$N1)" 91 | exit 1; 92 | fi 93 | 94 | # Concatenate the per-job feats.scp into the output data dir 95 | for n in `seq 1 $nj`; do 96 | cat $feadir/add_feat.$name.$n.scp >> $data/feats.scp 97 | done 98 | 99 | echo "$0: done making feature + [ivector shift]" 100 | 101 | exit 0; 102 | -------------------------------------------------------------------------------- /steps_pdnn/tmp/decode_dnn_lhuc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Yajie Miao Carnegie Mellon University 4 | # Apache 2.0 5 | 6 | # Decode the LHUC-adapted DNN model. The [srcdir] in this script should be the same as dir in 7 | # build_nnet_pfile.sh. Also, the DNN model must already have been trained and put in srcdir. 8 | # All these steps will be done automatically if you run the recipe file run-dnn.sh 9 | 10 | ## Begin configuration section 11 | stage=0 12 | nj=16 13 | cmd=run.pl 14 | num_threads=1 15 | 16 | max_active=7000 # max-active 17 | beam=15.0 # beam used 18 | latbeam=7.0 # beam used in getting lattices 19 | acwt=0.1 # acoustic weight used in getting lattices 20 | max_arcs=-1 21 | 22 | skip_scoring=false # whether to skip WER scoring 23 | scoring_opts= 24 | 25 | splice_opts= 26 | norm_vars= 27 | add_deltas= 28 | 29 | ## End configuration section 30 | 31 | echo "$0 $@" # Print the command line for logging 32 | 33 | [ -f ./path.sh ] && . ./path.sh; # source the path. 34 | . parse_options.sh || exit 1; 35 | 36 | if [ $# != 4 ]; then 37 | echo "Wrong #arguments ($#, expected 4)" 38 | echo "Usage: steps_pdnn/tmp/decode_dnn_lhuc.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir>" 39 | echo " e.g.: steps_pdnn/tmp/decode_dnn_lhuc.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode" 40 | echo "main options (for others, see top of script file)" 41 | echo " --stage <stage> # stage to start from" 42 | echo " --nj <nj> # number of parallel jobs" 43 | echo " --cmd <cmd> # command used to run jobs in parallel" 44 | echo " --acwt <float> # default 0.1 ... used to get posteriors" 45 | echo " --num-threads <n> # number of threads to use, default 1." 46 | echo " --parallel-opts <opts> # e.g. '-pe smp 4' if you supply --num-threads 4" 47 | echo " --scoring-opts <opts> # options to local/score.sh" 48 | exit 1; 49 | fi 50 | 51 | graphdir=$1 52 | data=$2 53 | alidir=$3 54 | dir=`echo $4 | sed 's:/$::g'` # remove any trailing slash. 55 | 56 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory. 57 | sdata=$data/split$nj; 58 | 59 | thread_string= 60 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 61 | 62 | [ -z "$splice_opts" ] && splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options. 63 | [ -z "$add_deltas" ] && add_deltas=`cat $srcdir/add_deltas 2>/dev/null` 64 | [ -z "$norm_vars" ] && norm_vars=`cat $srcdir/norm_vars 2>/dev/null` 65 | 66 | mkdir -p $dir/log 67 | split_data.sh $data $nj || exit 1; 68 | echo $nj > $dir/num_jobs 69 | 70 | # Some checks. Note: we don't need $srcdir/tree but we expect 71 | # it should exist, given the current structure of the scripts. 72 | for f in $graphdir/HCLG.fst $data/feats.scp $alidir/tree; do 73 | [ !
-f $f ] && echo "$0: no such file $f" && exit 1; 74 | done 75 | 76 | # Generate state counts; will be used as prior 77 | $cmd $dir/log/class_count.log \ 78 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- \| \ 79 | analyze-counts --binary=false ark:- $dir/class.counts || exit 1; 80 | 81 | ## Set up the features 82 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 83 | feats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |" 84 | $add_deltas && feats="$feats add-deltas ark:- ark:- |" 85 | ## 86 | finalfeats="$feats nnet-forward --class-frame-counts=$dir/class.counts --apply-log=true --no-softmax=false $dir/dnn.nnet.JOB ark:- ark:- |" 87 | 88 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 89 | latgen-faster-mapped --max-active=$max_active --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.JOB.gz" 90 | 91 | # Copy the source model so that scoring can find it 92 | cp $alidir/final.mdl $srcdir 93 | 94 | if ! $skip_scoring ; then 95 | [ ! -x local/score.sh ] && \ 96 | echo "$0: not scoring because local/score.sh does not exist or is not executable." && exit 1; 97 | local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir 98 | fi 99 | 100 | exit 0; 101 | -------------------------------------------------------------------------------- /steps_pdnn/tmp/make_nnet_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2014 Yajie Miao Carnegie Mellon University 3 | # Apache 2.0 4 | 5 | # Generate DNN input features and also create text-formatted alignment files. 6 | # It's used when PDNN directly reads Kaldi .ark and .ali files for training. 7 | 8 | ## Begin configuration section. 9 | stage=1 10 | nj=4 11 | cmd=run.pl 12 | 13 | splice_opts="--left-context=4 --right-context=4" # frame-splicing options for neural net input 14 | add_deltas=false 15 | norm_vars=false # when doing cmvn, whether to normalize variance 16 | 17 | ## End configuration options. 18 | 19 | echo "$0 $@" # Print the command line for logging 20 | 21 | [ -f path.sh ] && . ./path.sh # source the path. 22 | . parse_options.sh || exit 1; 23 | 24 | if [ $# != 5 ]; then 25 | echo "Wrong #arguments ($#, expected 5)" 26 | echo "usage: steps_pdnn/tmp/make_nnet_data.sh <tgt-data-dir> <src-data-dir> <feat-dir> <ali-dir> <exp-dir>" 27 | echo "e.g.: make_nnet_data.sh data/nnet_input data/train exp/feat exp/_log exp/" 28 | echo "main options (for others, see top of script file)" 29 | echo " --stage <stage> # stage to start from" 30 | echo " --nj <nj> # number of parallel jobs" 31 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
32 | exit 1; 33 | fi 34 | 35 | data=$1 36 | srcdata=$2 37 | feadir=$3 38 | alidir=$4 39 | dir=$5 40 | 41 | # get the absolute pathname 42 | feadir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $feadir ${PWD}` 43 | 44 | name=`basename $data` 45 | sdata=$srcdata/split$nj 46 | mkdir -p $dir/log $data $feadir 47 | 48 | echo $splice_opts > $dir/splice_opts; echo $add_deltas > $dir/add_deltas; echo $norm_vars > $dir/norm_vars 49 | [[ -d $sdata && $srcdata/feats.scp -ot $sdata ]] || split_data.sh $srcdata $nj || exit 1; 50 | 51 | # prepare the dir 52 | cp $srcdata/* $data 2>/dev/null; rm $data/{feats,cmvn}.scp; 53 | 54 | ## First dump the network input into local files 55 | echo "$0: feature: splice(${splice_opts}) norm_vars(${norm_vars}) add_deltas(${add_deltas})" 56 | if $add_deltas; then 57 | $cmd JOB=1:$nj $dir/log/nnet_input.$name.JOB.log \ 58 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 59 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 60 | splice-feats $splice_opts ark:- ark:- \| \ 61 | add-deltas ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 62 | 63 | else 64 | $cmd JOB=1:$nj $dir/log/nnet_input.$name.JOB.log \ 65 | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk \ 66 | scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- \| \ 67 | splice-feats $splice_opts ark:- ark,scp:$feadir/nnet_input.$name.JOB.ark,$feadir/nnet_input.$name.JOB.scp || exit 1; 68 | fi 69 | 70 | #$cmd JOB=1:$nj $dir/log/gzip.$name.JOB.log \ 71 | # gzip $feadir/nnet_input.$name.JOB.ark || exit 1; 72 | 73 | N0=$(cat $srcdata/feats.scp | wc -l) 74 | N1=$(cat $feadir/nnet_input.$name.*.scp | wc -l) 75 | if [[ "$N0" != "$N1" ]]; then 76 | echo "$0: error occurred while generating features for $name (Original:$N0 New:$N1)" 77 | exit 1; 78 | fi 79 | 80 | # Concatenate the per-job feats.scp into the output data dir 81 | for n in `seq 1 $nj`; do 82 | cat $feadir/nnet_input.$name.$n.scp >> $data/feats.scp 83 | done 84 | 85 | $cmd $dir/log/ali2post.$name.log \ 86 | ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz|" "ark,t:|gzip -c >$dir/$name.ali.gz" || exit 1; 87 | 88 | echo "$0: done making features" 89 | 90 | exit 0; 91 | --------------------------------------------------------------------------------
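A note on the decoding recipe shared by the decode_*.sh scripts above (a sketch for orientation only; the <angle-bracket> names stand for the scripts' arguments and are not literal paths). Each script first estimates state priors P(s) from the alignment counts stored in class.counts, and nnet-forward then turns the DNN posteriors p(s|x) into scaled log-likelihoods via log p(x|s) = log p(s|x) - log P(s) + const, which latgen-faster-mapped consumes. For one job, the expanded pipeline is roughly:
apply-cmvn --norm-vars=false --utt2spk=ark:<sdata>/1/utt2spk scp:<sdata>/1/cmvn.scp scp:<sdata>/1/feats.scp ark:- | splice-feats --left-context=4 --right-context=4 ark:- ark:- | nnet-forward --class-frame-counts=<decode-dir>/class.counts --apply-log=true --no-softmax=false <srcdir>/dnn.nnet ark:- ark:- | latgen-faster-mapped --acoustic-scale=0.1 --beam=15.0 --lattice-beam=7.0 --word-symbol-table=<graph-dir>/words.txt <ali-dir>/final.mdl <graph-dir>/HCLG.fst ark:- "ark:|gzip -c > <decode-dir>/lat.1.gz"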