├── log.log
├── proto
    ├── PASE.proto
    ├── channelAvg.proto
    ├── adam.proto
    ├── logMelFb.proto
    ├── sgd.proto
    ├── GRU_cudnn.proto
    ├── LSTM_cudnn.proto
    ├── RNN_cudnn.cfg
    ├── QLSTM.proto
    ├── RNN_cudnn.proto
    ├── rmsprop.proto
    ├── MLP.proto
    ├── decoding.proto
    ├── GRU.proto
    ├── RNN.proto
    ├── LSTM.proto
    ├── liGRU.proto
    ├── model.proto
    ├── CNN.proto
    ├── minimalGRU.proto
    ├── SincNet.proto
    ├── SRU.proto
    ├── global_chunk.proto
    ├── global_chunk_refac01.proto
    ├── global.proto
    └── global_refac01.proto
├── kaldi_decoding_scripts
    ├── conf
    │   ├── mfcc.conf
    │   ├── fbank.conf
    │   ├── test_spk.list
    │   ├── dev_spk.list
    │   └── phones.60-48-39.map
    ├── utils
    │   ├── filt.py
    │   ├── fix_ctm.sh
    │   ├── spk2utt_to_utt2spk.pl
    │   ├── s2eps.pl
    │   ├── eps2disambig.pl
    │   ├── build_const_arpa_lm.sh
    │   ├── summarize_warnings.pl
    │   ├── utt2spk_to_spk2utt.pl
    │   ├── shuffle_list.pl
    │   ├── analyze_segments.pl
    │   ├── show_lattice.sh
    │   ├── best_wer.sh
    │   ├── remove_oovs.pl
    │   ├── add_disambig.pl
    │   ├── remove_data_links.sh
    │   ├── nnet
    │   │   ├── gen_hamm_mat.py
    │   │   ├── gen_splice.py
    │   │   ├── gen_dct_mat.py
    │   │   ├── make_lstm_proto.py
    │   │   └── make_blstm_proto.py
    │   ├── ln.pl
    │   ├── make_unigram_grammar.pl
    │   ├── int2sym.pl
    │   ├── reduce_data_dir_by_reclist.sh
    │   ├── reduce_data_dir.sh
    │   ├── scoring
    │   │   └── wer_report.pl
    │   ├── create_split_dir.pl
    │   ├── find_arpa_oovs.pl
    │   ├── prepare_online_nnet_dist_build.sh
    │   ├── convert_slf_parallel.sh
    │   ├── combine_data.sh
    │   ├── apply_map.pl
    │   ├── format_lm.sh
    │   ├── filter_scp.pl
    │   ├── gen_topo.pl
    │   ├── subset_scp.pl
    │   ├── convert_ctm.pl
    │   ├── rnnlm_compute_scores.sh
    │   ├── perturb_data_dir_speed.sh
    │   ├── summarize_logs.pl
    │   ├── sym2int.pl
    │   ├── copy_data_dir.sh
    │   ├── parse_options.sh
    │   ├── reverse_lm.sh
    │   ├── subset_data_dir_tr_cv.sh
    │   ├── map_arpa_lm.pl
    │   ├── pinyin_map.pl
    │   ├── filter_scps.pl
    │   ├── create_data_link.pl
    │   ├── reverse_lm_test.sh
    │   └── format_lm_sri.sh
    ├── path.sh
    ├── local
    │   ├── nnet
    │   │   ├── run_autoencoder.sh
    │   │   └── run_dnn.sh
    │   ├── score_basic.sh
    │   ├── timit_format_data.sh
    │   ├── score_sclite.sh
    │   ├── score.sh
    │   ├── score_phrich.sh
    │   ├── timit_prepare_dict.sh
    │   ├── timit_norm_trans.pl
    │   └── score_wsj.sh
    ├── cmd.sh
    ├── decode_dnn.sh
    └── parse_options.sh
├── requirements.txt
├── env.sh
├── pytorch-kaldi_logo.png
├── .gitignore
├── check_res_dec.sh
├── RESULTS
├── plot_acc_and_loss.py
├── best_wer.sh
├── tune_hyperparameters.py
├── cfg
    ├── TIMIT_baselines
    │   ├── TIMIT_MLP_mfcc_basic.cfg
    │   ├── TIMIT_MLP_mfcc_basic_flex.cfg
    │   └── TIMIT_MLP_fbank_autoencoder.cfg
    └── Librispeech_baselines
    │   └── libri_MLP_fmllr.cfg
└── save_raw_fea.py


/log.log:
--------------------------------------------------------------------------------
1 | prov
2 | dopo
3 | prima
4 | 


--------------------------------------------------------------------------------
/proto/PASE.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | pase_cfg=str
3 | pase_model=path
4 | 
5 | 


--------------------------------------------------------------------------------
/proto/channelAvg.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | chAvg_channelWeights=str
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false   # only non-default option.
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib>=2.1.0
2 | scipy>=1.0.0
3 | numpy>=1.14.2
4 | blockdiag>=1.0
5 | 


--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # Both variables are taken from the working directory at the time this script runs.
4 | export PYTORCH_KALDI_DIR="$(pwd)"
5 | export PYTORCH_EXP="$(pwd)/exp"


--------------------------------------------------------------------------------
/pytorch-kaldi_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mravanelli/pytorch-kaldi/HEAD/pytorch-kaldi_logo.png


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --htk-compat=true
2 | --window-type=hamming
3 | --num-mel-bins=23
4 | 
5 | 


--------------------------------------------------------------------------------
/proto/adam.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | opt_betas=float_list(0,inf)
3 | opt_eps=float
4 | opt_weight_decay=float(0,inf)
5 | opt_amsgrad=bool
6 | 
7 | 


--------------------------------------------------------------------------------
/proto/logMelFb.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | logmelfb_nr_filt=int
3 | logmelfb_stft_window_size=int
4 | logmelfb_stft_window_shift=int
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/proto/sgd.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | opt_momentum=float(0,inf)
3 | opt_weight_decay=float(0,inf)
4 | opt_dampening=float(0,inf)
5 | opt_nesterov=bool


--------------------------------------------------------------------------------
/proto/GRU_cudnn.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | hidden_size=int
3 | num_layers=int
4 | bias=bool
5 | batch_first=bool
6 | dropout=float(0,1)
7 | bidirectional=bool


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore temporary python files / dirs
2 | *.pyc
3 | __pycache__
4 | 
5 | # Ignore user-generated exp directories
6 | exp
7 | .DS_Store
8 | 


--------------------------------------------------------------------------------
/proto/LSTM_cudnn.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | hidden_size=int
3 | num_layers=int
4 | bias=bool
5 | batch_first=bool
6 | dropout=float(0,1)
7 | bidirectional=bool


--------------------------------------------------------------------------------
/proto/RNN_cudnn.cfg:
--------------------------------------------------------------------------------
1 | [proto]
2 | hidden_size=int
3 | num_layers=int
4 | nonlinearity=str
5 | bias=bool
6 | batch_first=bool
7 | dropout=float(0,1)
8 | bidirectional=bool


--------------------------------------------------------------------------------
/proto/QLSTM.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | lstm_lay=str_list
3 | lstm_drop=float_list(0.0,1.0)
4 | lstm_bidir=bool
5 | lstm_act=str_list
6 | quaternion_init=str
7 | autograd=bool
8 | 


--------------------------------------------------------------------------------
/proto/RNN_cudnn.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | hidden_size=int
3 | num_layers=int
4 | nonlinearity=str
5 | bias=bool
6 | batch_first=bool
7 | dropout=float(0,1)
8 | bidirectional=bool


--------------------------------------------------------------------------------
/proto/rmsprop.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | opt_momentum=float(0,inf)
3 | opt_alpha=float(0,inf)
4 | opt_eps=float
5 | opt_centered=bool
6 | opt_weight_decay=float(0,inf)
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/proto/MLP.proto:
--------------------------------------------------------------------------------
1 | [proto]
2 | dnn_lay=str_list
3 | dnn_drop=str_list
4 | dnn_use_laynorm_inp=bool
5 | dnn_use_batchnorm_inp=bool
6 | dnn_use_batchnorm=bool_list
7 | dnn_use_laynorm=bool_list
8 | dnn_act=str_list
9 | 


--------------------------------------------------------------------------------
/proto/decoding.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | min_active=int(0,inf)
 3 | max_active=int(0,inf)
 4 | max_mem=int(0,inf)
 5 | beam=float(0,inf)
 6 | latbeam=float(0,inf)
 7 | acwt=float(0,inf)
 8 | max_arcs=int(-inf,inf)
 9 | scoring_opts=str
10 | norm_vars=bool
11 | skip_scoring=bool
12 | 


--------------------------------------------------------------------------------
/proto/GRU.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | gru_lay=str_list
 3 | gru_drop=str_list
 4 | gru_use_laynorm_inp=bool
 5 | gru_use_batchnorm_inp=bool
 6 | gru_use_laynorm=bool_list
 7 | gru_use_batchnorm=bool_list
 8 | gru_bidir=bool
 9 | gru_act=str_list
10 | gru_orthinit=bool
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/proto/RNN.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | rnn_lay=str_list
 3 | rnn_drop=str_list
 4 | rnn_use_laynorm_inp=bool
 5 | rnn_use_batchnorm_inp=bool
 6 | rnn_use_laynorm=bool_list
 7 | rnn_use_batchnorm=bool_list
 8 | rnn_bidir=bool
 9 | rnn_act=str_list
10 | rnn_orthinit=bool
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/proto/LSTM.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | lstm_lay=str_list
 3 | lstm_drop=str_list
 4 | lstm_use_laynorm_inp=bool
 5 | lstm_use_batchnorm_inp=bool
 6 | lstm_use_laynorm=bool_list
 7 | lstm_use_batchnorm=bool_list
 8 | lstm_bidir=bool
 9 | lstm_act=str_list
10 | lstm_orthinit=bool
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/proto/liGRU.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | ligru_lay=str_list
 3 | ligru_drop=str_list
 4 | ligru_use_laynorm_inp=bool
 5 | ligru_use_batchnorm_inp=bool
 6 | ligru_use_laynorm=bool_list
 7 | ligru_use_batchnorm=bool_list
 8 | ligru_bidir=bool
 9 | ligru_act=str_list
10 | ligru_orthinit=bool
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/proto/model.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | compute(architecture,input)
 3 | concatenate(input,input)
 4 | cost_nll(input,label)
 5 | cost_err(input,label)
 6 | mult(input,input)
 7 | mult_constant(input,float)
 8 | sum(input,input)
 9 | sum_constant(input,float)
10 | avg(input,input)
11 | mse(input,input)
12 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/conf/test_spk.list:
--------------------------------------------------------------------------------
 1 | mdab0
 2 | mwbt0
 3 | felc0
 4 | mtas1
 5 | mwew0
 6 | fpas0
 7 | mjmp0
 8 | mlnt0
 9 | fpkt0
10 | mlll0
11 | mtls0
12 | fjlm0
13 | mbpm0
14 | mklt0
15 | fnlp0
16 | mcmj0
17 | mjdh0
18 | fmgd0
19 | mgrt0
20 | mnjm0
21 | fdhc0
22 | mjln0
23 | mpam0
24 | fmld0
25 | 


--------------------------------------------------------------------------------
/proto/CNN.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | cnn_N_filt=int_list(1,inf)
 3 | cnn_len_filt=int_list(1,inf)
 4 | cnn_max_pool_len=int_list(1,inf)
 5 | cnn_use_laynorm_inp=bool
 6 | cnn_use_batchnorm_inp=bool
 7 | cnn_use_laynorm=bool_list
 8 | cnn_use_batchnorm=bool_list
 9 | cnn_act=list_str
10 | cnn_drop=str_list
11 | 


--------------------------------------------------------------------------------
/check_res_dec.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For every directory in the (unquoted, word-split) expansion of $1 whose name
3 | # also matches $1 as a grep pattern, pipe its result lines to ./best_wer.sh.
4 | # First pass: "WER" lines of <dir>/wer_* files (grep errors are discarded).
5 | for x in $1; do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep WER $x/wer_* 2>/dev/null | ./best_wer.sh; done
6 | # Second pass: "Sum" lines of <dir>/*score_*/*.sys files.
7 | for x in $1; do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep Sum $x/*score_*/*.sys 2>/dev/null | ./best_wer.sh; done
8 | exit 0
9 | 


--------------------------------------------------------------------------------
/proto/minimalGRU.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | minimalgru_lay=str_list
 3 | minimalgru_drop=float_list(0.0,1.0)
 4 | minimalgru_use_laynorm_inp=bool
 5 | minimalgru_use_batchnorm_inp=bool
 6 | minimalgru_use_laynorm=bool_list
 7 | minimalgru_use_batchnorm=bool_list
 8 | minimalgru_bidir=bool
 9 | minimalgru_act=str_list
10 | minimalgru_orthinit=bool
11 | 
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/RESULTS:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For every "decode_*" directory that find reports under $1 and whose path
3 | # also matches $1 as a grep pattern, pipe its result lines to ./best_wer.sh.
4 | # First pass: "WER" lines of <dir>/wer_* files (grep errors are discarded).
5 | for x in $(find $1 -type d -name "decode_*"); do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep WER $x/wer_* 2>/dev/null | ./best_wer.sh; done
6 | # Second pass: "Sum" lines of <dir>/*score_*/*.sys files.
7 | for x in $(find $1 -type d -name "decode_*"); do [ -d $x ] && echo $x | grep "${1:-.*}" >/dev/null && grep Sum $x/*score_*/*.sys 2>/dev/null | ./best_wer.sh; done
8 | exit 0
9 | 


--------------------------------------------------------------------------------
/proto/SincNet.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | sinc_N_filt=int_list(1,inf)
 3 | sinc_len_filt=int_list(1,inf)
 4 | sinc_max_pool_len=int_list(1,inf)
 5 | sinc_sample_rate=int
 6 | sinc_min_low_hz=int
 7 | sinc_min_band_hz=int
 8 | sinc_use_laynorm_inp=bool
 9 | sinc_use_batchnorm_inp=bool
10 | sinc_use_laynorm=bool_list
11 | sinc_use_batchnorm=bool_list
12 | sinc_act=list_str
13 | sinc_drop=str_list
14 | 


--------------------------------------------------------------------------------
/proto/SRU.proto:
--------------------------------------------------------------------------------
 1 | [proto]
 2 | sru_hidden_size=int
 3 | sru_num_layers=int
 4 | sru_dropout=float(0,1)
 5 | sru_rnn_dropout=float(0,1)
 6 | sru_use_tanh=bool
 7 | sru_use_relu=bool
 8 | sru_use_selu=bool
 9 | sru_weight_norm=bool
10 | sru_layer_norm=bool
11 | sru_bidirectional=bool
12 | sru_is_input_normalized=bool
13 | sru_has_skip_term=bool
14 | sru_rescale=bool
15 | sru_highway_bias=float(-inf,0)
16 | sru_n_proj=int
17 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/filt.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Apache 2.0
 4 | from __future__ import print_function
 5 | 
 6 | import sys
 7 | 
 8 | vocab = set()
 9 | with open(sys.argv[1]) as vocabfile:
10 |     for line in vocabfile:
11 |         vocab.add(line.strip())
12 | 
13 | with open(sys.argv[2]) as textfile:
14 |     for line in textfile:
15 |         print(" ".join(map(lambda word: word if word in vocab else "<UNK>", line.strip().split())))
16 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/path.sh:
--------------------------------------------------------------------------------
1 | # KALDI_ROOT is not set here (the export below is commented out); it is only read.
2 | #export KALDI_ROOT=~/kaldi-trunk/
3 | # Source $KALDI_ROOT/tools/env.sh when that file exists.
4 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh 
5 | # Prepend ./utils, the listed Kaldi/tool binary directories and $PWD to PATH.
6 | export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
7 | # Use the C locale for every command run after this file is sourced.
8 | export LC_ALL=C
9 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/conf/dev_spk.list:
--------------------------------------------------------------------------------
 1 | faks0
 2 | fdac1
 3 | fjem0
 4 | mgwt0
 5 | mjar0
 6 | mmdb1
 7 | mmdm2
 8 | mpdf0
 9 | fcmh0
10 | fkms0
11 | mbdg0
12 | mbwm0
13 | mcsh0
14 | fadg0
15 | fdms0
16 | fedw0
17 | mgjf0
18 | mglb0
19 | mrtk0
20 | mtaa0
21 | mtdt0
22 | mthc0
23 | mwjg0
24 | fnmr0
25 | frew0
26 | fsem0
27 | mbns0
28 | mmjr0
29 | mdls0
30 | mdlf0
31 | mdvc0
32 | mers0
33 | fmah0
34 | fdrw0
35 | mrcs0
36 | mrjm4
37 | fcal1
38 | mmwh0
39 | fjsj0
40 | majc0
41 | mjsw0
42 | mreb0
43 | fgjd0
44 | fjmg0
45 | mroa0
46 | mteb0
47 | mjfc0
48 | mrjr0
49 | fmml0
50 | mrws1
51 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/conf/phones.60-48-39.map:
--------------------------------------------------------------------------------
 1 | aa	aa	aa
 2 | ae	ae	ae
 3 | ah	ah	ah
 4 | ao	ao	aa
 5 | aw	aw	aw
 6 | ax	ax	ah
 7 | ax-h	ax	ah
 8 | axr	er	er
 9 | ay	ay	ay
10 | b	b	b
11 | bcl	vcl	sil
12 | ch	ch	ch
13 | d	d	d
14 | dcl	vcl	sil
15 | dh	dh	dh
16 | dx	dx	dx
17 | eh	eh	eh
18 | el	el	l
19 | em	m	m
20 | en	en	n
21 | eng	ng	ng
22 | epi	epi	sil
23 | er	er	er
24 | ey	ey	ey
25 | f	f	f
26 | g	g	g
27 | gcl	vcl	sil
28 | h#	sil	sil
29 | hh	hh	hh
30 | hv	hh	hh
31 | ih	ih	ih
32 | ix	ix	ih
33 | iy	iy	iy
34 | jh	jh	jh
35 | k	k	k
36 | kcl	cl	sil
37 | l	l	l
38 | m	m	m
39 | n	n	n
40 | ng	ng	ng
41 | nx	n	n
42 | ow	ow	ow
43 | oy	oy	oy
44 | p	p	p
45 | pau	sil	sil
46 | pcl	cl	sil
47 | q
48 | r	r	r
49 | s	s	s
50 | sh	sh	sh
51 | t	t	t
52 | tcl	cl	sil
53 | th	th	th
54 | uh	uh	uh
55 | uw	uw	uw
56 | ux	uw	uw
57 | v	v	v
58 | w	w	w
59 | y	y	y
60 | z	z	z
61 | zh	zh	sh
62 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/nnet/run_autoencoder.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Trains a network with steps/nnet/train.sh using an MSE objective, then
 3 | # forwards the test set through it with steps/nnet/make_bn_feats.sh.
 4 | 
 5 | # presumably these define $cuda_cmd and $train_cmd used below - TODO confirm
 6 | . path.sh
 7 | . cmd.sh
 8 | 
 9 | # Train,
10 | dir=exp/autoencoder
11 | data_fmllr=data-fmllr-tri3
12 | # Training targets: output of feat-to-post on the training features, read from a pipe.
13 | labels="ark:feat-to-post scp:$data_fmllr/train/feats.scp ark:- |"
14 | $cuda_cmd $dir/log/train_nnet.log \
15 |   steps/nnet/train.sh --hid-layers 2 --hid-dim 200 --learn-rate 0.00001 \
16 |     --labels "$labels" --num-tgt 40 --train-tool "nnet-train-frmshuff --objective-function=mse" \
17 |     --proto-opts "--no-softmax --activation-type=<Tanh> --hid-bias-mean=0.0 --hid-bias-range=1.0 --param-stddev-factor=0.01" \
18 |   $data_fmllr/train_tr90 $data_fmllr/train_cv10 dummy-dir dummy-dir dummy-dir $dir || exit 1;
19 | 
20 | # Forward the data,
21 | # Output goes to $output_dir, with log and data sub-directories passed via brace expansion.
22 | output_dir=data-autoencoded/test
23 | steps/nnet/make_bn_feats.sh --nj 1 --cmd "$train_cmd" --remove-last-components 0 \
24 |   $output_dir $data_fmllr/test $dir $output_dir/{log,data} || exit 1
25 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/fix_ctm.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | stmfile=$1
 4 | ctmfile=$2
 5 | 
 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u`
 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u`
 8 | 
 9 | segments_stm_count=`echo "$segments_stm" | wc -l `
10 | segments_ctm_count=`echo "$segments_ctm" | wc -l `
11 | 
12 | #echo $segments_stm_count
13 | #echo $segments_ctm_count
14 | 
15 | if [ "$segments_stm_count" -gt "$segments_ctm_count"  ] ; then
16 |   pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
17 |   (
18 |     for elem in $pp ; do
19 |       echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
20 |     done
21 |   ) >> $ctmfile
22 |   echo "FIXED CTM FILE"
23 |   exit 0
24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count"  ] ; then
25 |   echo "Segment STM count: $segments_stm_count"
26 |   echo "Segment CTM count: $segments_ctm_count"
27 |   echo "FAILURE FIXING CTM FILE"
28 |   exit 1
29 | else
30 |   exit 0
31 | fi
32 | 
33 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # Emit one "<utterance> <speaker>" line per utterance listed after the
19 | # speaker ID on each input line.
20 | while (my $line = <>) {
21 |     my ($spk, @utts) = split(" ", $line);
22 |     die "Invalid line in spk2utt file: $line" unless @utts >= 1;
23 |     print "$_ $spk\n" for @utts;
24 | }
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/proto/global_chunk.proto:
--------------------------------------------------------------------------------
 1 | [cfg_proto]
 2 | cfg_proto=path
 3 | cfg_proto_chunk=path
 4 | 
 5 | [exp]
 6 | cmd=str
 7 | run_nn_script=str
 8 | to_do={train,valid,forward}
 9 | seed=int(-inf,inf)
10 | use_cuda=bool
11 | multi_gpu=bool
12 | save_gpumem=bool
13 | out_info=str
14 | N_epochs_tr=int(1,inf)
15 | 
16 |  
17 | [data_chunk]
18 | fea=str
19 | lab=str
20 | 
21 | 
22 | [batches]
23 | batch_size_train=int(0,inf)
24 | max_seq_length_train=int(20,inf)
25 | batch_size_valid=int(1,inf)
26 | max_seq_length_valid=int(20,inf)
27 | 
28 | 
29 | [architecture]
30 | arch_name=str
31 | arch_proto=path
32 | arch_library=str
33 | arch_class=str
34 | arch_pretrain_file=str
35 | arch_freeze=bool
36 | arch_seq_model=bool
37 | arch_lr=float(0,inf)
38 | arch_halving_factor=float(0,inf)
39 | arch_improvement_threshold=float(0,inf)
40 | arch_opt={sgd,rmsprop,adam}
41 | 
42 | [model]
43 | model_proto=path
44 | model=str
45 | 
46 | 
47 | [forward]
48 | forward_out=str
49 | normalize_posteriors=bool_list
50 | normalize_with_counts_from=str
51 | save_out_file=bool_list
52 | require_decoding=bool_list
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/proto/global_chunk_refac01.proto:
--------------------------------------------------------------------------------
 1 | [cfg_proto]
 2 | cfg_proto=path
 3 | cfg_proto_chunk=path
 4 | 
 5 | [exp]
 6 | cmd=str
 7 | run_nn_script=str
 8 | to_do={train,valid,forward}
 9 | seed=int(-inf,inf)
10 | use_cuda=bool
11 | multi_gpu=bool
12 | save_gpumem=bool
13 | out_info=str
14 | N_epochs_tr=int(1,inf)
15 | 
16 |  
17 | [data_chunk]
18 | fea=str
19 | lab=str
20 | 
21 | 
22 | [batches]
23 | batch_size_train=int(0,inf)
24 | max_seq_length_train=list_str
25 | batch_size_valid=int(1,inf)
26 | max_seq_length_valid=list_str
27 | 
28 | 
29 | [architecture]
30 | arch_name=str
31 | arch_proto=path
32 | arch_library=str
33 | arch_class=str
34 | arch_pretrain_file=str
35 | arch_freeze=bool
36 | arch_seq_model=bool
37 | arch_lr=float(0,inf)
38 | arch_halving_factor=float(0,inf)
39 | arch_improvement_threshold=float(0,inf)
40 | arch_opt={sgd,rmsprop,adam,none}
41 | 
42 | [model]
43 | model_proto=path
44 | model=str
45 | 
46 | 
47 | [forward]
48 | forward_out=str
49 | normalize_posteriors=bool_list
50 | normalize_with_counts_from=str
51 | save_out_file=bool_list
52 | require_decoding=bool_list
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/s2eps.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 | 
20 | my %is_boundary = map { $_ => 1 } ("<s>", "</s>");
21 | while (my $arc = <>) {
22 |     my @fields = split(" ", $arc);
23 |     if (@fields >= 4) {
24 |         # Only the third and fourth columns are rewritten.
25 |         for my $i (2, 3) { $fields[$i] = "<eps>" if $is_boundary{$fields[$i]}; }
26 |     }
27 |     print join("\t", @fields), "\n";
28 | }


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | #                2015 Guoguo Chen
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # This script replaces epsilon with #0 on the input side only, of the G.fst
19 | # acceptor.  
20 | 
21 | while (my $line = <>) {
22 |   # "#0" is reserved, so it must not already occur surrounded by whitespace.
23 |   if ($line =~ /\s+#0\s+/) {
24 |     print STDERR "$0: ERROR: LM has word #0, " .
25 |                  "which is reserved as disambiguation symbol\n";
26 |     exit 1;
27 |   }
28 |   $line =~ s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
29 |   print $line;
30 | }


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/build_const_arpa_lm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2014  Guoguo Chen
 4 | # Apache 2.0
 5 | 
 6 | # This script reads in an Arpa format language model, and converts it into the
 7 | # ConstArpaLm format language model.
 8 | 
 9 | # begin configuration section
10 | # end configuration section
11 | 
12 | [ -f path.sh ] && . ./path.sh;
13 | 
14 | . utils/parse_options.sh
15 | 
16 | if [ $# != 3 ]; then
17 |   echo "Usage: "
18 |   echo "  $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>"
19 |   echo "e.g.:"
20 |   echo "  $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed"
21 |   echo "Options"
22 |   exit 1;
23 | fi
24 | 
25 | export LC_ALL=C
26 | 
27 | arpa_lm=$1
28 | old_lang=$2
29 | new_lang=$3
30 | 
31 | # Start the new lang directory as a copy of the old one.
32 | mkdir -p "$new_lang"
33 | cp -r "$old_lang"/* "$new_lang"
34 | 
35 | # Integer IDs of the OOV word and of the sentence boundary symbols.  The first
36 | # column of words.txt is matched exactly so that a word merely containing
37 | # "<s>" or "</s>" cannot contribute a second ID.
38 | unk=$(cat "$new_lang/oov.int")
39 | bos=$(awk '$1 == "<s>" {print $2}' "$new_lang/words.txt")
40 | eos=$(awk '$1 == "</s>" {print $2}' "$new_lang/words.txt")
41 | if [[ -z $bos || -z $eos ]]; then
42 |   echo "$0: <s> and </s> symbols are not in $new_lang/words.txt"
43 |   exit 1
44 | fi
45 | 
46 | # Feed the gunzipped ARPA LM through utils/map_arpa_lm.pl and write G.carpa.
47 | arpa-to-const-arpa --bos-symbol=$bos \
48 |   --eos-symbol=$eos --unk-symbol=$unk \
49 |   "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|"  "$new_lang/G.carpa"  || exit 1;
50 | 
51 | exit 0;


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/summarize_warnings.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 4 | 
 5 | # Require exactly one argument (the log directory); otherwise print usage and exit 1.
 6 |  @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
 7 | 
 8 | $dir = $ARGV[0];
 9 | 
10 | # Stop with an error if the argument is not an existing directory.
11 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1;
12 | 
13 | $dir =~ s:/$::; # Remove trailing slash.
14 | 
15 | 
16 | # Group the files into categories where all have the same base-name.
17 | # %fmap maps a category name to a space-separated list of its log files.
18 | foreach $f (glob ("$dir/*.log")) {
19 |   $f_category = $f;
20 |   # Replace a ".<digits>." component with the literal ".*." to form the category name.
21 |   # do next expression twice; s///g doesn't work as they overlap.
22 |   $f_category =~ s:\.\d+\.:.*.:;
23 |   $f_category =~ s:\.\d+\.:.*.:;
24 |   $fmap{$f_category} .= " $f";
25 | }
26 | 
27 | sub split_hundreds { # split list of filenames into groups of 100.
28 |   # Takes one space-separated string of names; returns a list of strings, each
29 |   # holding up to 100 names, every name followed by a space.
30 |   my $names = shift @_;
31 |   my @A = split(" ", $names);
32 |   my @ans = ();
33 |   while (@A > 0) {
34 |     my $group = "";
35 |     for ($x = 0; $x < 100 && @A>0; $x++) {
36 |       $fname = pop @A; # names are taken from the end of the list
37 |       $group .= "$fname ";
38 |     }
39 |     push @ans, $group;
40 |   }
41 |   return @ans;
42 | }
43 | 
44 | # For each category, count the lines that "grep -w WARNING" outputs over its
45 | # files and report the categories with a non-zero count.
46 | foreach $c (keys %fmap) {
47 |   $n = 0;
48 |   foreach $fgroup (split_hundreds($fmap{$c})) {
49 |     $n += `grep -w WARNING $fgroup | wc -l`; # file names are interpolated into a shell command
50 |   }
51 |   if ($n != 0) {
52 |     print "$n warnings in $c\n"
53 |   }
54 | }
55 | 


--------------------------------------------------------------------------------
/plot_acc_and_loss.py:
--------------------------------------------------------------------------------
 1 | ##########################################################
 2 | # pytorch-kaldi v.0.1
 3 | # Mirco Ravanelli, Titouan Parcollet
 4 | # Mila, University of Montreal
 5 | # October 2018
 6 | ##########################################################
 7 | 
 8 | import sys
 9 | import configparser
10 | import os
11 | from utils import create_curves
12 | 
13 | # Checking arguments
14 | if len(sys.argv) != 2:
15 |     print("ERROR: Please provide only the path of the cfg_file as : python plot_acc_and_loss.py cfg/TIMIT_MLP_mfcc.cfg")
16 | 
17 | # Checking if the cfg_file exists and loading it
18 | cfg_file = sys.argv[1]
19 | if not (os.path.exists(cfg_file)):
20 |     sys.stderr.write("ERROR: The config file %s does not exist !\n" % (cfg_file))
21 |     sys.exit(0)
22 | else:
23 |     config = configparser.ConfigParser()
24 |     config.read(cfg_file)
25 | 
26 | # Getting the parameters
27 | valid_data_lst = config["data_use"]["valid_with"].split(",")
28 | out_folder = config["exp"]["out_folder"]
29 | N_ep = int(config["exp"]["N_epochs_tr"])
30 | 
31 | # Handling call without running run_exp.py before
32 | if not (os.path.exists(out_folder + "res.res")):
33 |     sys.stderr.write("ERROR: Please run the experiment in order to get results to plot first !\n")
34 |     sys.exit(0)
35 | 
36 | # Creating files and curves
37 | create_curves(out_folder, N_ep, valid_data_lst)
38 | 


--------------------------------------------------------------------------------
/proto/global.proto:
--------------------------------------------------------------------------------
 1 | [cfg_proto]
 2 | cfg_proto=path
 3 | cfg_proto_chunk=path
 4 | 
 5 | [exp]
 6 | cmd=str
 7 | run_nn_script=str
 8 | out_folder=str
 9 | seed=int(-inf,inf)
10 | use_cuda=bool
11 | multi_gpu=bool
12 | save_gpumem=bool
13 | N_epochs_tr=int(1,inf)
14 | 
15 | [dataset]
16 | data_name=str
17 | fea=str
18 | lab=str
19 | N_chunks=int(1,inf)
20 | 
21 | [data_use]
22 | train_with=list_str
23 | valid_with=list_str
24 | forward_with=list_str
25 | 
26 | 
27 | [batches]
28 | batch_size_train=list_str
29 | max_seq_length_train=list_str
30 | increase_seq_length_train=Bool
31 | start_seq_len_train=int(20,inf)
32 | multply_factor_seq_len_train=int(0,inf)
33 | batch_size_valid=int(1,inf)
34 | max_seq_length_valid=int(20,inf)
35 | 
36 | [architecture]
37 | arch_name=str
38 | arch_proto=path
39 | arch_library=str
40 | arch_class=str
41 | arch_pretrain_file=str
42 | arch_freeze=bool
43 | arch_seq_model=bool
44 | arch_lr=list_str
45 | arch_halving_factor=float(0,inf)
46 | arch_improvement_threshold=float(0,inf)
47 | arch_opt={sgd,rmsprop,adam}
48 | 
49 | 
50 | [model]
51 | model_proto=path
52 | model=str
53 | 
54 | [forward]
55 | forward_out=str
56 | normalize_posteriors=bool_list
57 | normalize_with_counts_from=str
58 | save_out_file=bool_list
59 | require_decoding=bool_list
60 | 
61 | [decoding]
62 | decoding_script_folder=path
63 | decoding_script=str
64 | decoding_proto=path
65 | 
66 | 
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/proto/global_refac01.proto:
--------------------------------------------------------------------------------
 1 | [cfg_proto]
 2 | cfg_proto=path
 3 | cfg_proto_chunk=path
 4 | 
 5 | [exp]
 6 | cmd=str
 7 | run_nn_script=str
 8 | out_folder=str
 9 | seed=int(-inf,inf)
10 | use_cuda=bool
11 | multi_gpu=bool
12 | save_gpumem=bool
13 | N_epochs_tr=int(1,inf)
14 | 
15 | [dataset]
16 | data_name=str
17 | fea=str
18 | lab=str
19 | N_chunks=int(1,inf)
20 | 
21 | [data_use]
22 | train_with=list_str
23 | valid_with=list_str
24 | forward_with=list_str
25 | 
26 | 
27 | [batches]
28 | batch_size_train=list_str
29 | max_seq_length_train=list_str
30 | increase_seq_length_train=Bool
31 | start_seq_len_train=list_str
32 | multply_factor_seq_len_train=int(0,inf)
33 | batch_size_valid=int(1,inf)
34 | max_seq_length_valid=list_str
35 | 
36 | [architecture]
37 | arch_name=str
38 | arch_proto=path
39 | arch_library=str
40 | arch_class=str
41 | arch_pretrain_file=str
42 | arch_freeze=bool
43 | arch_seq_model=bool
44 | arch_lr=list_str
45 | arch_halving_factor=float(0,inf)
46 | arch_improvement_threshold=float(0,inf)
47 | arch_opt={sgd,rmsprop,adam,none}
48 | 
49 | 
50 | [model]
51 | model_proto=path
52 | model=str
53 | 
54 | [forward]
55 | forward_out=str
56 | normalize_posteriors=bool_list
57 | normalize_with_counts_from=str
58 | save_out_file=bool_list
59 | require_decoding=bool_list
60 | 
61 | [decoding]
62 | decoding_script_folder=path
63 | decoding_script=str
64 | decoding_proto=path
65 | 
66 | 
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 | 
# At most one input file may be named; with none, input is read from stdin.
die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt" if @ARGV > 1;

# @speaker_order remembers the order in which speakers were first seen;
# %utts_of maps each speaker to the list of its utterances, in input order.
my (@speaker_order, %utts_of);

while (my $line = <>) {
    my @fields = split(" ", $line);
    die "Invalid line in utt2spk file: $line" unless @fields == 2;
    my ($utt, $spk) = @fields;
    # First occurrence of this speaker: record its position in the output.
    push @speaker_order, $spk unless exists $utts_of{$spk};
    push @{ $utts_of{$spk} }, $utt;
}

# One output line per speaker: "<speaker> <utt1> <utt2> ...".
print "$_ " . join(' ', @{ $utts_of{$_} }) . "\n" for @speaker_order;
39 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
# Randomizes the order of the input lines (file argument or stdin).
# Options:
#   --srand N   seed the random number generator with the non-negative
#               integer N (default seed: 0, so the output is reproducible).

if (@ARGV > 0 && $ARGV[0] eq "--srand") {
  $n = $ARGV[1];
  # Anchored match: the whole argument must be digits (the unanchored form
  # accepted any string that merely contained a digit, or nothing at all).
  (defined $n && $n =~ m/^\d+$/)
    || die "Bad argument to --srand option: \"" . (defined $n ? $n : "") . "\"";
  srand($n);
  shift;
  shift;
} else {
  srand(0); # Gives inconsistent behavior if we don't seed.
}

if (@ARGV > 1 || (@ARGV == 1 && $ARGV[0] =~ m/^-.+/)) { # >1 args, or an option we
  # don't understand.
  # Usage goes to stderr: stdout is the data stream and is usually redirected.
  print STDERR "Usage: shuffle_list.pl [--srand N] [input file]  > output\n";
  print STDERR "randomizes the order of lines of input.\n";
  exit(1);
}

# Attach a random key to every line, then order the lines by that key.
my @lines;
while (<>) {
  push @lines, [ (rand(), $_)] ;
}

# NOTE: the keys are compared as strings ("cmp").  This is kept on purpose:
# switching to a numeric comparison would change the permutation produced
# for a given seed and break reproducibility with existing outputs.
@lines = sort { $a->[0] cmp $b->[0] } @lines;
foreach $l (@lines) {
    print $l->[1];
}
45 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/analyze_segments.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel)
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Analyze a segments file and print important stats on it.
18 | 
# Reads segments lines "<segment-id> <recording-id> <start> <end>" and prints
# the total duration (hours), the mean segment length and the ids of the
# longest and shortest segments.

$total = 0;
$maxDur = 0;
$minDur = 9999999999;
$n = 0;
while(<>){
    chomp;
    # split(" ") ignores leading whitespace, so field indices stay correct
    # even for indented lines.
    @t = split(" ", $_);
    if (@t < 4) {
        # Blank lines are skipped silently; other short lines are reported.
        print STDERR "analyze_segments.pl: skipping malformed line $.: $_\n" if @t > 0;
        next;
    }
    $dur = $t[3] - $t[2];
    $total += $dur;
    if ($dur > $maxDur) {
        $maxSegId = $t[0];
        $maxDur = $dur;
    }
    if ($dur < $minDur) {
        $minSegId = $t[0];
        $minDur = $dur;
    }
    $n++;
}
# Without this guard an empty input fails with "Illegal division by zero".
if ($n == 0) {
    die "analyze_segments.pl: no segments found in the input\n";
}
$avg=$total/$n;
$hrs = $total/3600;
print "Total $hrs hours of data\n";
print "Average segment length $avg seconds\n";
print "Segment $maxSegId has length of $maxDur seconds\n";
print "Segment $minSegId has length of $minDur seconds\n";
44 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/show_lattice.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | format=pdf # pdf svg
 4 | mode=save # display save
 5 | lm_scale=0.0
 6 | acoustic_scale=0.0
 7 | #end of config
 8 | 
 9 | . utils/parse_options.sh
10 | 
11 | if [ $# != 3 ]; then
12 |    echo "usage: $0 [--mode display|save] [--format pdf|svg] <utt-id> <lattice-ark> <word-list>"
13 |    echo "e.g.:  $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
14 |    exit 1;
15 | fi
16 | 
17 | . path.sh
18 | 
19 | uttid=$1
20 | lat=$2
21 | words=$3
22 | 
23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup
24 | 
25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1;
26 | ! [ -s $tmpdir/$uttid.fst ] && \
27 |   echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1;
28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format}
29 | 
30 | if [ "$(uname)" == "Darwin" ]; then
31 |     doc_open=open
32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then
33 |     doc_open=xdg-open
34 | elif [ $mode == "display" ] ; then
35 |         echo "Can not automaticaly open file on your operating system"
36 |         mode=save
37 | fi
38 | 
39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format}
40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save
41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} .
42 | 
43 | exit 0
44 | 


--------------------------------------------------------------------------------
/best_wer.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2010-2011 Microsoft Corporation
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # To be run from one directory above this script.
19 | 
20 | perl -e 'while(<>){ 
21 |     s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g;
22 |     if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
23 |     elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
24 |         && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } }  # sclite.
25 |    if (defined $bestline){ print $bestline; } ' | \
26 |   awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \
27 |   awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \
28 |   awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \
29 |   sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||'
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/best_wer.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2010-2011 Microsoft Corporation
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # To be run from one directory above this script.
19 | 
20 | perl -e 'while(<>){ 
21 |     s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g;
22 |     if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
23 |     elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
24 |         && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } }  # sclite.
25 |    if (defined $bestline){ print $bestline; } ' | \
26 |   awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \
27 |   awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \
28 |   awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \
29 |   sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||'
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/remove_oovs.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script removes lines that contain these OOVs on either the
18 | # third or fourth fields  of the line.  It is intended to remove arcs
19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
20 | 
# Usage check.  The two conditions must be OR-ed: with "&&" the test could
# never be true (a count cannot be both < 1 and > 2), so bad invocations were
# never rejected here.
if (@ARGV < 1 || @ARGV > 2) {
    die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
}

# Load the list of unknown symbols, one symbol per line.
$unklist = shift @ARGV;
# Three-argument open: the file name is never interpreted as an open mode.
open(S, "<", $unklist) || die "Failed opening unknown-symbol list $unklist\n";
while(<S>){
    @A = split(" ", $_);
    @A == 1 || die "Bad line in unknown-symbol list: $_";
    $unk{$A[0]} = 1;
}
close(S);

# Copy the printed FST (file argument or stdin) to stdout, dropping every
# line whose third or fourth field is one of the unknown symbols.
$num_removed = 0;
while(<>){
    @A = split(" ", $_);
    # Lines with fewer than three fields (e.g. final-state lines) cannot
    # carry a symbol and are always kept.
    if(@A > 2 && (defined $unk{$A[2]} || (@A > 3 && defined $unk{$A[3]}))) {
        $num_removed++;
    } else {
        print;
    }
}
print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
43 | 
44 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/cmd.sh:
--------------------------------------------------------------------------------
 1 | # "queue.pl" uses qsub.  The options to it are
 2 | # options to qsub.  If you have GridEngine installed,
 3 | # change this to a queue you have access to.
 4 | # Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine).
 7 | 
 8 | #a) JHU cluster options
 9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export cuda_cmd=run.pl
13 | 
14 | 
15 | #if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
16 | #  export train_cmd="queue.pl -l arch=*64*"
17 | #  export decode_cmd="queue.pl -l arch=*64* --mem 3G"
18 | #  export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G"
19 | #  export cuda_cmd="queue.pl -l gpu=1"
20 | #elif [[ $(hostname -f) == *.fit.vutbr.cz ]]; then
21 | #  #b) BUT cluster options
22 | #  queue="all.q@@blade,all.q@@speech,all.q@dellgpu*,all.q@supergpu*"
23 | #  export train_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,matylda5=0.5"
24 | #  export decode_cmd="queue.pl -q $queue -l ram_free=3000M,mem_free=3000M,matylda5=0.1"
25 | #  export mkgraph_cmd="queue.pl -q $queue -l ram_free=4G,mem_free=4G,matylda5=3"
26 | #  export cuda_cmd="queue.pl -q long.q@pcspeech-gpu,long.q@dellgpu1,long.q@pcgpu*,long.q@supergpu1 -l gpu=1" 
27 | #else
28 | #  echo "$0: you need to define options for your cluster."
29 | #  exit 1;
30 | #fi
31 | 
32 | #c) run locally...
33 | export train_cmd=utils/run.pl
34 | export decode_cmd=utils/run.pl
35 | export cuda_cmd=utils/run.pl
36 | export mkgraph_cmd=utils/run.pl
37 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/add_disambig.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # Adds some specified number of disambig symbols to a symbol table.
19 | # Adds these as #1, #2, etc.
20 | # If the --include-zero option is specified, includes an extra one
21 | # #0.
22 | 
# Optional leading flag: also emit the "#0" symbol.
my $with_zero = 0;
if (@ARGV && $ARGV[0] eq "--include-zero") {
    shift @ARGV;
    $with_zero = 1;
}

die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "
    unless @ARGV == 2;

my ($symtab, $num_extra) = @ARGV;

open(F, "<$symtab") || die "Opening file $symtab";

# Echo the existing table unchanged while remembering the id on its last line.
my $last_id;
while (my $entry = <F>) {
    my @cols = split(" ", $entry);
    die "Bad line $entry" unless @cols == 2;
    $last_id = $cols[1];
    print $entry;
}

die "Empty symbol file?" unless defined $last_id;

# "#0" takes the next free id; the numbered symbols then follow after it.
if ($with_zero) {
    $last_id++;
    print "#0  $last_id\n";
}

# Append "#1" .. "#num_extra" with consecutive ids following $last_id.
my $k = 1;
while ($k <= $num_extra) {
    print "#$k  ", $k + $last_id, "\n";
    $k++;
}
59 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/remove_data_links.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This program searches within a directory for soft links that
 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory,
 5 | # and it removes both the soft links and the things they point to.
 6 | # for instance, if you have a soft link 
 7 | #   foo/egs/1.1.egs -> storage/2/1.1.egs
 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs.
 9 | 
# Exit status: 0 on success, 1 if an argument is not a directory or if no
# directory was given.
ret=0

dry_run=false

if [ "$1" == "--dry-run" ]; then
  dry_run=true
  shift
fi

if [ $# == 0 ]; then
  echo "Usage:  $0 [--dry-run] <list-of-directories>"
  echo "e.g.: $0 exp/nnet4a/egs/"
  echo " Removes from any subdirectories of the command-line arguments, soft links that "
  echo " appear to have been created by utils/create_data_link.pl, as well as the things"
  echo " that those soft links point to.  Will typically be called on a directory prior"
  echo " to 'rm -r' on that directory, to ensure that data that was distributed on other"
  echo " volumes also gets deleted."
  echo " With --dry-run, just prints what it would do."
  # Nothing to do without directories; report the misuse to the caller.
  exit 1
fi

# "$@" plus quoting everywhere keeps paths containing whitespace intact.
for dir in "$@"; do
  if [ ! -d "$dir" ]; then
    echo "$0: not a directory: $dir"
    ret=1
  else
    # NUL-delimited find output so that any directory name is read safely.
    while IFS= read -r -d '' subdir; do
      # Only directories that have a 'storage' subdirectory are of interest.
      [ -d "$subdir/storage" ] || continue
      for f in "$subdir"/*; do
        [ -L "$f" ] || continue
        link=$(readlink "$f")
        # Only links of the form 'storage/...' are treated as data links.
        if [[ $link == storage/* ]]; then
          target=$subdir/$link
          if $dry_run; then
            echo rm "$f" "$target"
          else
            rm -- "$f" "$target"
          fi
        fi
      done
    done < <(find "$dir" -type d -print0)
  fi
done

exit $ret
54 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/gen_hamm_mat.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # ./gen_hamm_mat.py
19 | # script generates diagonal matrix with hamming window values
20 | from __future__ import print_function
21 | 
22 | from math import *
23 | import sys
24 | 
25 | 
26 | from optparse import OptionParser
27 | 
M_2PI = 6.283185307179586476925286766559005


def hamming_weight(frame_idx, time_context):
    """Return the Hamming-window coefficient of one frame.

    Args:
        frame_idx: zero-based position of the frame inside the window.
        time_context: total number of frames in the window (2 * splice + 1).

    Returns:
        0.54 - 0.46 * cos(2 * pi * frame_idx / (time_context - 1)); for a
        single-frame window (time_context <= 1) the coefficient is 1.0, which
        avoids the division by zero the formula would otherwise hit.
    """
    if time_context <= 1:
        return 1.0
    return 0.54 - 0.46 * cos((M_2PI * frame_idx) / (time_context - 1))


def main():
    """Parse the command line and print the diagonal Hamming matrix."""
    parser = OptionParser()
    parser.add_option("--fea-dim", dest="dim", help="feature dimension")
    parser.add_option("--splice", dest="splice", help="applied splice value")
    (options, args) = parser.parse_args()

    # Both options are required; a missing --splice used to crash in int(None).
    if options.dim is None or options.splice is None:
        parser.print_help()
        sys.exit(1)

    dim = int(options.dim)
    splice = int(options.splice)

    # generate the diagonal matrix with hammings
    dim_mat = (2 * splice + 1) * dim
    timeContext = 2 * splice + 1
    print("[")
    for row in range(dim_mat):
        for col in range(dim_mat):
            if col != row:
                print("0", end=" ")
            else:
                # All `dim` features of one frame share that frame's weight.
                print(str(hamming_weight(row // dim, timeContext)), end=" ")
        print()

    print("]")


if __name__ == "__main__":
    main()
57 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/gen_splice.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # ./gen_splice.py
19 | # generates <splice> Component
20 | from __future__ import print_function
21 | 
22 | from math import *
23 | import sys
24 | 
25 | 
26 | from optparse import OptionParser
27 | 
def splice_offsets(splice, splice_step=1):
    """Return the frame offsets selected by a splice.

    Args:
        splice: number of frames taken on each side of the central frame.
        splice_step: distance between two selected frames.

    Returns:
        The list -splice*splice_step, ..., 0, ..., splice*splice_step in
        increments of splice_step (e.g. splice=3, splice_step=2 gives
        -6 -4 -2 0 2 4 6).
    """
    return list(range(-splice * splice_step, splice * splice_step + 1, splice_step))


def main():
    """Parse the command line and print the <splice> component."""
    parser = OptionParser()
    parser.add_option("--fea-dim", dest="dim_in", help="feature dimension")
    parser.add_option("--splice", dest="splice", help="number of frames to concatenate with the central frame")
    parser.add_option(
        "--splice-step",
        dest="splice_step",
        help="splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)",
        default="1",
    )
    (options, args) = parser.parse_args()

    # Both options are required; a missing --splice used to crash in int(None).
    if options.dim_in is None or options.splice is None:
        parser.print_help()
        sys.exit(1)

    dim_in = int(options.dim_in)
    splice = int(options.splice)
    splice_step = int(options.splice_step)

    dim_out = (2 * splice + 1) * dim_in

    print("<splice>", dim_out, dim_in)
    print("[", end=" ")

    for offset in splice_offsets(splice, splice_step):
        print(offset, end=" ")

    print("]")


if __name__ == "__main__":
    main()
57 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/ln.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use File::Spec;
 3 | 
 4 | if ( @ARGV < 2 ) {
 5 |   print STDERR "usage: ln.pl input1 input2 dest-dir\n" .
 6 |     "This script does a soft link of input1, input2, etc." .
 7 |     "to dest-dir, using relative links where possible\n" .
 8 |     "Note: input-n and dest-dir may both be absolute pathnames,\n" .
 9 |     "or relative pathnames, relative to the current directlory.\n";
10 |   exit(1);
11 | }  
12 | 
13 | $dir = pop @ARGV;
14 | if ( ! -d $dir ) {
15 |   print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n";
16 |   exit(1);
17 | }
18 | 
19 | $ans = 1; # true.
20 | 
21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path.
22 | defined $absdir || die "No such directory $dir";
23 | foreach $file (@ARGV) {
24 |   $absfile =  File::Spec->rel2abs($file); # Get $file as abs path.
25 |   defined $absfile || die "No such file or directory: $file";
26 |   @absdir_split = split("/", $absdir);
27 |   @absfile_split = split("/", $absfile);
28 | 
29 |   $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this
30 |   # as the destination in the link command.
31 |   $num_removed = 0;
32 |   while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) {
33 |     shift @absdir_split;
34 |     shift @absfile_split;
35 |     $num_removed++;
36 |   }
37 |   if (-l $newfile) { # newfile is already a link -> safe to delete it.
38 |     unlink($newfile); # "unlink" just means delete.
39 |   }
40 |   if ($num_removed == 0) { # will use absolute pathnames.
41 |     $oldfile = "/" . join("/", @absfile_split);
42 |     $ret = symlink($oldfile, $newfile);
43 |   } else {
44 |     $num_dots = @absdir_split;
45 |     $oldfile = join("/", @absfile_split);
46 |     for ($n = 0; $n < $num_dots; $n++) {
47 |       $oldfile = "../" . $oldfile;
48 |     }
49 |     $ret = symlink($oldfile, $newfile);
50 |   }
51 |   $ans = $ans && $ret;
52 |   if (! $ret) {
53 |     print STDERR "Error linking $oldfile to $newfile\n";
54 |   }
55 | }
56 | 
57 | exit ($ans == 1 ? 0 : 1);
58 | 
59 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/make_unigram_grammar.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # This script is used in discriminative training.
18 | # This script makes a simple unigram-loop version of G.fst
19 | # using a unigram grammar estimated from some training transcripts.
20 | # This is for MMI training.
21 | # We don't have any silences in G.fst; these are supplied by the
22 | # optional silences in the lexicon.
23 | 
24 | # Note: the symbols in the transcripts become the input and output
25 | # symbols of G.txt; these can be numeric or not.
26 | 
# Reads transcripts on stdin and writes a one-state unigram-loop grammar in
# text FST form: one self-loop arc per distinct symbol with cost -log(prob),
# plus a final cost for ending the sentence.

if(@ARGV != 0) {
    die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt";
}

$totcount = 0;
$nl = 0;
while (<>) {
  @A = split(" ", $_);
  # $w rather than $a: $a is the special variable used by sort.
  foreach $w (@A) {
    $count{$w}++;
    $totcount++;
  }
  $nl++;
  $totcount++; # Treat end-of-sentence as a symbol for purposes of
  # $totcount, so the grammar is properly stochastic.  This doesn't
  # become </s>, it just becomes the final-prob.
}

# With no input lines $totcount is 0 and the probabilities below would fail
# with "Illegal division by zero"; stop with an explicit message instead.
if ($totcount == 0) {
  die "make_unigram_grammar.pl: empty input, cannot estimate a unigram grammar\n";
}

foreach $w (keys %count) {
  $prob = $count{$w} / $totcount;
  $cost = -log($prob);          # Negated natural-log probs.
  print "0\t0\t$w\t$w\t$cost\n";
}
# Final cost of state 0: negated log of the end-of-sentence probability.
$final_prob = $nl / $totcount;
$final_cost = -log($final_prob);
print "0\t$final_cost\n";
54 | 
55 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/int2sym.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 3 | # Apache 2.0.
 4 | 
 5 | undef $field_begin;
 6 | undef $field_end;
 7 | 
 8 | 
 9 | if ($ARGV[0] eq "-f") {
10 |   shift @ARGV; 
11 |   $field_spec = shift @ARGV; 
12 |   if ($field_spec =~ m/^\d+$/) {
13 |     $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
14 |   }
15 |   if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
16 |     if ($1 ne "") {
17 |       $field_begin = $1 - 1; # Change to zero-based indexing.
18 |     }
19 |     if ($2 ne "") {
20 |       $field_end = $2 - 1; # Change to zero-based indexing.
21 |     }
22 |   }
23 |   if (!defined $field_begin && !defined $field_end) {
24 |     die "Bad argument to -f option: $field_spec"; 
25 |   }
26 | }
27 | $symtab = shift @ARGV;
28 | if(!defined $symtab) {
29 |     print STDERR "Usage: sym2int.pl [options] symtab [input] > output\n" .
30 |       "options: [-f (<field>|<field_start>-<field-end>)]\n" .
31 |       "e.g.: -f 2, or -f 3-4\n";
32 |     exit(1);
33 | }
34 | 
35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
36 | while(<F>) {
37 |     @A = split(" ", $_);
38 |     @A == 2 || die "bad line in symbol table file: $_";
39 |     $int2sym{$A[1]} = $A[0];
40 | }
41 | # Map one integer token to its symbol; $pos is the zero-based field index (used in errors).
42 | sub int2sym {
43 |     my ($token, $pos) = @_;
44 |     if ($token !~ m:^\d+$:) { # not all digits..
45 |       my $pos1 = $pos + 1; # make it one-based.
46 |       die "int2sym.pl: found noninteger token $token [in position $pos1]\n";
47 |     }
48 |     # Lexical, so the lookup result does not leak into a package global.
49 |     my $sym = $int2sym{$token};
50 |     if (!defined $sym) {
51 |       die "int2sym.pl: integer $token not in symbol table $symtab.";
52 |     }
53 |     return $sym;
54 | }
55 | 
56 | $error = 0;
57 | while (<>) {
58 |   @A = split(" ", $_);
59 |   for ($pos = 0; $pos <= $#A; $pos++) {
60 |     $a = $A[$pos];
61 |     if ( (!defined $field_begin || $pos >= $field_begin)
62 |          && (!defined $field_end || $pos <= $field_end)) {
63 |       $a = int2sym($a, $pos);
64 |     }
65 |     print $a . " ";
66 |   }
67 |   print "\n";
68 | }
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/reduce_data_dir_by_reclist.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # koried, 10/29/2012
 4 | 
 5 | # Reduce a data set based on a list of recordings
 6 | 
 7 | if [ $# != 3 ]; then
 8 | echo "usage: $0 srcdir reclist destdir"
 9 | exit 1;
10 | fi
11 | 
12 | srcdir=$1
13 | reclist=$2
14 | destdir=$3
15 | 
16 | if [ ! -f $srcdir/utt2spk ]; then 
17 | echo "$0: no such file $srcdir/utt2spk"
18 | exit 1;
19 | fi
20 | 
21 | function do_filtering {
22 | # assumes the utt2spk and spk2utt files already exist.
23 | 	[ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
24 | 	[ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
25 | 	[ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
26 | 	[ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
27 | 	[ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
28 | 	if [ -f $srcdir/segments ]; then
29 | 		utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
30 | 		awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
31 | # The next line would override the command above for wav.scp, which would be incorrect.
32 | 		[ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp
33 | 		[ -f $srcdir/reco2file_and_channel ] && \
34 | 			utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
35 | 		[ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm
36 | 		rm $destdir/reco
37 | 	fi
38 | 	srcutts=`cat $srcdir/utt2spk | wc -l`
39 | 	destutts=`cat $destdir/utt2spk | wc -l`
40 | 	echo "Reduced #utt from $srcutts to $destutts"
41 | }
42 | 
43 | mkdir -p $destdir
44 | 
45 | # filter the utt2spk based on the set of recordings
46 | rm -f $destdir/utt2spk
47 | for i in `cat $reclist`; do
48 | 	cat $srcdir/utt2spk | grep ^$i >> $destdir/utt2spk
49 | done
50 | 
51 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
52 | do_filtering;
53 | 
54 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/reduce_data_dir.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # koried, 10/29/2012
 4 | 
 5 | # Reduce a data set based on a list of turn-ids
 6 | 
 7 | if [ $# != 3 ]; then
 8 | echo "usage: $0 srcdir turnlist destdir"
 9 | exit 1;
10 | fi
11 | 
12 | srcdir=$1
13 | reclist=$2
14 | destdir=$3
15 | 
16 | if [ ! -f $srcdir/utt2spk ]; then 
17 | echo "$0: no such file $srcdir/utt2spk"
18 | exit 1;
19 | fi
20 | 
21 | function do_filtering {
22 | # assumes the utt2spk and spk2utt files already exist.
23 | 	[ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
24 | 	[ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
25 | 	[ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
26 | 	[ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
27 | 	[ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
28 | 	if [ -f $srcdir/segments ]; then
29 | 		utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
30 | 		awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
31 | 		# The next line would override the command above for wav.scp, which would be incorrect.
32 | 		[ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp
33 | 		[ -f $srcdir/reco2file_and_channel ] && \
34 | 			utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
35 | 		
36 | 		# Filter the STM file for proper sclite scoring (this will also remove the comments lines)
37 | 		[ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm
38 | 		rm $destdir/reco
39 | 	fi
40 | 	srcutts=`cat $srcdir/utt2spk | wc -l`
41 | 	destutts=`cat $destdir/utt2spk | wc -l`
42 | 	echo "Reduced #utt from $srcutts to $destutts"
43 | }
44 | 
45 | mkdir -p $destdir
46 | 
47 | # filter the utt2spk based on the set of recordings
48 | utils/filter_scp.pl $reclist < $srcdir/utt2spk > $destdir/utt2spk
49 | 
50 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
51 | do_filtering;
52 | 
53 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/scoring/wer_report.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal <jtrmal@gmail.com>)
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | 
18 | # This script reads the per-utt table generated, for example, during scoring
19 | # and outputs the WER in a format similar to what the compute-wer utility
20 | # or the utils/best_wer.pl produces
21 | # i.e. from table containing lines in this format
22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463
23 | # produces output like this
24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 
25 | # NB: if the STDIN stream will contain more of the SUM raw entries,
26 | #     the best one will be found and printed 
27 | #
28 | # If the script is called with parameters, it uses them to provide
29 | # a description of the output
30 | # i.e.
31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set
32 | # the following output will be produced
33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set
34 | 
35 | 
36 | while (<STDIN>) {
37 |   if ( m:SUM\s+raw:) {
38 |     @F = split;
39 |     if ((!defined $wer) || ($wer > $F[8])) {
40 |       $corr=$F[4];
41 |       $sub=$F[5];
42 |       $ins=$F[6];
43 |       $del=$F[7];
44 |       $wer=$F[8];
45 |       $words=$F[3];
46 |     }
47 |   }
48 | }
49 | 
50 | if (defined $wer) {
51 |   $wer_str = sprintf("%.2f", (100.0 * $wer) / $words);
52 |   print "%WER $wer_str [ $wer / $words,  $ins ins, $del del, $sub sub ]";
53 |   print " " . join(" ", @ARGV) if @ARGV > 0;
54 |   print "\n";
55 | }
56 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/create_split_dir.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2013  Guoguo Chen
 4 | # Apache 2.0.
 5 | #
 6 | # This script creates storage directories on different file systems, and creates
 7 | # symbolic links to those directories. For example, a command
 8 | #
 9 | #   utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
10 | #
11 | # will mkdir -p all of those directories, and will create links
12 | #
13 | #   egs/storage/1 -> /export/gpu-03/egs/storage
14 | #   egs/storage/2 -> /export/gpu-04/egs/storage
15 | #   ...
16 | #
17 | use strict;
18 | use warnings;
19 | use File::Spec;
20 | use Getopt::Long;
21 | 
22 | my $Usage = <<EOU;
23 | This script creates storage directories on different file systems, and creates
24 | symbolic links to those directories.
25 | 
26 | Usage: utils/create_split_dir.pl <actual_storage_dirs> <pseudo_storage_dir>
27 |  e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
28 | 
29 | Allowed options:
30 |   --suffix    : Common suffix to <actual_storage_dirs>    (string, default = "")
31 | 
32 | See also create_data_link.pl, which is intended to work with the resulting
33 | directory structure, and remove_data_links.sh
34 | EOU
35 | 
36 | my $suffix="";
37 | GetOptions('suffix=s' => \$suffix);
38 | 
39 | if (@ARGV < 2) {
40 |   die $Usage;
41 | }
42 | 
43 | my $ans = 1;
44 | 
45 | my $dir = pop(@ARGV);
46 | system("mkdir -p $dir 2>/dev/null");
47 | my $index = 1;
48 | foreach my $file (@ARGV) {
49 |   $file = $file . "/" . $suffix;
50 |   my $actual_storage = File::Spec->rel2abs($file);
51 |   # Take slot $index and advance now, so a skipped slot cannot stall later ones.
52 |   my $pseudo_storage = "$dir/$index";
53 |   $index++;
54 | 
55 |   # If the symbolic link already exists, leave it alone and move on.
56 |   if (-l $pseudo_storage) {
57 |     print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n";
58 |     next;
59 |   }
60 | 
61 |   # Create the destination directory and make the link.
62 |   system("mkdir -p $actual_storage 2>/dev/null");
63 |   my $ret = symlink($actual_storage, $pseudo_storage);
64 | 
65 |   # Record failure in $ans (it determines the exit status) and report it.
66 |   $ans = $ans && $ret;
67 |   if (! $ret) {
68 |     print STDERR "Error linking $actual_storage to $pseudo_storage\n";
69 |   }
70 | }
71 | 
72 | exit($ans == 1 ? 0 : 1);
73 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_basic.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 3 | # Apache 2.0
 4 | 
 5 | # begin configuration section.
 6 | cmd=run.pl
 7 | stage=0
 8 | min_lmwt=1
 9 | max_lmwt=10
10 | #end configuration section.
11 | 
12 | [ -f ./path.sh ] && . ./path.sh
13 | . parse_options.sh || exit 1;
14 | # Exactly three positional arguments must remain after option parsing.
15 | if [ $# -ne 3 ]; then
16 |   echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
17 |   echo " Options:"
18 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
19 |   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
20 |   echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
21 |   echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
22 |   exit 1;
23 | fi
24 | 
25 | data=$1
26 | lang_or_graph=$2
27 | dir=$3
28 | 
29 | phonemap="conf/phones.60-48-39.map"
30 | nj=$(cat $dir/num_jobs)
31 | 
32 | symtab=$lang_or_graph/words.txt
33 | 
34 | for f in $symtab $dir/lat.1.gz $data/text; do
35 |   [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
36 | done
37 | 
38 | mkdir -p $dir/scoring/log
39 | 
40 | # Map reference to 39 phone classes:
41 | cat $data/text | local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 > $dir/scoring/test_filt.txt
42 | 
43 | # Get the phone-sequence on the best-path:
44 | for LMWT in $(seq $min_lmwt $max_lmwt); do
45 |   $cmd JOB=1:$nj $dir/scoring/log/best_path_basic.$LMWT.JOB.log \
46 |     lattice-best-path --lm-scale=$LMWT --word-symbol-table=$symtab --verbose=2 \
47 |       "ark:gunzip -c $dir/lat.JOB.gz|" ark,t:$dir/scoring/$LMWT.JOB.tra || exit 1;
48 |   cat $dir/scoring/$LMWT.*.tra | sort > $dir/scoring/$LMWT.tra
49 |   rm $dir/scoring/$LMWT.*.tra
50 | done
51 | 
52 | # Map hypothesis to 39 phone classes:
53 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score_basic.LMWT.log \
54 |    cat $dir/scoring/LMWT.tra \| \
55 |     utils/int2sym.pl -f 2- $symtab \| \
56 |     local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 \| \
57 |     compute-wer --text --mode=all \
58 |      ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
59 | 
60 | exit 0;
61 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/find_arpa_oovs.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2011 Microsoft Corporation
 3 | 
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #  http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Expect the symbol table plus at most one ARPA file (read via <> when given).
18 | if (@ARGV < 1 || @ARGV > 2) {
19 |     # This program finds words in the arpa file that are not symbols
20 |     # in the OpenFst-format symbol table words.txt.  It prints them
21 |     # on the standard output, one per line.
22 |     die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
23 | }
24 | 
25 | $symtab = shift @ARGV;
26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
27 | while(<S>){ 
28 |     @A = split(" ", $_);
29 |     @A == 2 || die "Bad line in symbol table file: $_";
30 |     $seen{$A[0]} = 1;
31 | }
32 | 
33 | $curgram=0;
34 | while(<>) { # Find the \data\ marker.
35 |     if(m:^\\data\\$:) { last; }
36 | }
37 | while(<>) {
38 |     if(m/^\\(\d+)\-grams:\s*$/) {
39 |         $curgram = $1;
40 |         if($curgram > 1) {
41 |             last; # This is an optimization as we can get the vocab from the 1-grams
42 |         }
43 |     } elsif($curgram > 0) {
44 |         @A = split(" ", $_);
45 |         if(@A > 1) {
46 |             shift @A;
47 |             for($n=0;$n<$curgram;$n++) {
48 |                 $word = $A[$n];
49 |                 if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
50 |                 $in_arpa{$word} = 1;
51 |             }
52 |         } else {
53 |             if(@A > 0 && $A[0] !~ m:\\end\\:) {
54 |                 print STDERR "Unusual line $_ (line $.) in arpa file\n";
55 |             }
56 |         }
57 |     }
58 | }
59 | 
60 | foreach $w (keys %in_arpa) {
61 |     if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
62 |         print "$w\n";
63 |     }
64 | }
65 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/gen_dct_mat.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # ./gen_dct_mat.py
19 | # script generates matrix with DCT transform, which is sparse
20 | # and takes into account that data-layout is along frequency axis,
21 | # while DCT is done along temporal axis.
22 | from __future__ import print_function
23 | 
24 | from math import *
25 | import sys
26 | 
27 | 
28 | from optparse import OptionParser
29 | 
30 | parser = OptionParser()
31 | parser.add_option("--fea-dim", dest="dim", help="feature dimension")
32 | parser.add_option("--splice", dest="splice", help="applied splice value")
33 | parser.add_option("--dct-basis", dest="dct_basis", help="number of DCT basis")
34 | (options, args) = parser.parse_args()
35 | 
36 | if options.dim == None:
37 |     parser.print_help()
38 |     sys.exit(1)
39 | 
40 | dim = int(options.dim)
41 | splice = int(options.splice)
42 | dct_basis = int(options.dct_basis)
43 | 
44 | timeContext = 2 * splice + 1
45 | 
46 | 
47 | # generate the DCT matrix
48 | M_PI = 3.1415926535897932384626433832795
49 | M_SQRT2 = 1.4142135623730950488016887
50 | 
51 | 
52 | # generate sparse DCT matrix
53 | print("[")
54 | for k in range(dct_basis):
55 |     for m in range(dim):
56 |         for n in range(timeContext):
57 |             if n == 0:
58 |                 print(m * "0 ", end=" ")
59 |             else:
60 |                 print((dim - 1) * "0 ", end=" ")
61 |             print(str(sqrt(2.0 / timeContext) * cos(M_PI / timeContext * k * (n + 0.5))), end=" ")
62 |             if n == timeContext - 1:
63 |                 print((dim - m - 1) * "0 ", end=" ")
64 |         print()
65 |     print()
66 | 
67 | print("]")
68 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/prepare_online_nnet_dist_build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2015  Johns Hopkins University (Author: Vijayaditya Peddinti)
 4 | #                 Guoguo Chen
 5 | # Apache 2.0
 6 | # Script to prepare the distribution from the online-nnet build
 7 | 
 8 | other_files= #other files to be included in the build
 9 | other_dirs=
10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf"
11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts"
12 | 
13 | echo "$0 $@"  # Print the command line for logging
14 | [ -f path.sh ] && . ./path.sh;
15 | . parse_options.sh || exit 1;
16 | 
17 | if [ $# -ne 3 ]; then
18 |    echo "Usage: $0 <lang-dir> <model-dir> <output-tgz>"
19 |    echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz"
20 |    exit 1;
21 | fi
22 | 
23 | lang=$1
24 | modeldir=$2
25 | tgzfile=$3
26 | 
27 | for f in $lang/phones.txt $other_files; do
28 |   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
29 | done
30 | 
31 | build_files=
32 | for d in $modeldir/conf $modeldir/ivector_extractor; do
33 |   [ ! -d $d ] && echo "$0: no such directory $d" && exit 1;
34 | done
35 | 
36 | for f in $ivec_extractor_files; do
37 |   f=$modeldir/ivector_extractor/$f
38 |   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
39 |   build_files="$build_files $f"
40 | done
41 | 
42 | # Makes a copy of the original config files, as we will change the absolute path
43 | # to relative.
44 | rm -rf $modeldir/conf_abs_path
45 | mkdir -p $modeldir/conf_abs_path
46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path
47 | 
48 | for f in $conf_files; do 
49 |   [ ! -f $modeldir/conf/$f ] && \
50 |     echo "$0: no such file $modeldir/conf/$f" && exit 1;
51 |   # Changes absolute path to relative path. The path entries in the config file
52 |   # are generated by scripts and it is safe to assume that they have structure:
53 |   # variable=path
54 |   cat $modeldir/conf_abs_path/$f | perl -e '
55 |     use File::Spec;
56 |     while(<STDIN>) {
57 |       chomp;
58 |       @col = split("=", $_);
59 |       if (@col == 2 && (-f $col[1])) {
60 |         $col[1] = File::Spec->abs2rel($col[1]);
61 |         print "$col[0]=$col[1]\n";
62 |       } else {
63 |         print "$_\n";
64 |       }
65 |     }
66 |   ' > $modeldir/conf/$f
67 |   build_files="$build_files $modeldir/conf/$f"
68 | done
69 | 
70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \
71 |   $modeldir/final.mdl $modeldir/tree >/dev/null
72 | 
73 | # Changes back to absolute path.
74 | rm -rf $modeldir/conf
75 | mv $modeldir/conf_abs_path $modeldir/conf
76 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/convert_slf_parallel.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014.  Apache 2.0.
 3 | 
 4 | # This script converts lattices to HTK format compatible with other toolkits.
 5 | # We can choose to put words to nodes or arcs, as both is valid in the SLF format.
 6 | 
 7 | # begin configuration section.
 8 | cmd=run.pl
 9 | dirname=lats-in-htk-slf
10 | parallel_opts="-tc 50" # We should limit disk stress
11 | word_to_node=false # Words in arcs or nodes? [default:arcs]
12 | #end configuration section.
13 | 
14 | echo "$0 $@"
15 | 
16 | [ -f ./path.sh ] && . ./path.sh
17 | . parse_options.sh || exit 1;
18 | # Exactly three positional arguments must remain after option parsing.
19 | if [ $# -ne 3 ]; then
20 |   echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
21 |   echo " Options:"
22 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
23 |   echo "    --word-to-node (true|false)     # put word symbols on nodes instead of links."
24 |   echo "    --parallel-opts STR             # parallelization options (def.: '-tc 50')."
25 |   echo "e.g.:"
26 |   echo "$0 data/dev data/lang exp/tri4a/decode_dev"
27 |   exit 1;
28 | fi
29 | 
30 | data=$1
31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
32 | dir=$3
33 | 
34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir.
35 | 
36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do
37 |   [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
38 | done
39 | 
40 | mkdir -p $dir/$dirname/log # also creates $dir/$dirname itself
41 | 
42 | echo "$0: Converting lattices into '$dir/$dirname'"
43 | 
44 | # Words in arcs or nodes? [default:arcs]; start empty so no stray value is passed on.
45 | word_to_node_arg=
46 | $word_to_node && word_to_node_arg="--word-to-node"
47 | 
48 | nj=$(cat $dir/num_jobs)
49 | 
50 | # convert the lattices (individually, gzipped)
51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \
52 |   mkdir -p $dir/$dirname/JOB/ '&&' \
53 |   lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \
54 |   utils/int2sym.pl -f 3 $lang/words.txt \| \
55 |   utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1
56 | 
57 | # make list of lattices (pattern quoted so the shell cannot expand it before find sees it)
58 | find -L $PWD/$dir/$dirname -name '*.lat.gz' > $dir/$dirname/lat_htk.scp || exit 1
59 | 
60 | # check number of lattices:
61 | nseg=$(cat $data/segments | wc -l)
62 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l)
63 | echo "segments $nseg, saved-lattices $nlat_out"
64 | #
65 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \
66 |   && exit 1
67 | 
68 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp"
69 | exit 0
70 | 
71 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/combine_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 3 | #           2014  David Snyder
 4 | 
 5 | # This script operates on a data directory, such as in data/train/.
 6 | # See http://kaldi.sourceforge.net/data_prep.html#data_prep_data
 7 | # for what these directories contain.
 8 | 
 9 | # Begin configuration section. 
10 | extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..."
11 | skip_fix=false # skip the fix_data_dir.sh in the end
12 | # End configuration section.
13 | 
14 | echo "$0 $@"  # Print the command line for logging
15 | 
16 | if [ -f path.sh ]; then . ./path.sh; fi
17 | . parse_options.sh || exit 1;
18 | 
19 | if [ $# -lt 2 ]; then
20 |   echo "Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
21 |   echo "Note, files that don't appear in first source dir will not be added even if they appear in later ones."
22 |   exit 1
23 | fi
24 | 
25 | dest=$1;
26 | shift;
27 | 
28 | first_src=$1;
29 | 
30 | rm -r $dest 2>/dev/null
31 | mkdir -p $dest;
32 | 
33 | export LC_ALL=C
34 | 
35 | for dir in $*; do
36 |   if [ ! -f $dir/utt2spk ]; then
37 |     echo "$0: no such file $dir/utt2spk"
38 |     exit 1;
39 |   fi
40 | done
41 | 
42 | # The utt2uniq file is handled differently from the other files: it is not
43 | # compulsory for it to exist in the src directories, but if it exists in even
44 | # one of them it should exist in all, so we create it where necessary.
45 | has_utt2uniq=false
46 | for in_dir in $*; do
47 |   if [ -f $in_dir/utt2uniq ]; then
48 |     has_utt2uniq=true
49 |     break
50 |   fi
51 | done
52 | 
53 | if $has_utt2uniq; then
54 |   # we are going to create an utt2uniq file in the destdir
55 |   for in_dir in $*; do
56 |     if [ ! -f $in_dir/utt2uniq ]; then
57 |       # we assume that utt2uniq is a one to one mapping
58 |       cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' 
59 |     else
60 |       cat $in_dir/utt2uniq
61 |     fi
62 |   done | sort -k1 > $dest/utt2uniq
63 |   echo "$0: combined utt2uniq"
64 | fi
65 | # some of the old scripts might provide utt2uniq as an extrafile, so just remove it
66 | extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g")
67 | 
68 | for file in utt2spk utt2lang feats.scp text cmvn.scp segments reco2file_and_channel wav.scp spk2gender $extra_files; do
69 |   if [ -f $first_src/$file ]; then
70 |     ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1;
71 |     echo "$0: combined $file"
72 |   else
73 |     echo "$0 [info]: not combining $file as it does not exist"
74 |   fi
75 | done
76 | 
77 | utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt
78 | 
79 | if ! $skip_fix ; then
80 |   utils/fix_data_dir.sh $dest || exit 1;
81 | fi
82 | 
83 | exit 0
84 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/apply_map.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use warnings; #sed replacement for -w perl parameter
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 4 | # Apache 2.0.
 5 | 
 6 | # This program is a bit like ./sym2int.pl in that it applies a map
 7 | # to things in a file, but it's a bit more general in that it doesn't
 8 | # assume the things being mapped to are single tokens, they could
 9 | # be sequences of tokens.  See the usage message.
10 | 
11 | 
12 | if (@ARGV > 0 && $ARGV[0] eq "-f") {
13 |   shift @ARGV;
14 |   $field_spec = shift @ARGV; 
15 |   if ($field_spec =~ m/^\d+$/) {
16 |     $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
17 |   }
18 |   if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
19 |     if ($1 ne "") {
20 |       $field_begin = $1 - 1;    # Change to zero-based indexing.
21 |     }
22 |     if ($2 ne "") {
23 |       $field_end = $2 - 1;      # Change to zero-based indexing.
24 |     }
25 |   }
26 |   if (!defined $field_begin && !defined $field_end) {
27 |     die "Bad argument to -f option: $field_spec"; 
28 |   }
29 | }
30 | 
31 | # Mapping is obligatory
32 | $permissive = 0;
33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
34 |   shift @ARGV;
35 |   # Mapping is optional (missing key is printed to output)
36 |   $permissive = 1;
37 | }
38 | 
39 | if(@ARGV != 1) {
40 |   print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
41 |   print STDERR "Usage: apply_map.pl [options] map <input >output\n" .
42 |     "options: [-f <field-range> ]\n" .
43 |     "Applies the map 'map' to all input text, where each line of the map\n" .
44 |     "is interpreted as a map from the first field to the list of the other fields\n" .
45 |     "Note: <field-range> can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
46 |     "range in the input to apply the map to.\n" .
47 |     "e.g.: echo A B | apply_map.pl a.txt\n" .
48 |     "where a.txt is:\n" .
49 |     "A a1 a2\n" .
50 |     "B b\n" .
51 |     "will produce:\n" .
52 |     "a1 a2 b\n";
53 |   exit(1);
54 | }
55 | 
56 | ($map) = @ARGV;
57 | open(M, "<$map") || die "Error opening map file $map: $!";
58 | 
59 | while (<M>) {
60 |   @A = split(" ", $_);
61 |   @A >= 1 || die "apply_map.pl: empty line.";
62 |   $i = shift @A;
63 |   $o = join(" ", @A);
64 |   $map{$i} = $o;
65 | }
66 | 
67 | while(<STDIN>) {
68 |   @A = split(" ", $_);
69 |   for ($x = 0; $x < @A; $x++) {
70 |     if ( (!defined $field_begin || $x >= $field_begin)
71 |          && (!defined $field_end || $x <= $field_end)) {
72 |       $a = $A[$x];
73 |       if (!defined $map{$a}) {
74 |         if (!$permissive) {
75 |           die "apply_map.pl: undefined key $a\n"; 
76 |         } else {
77 |           print STDERR "apply_map.pl: warning! missing key $a\n";
78 |         }
79 |       } else {
80 |         $A[$x] = $map{$a}; 
81 |       }
82 |     }
83 |   }
84 |   print join(" ", @A) . "\n";
85 | }
86 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/timit_format_data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2013  (Author: Daniel Povey)
 4 | # Apache 2.0
 5 | 
 6 | # This script takes data prepared in a corpus-dependent way
 7 | # in data/local/, and converts it into the "canonical" form,
 8 | # in various subdirectories of data/, e.g. data/lang, data/train, etc.
 9 | 
10 | . ./path.sh || exit 1;
11 | 
12 | echo "Preparing train, dev and test data"
13 | srcdir=data/local/data
14 | lmdir=data/local/nist_lm
15 | tmpdir=data/local/lm_tmp
16 | lexicon=data/local/dict/lexicon.txt
17 | mkdir -p $tmpdir
18 | 
19 | for x in train dev test; do 
20 |   mkdir -p data/$x
21 |   cp $srcdir/${x}_wav.scp data/$x/wav.scp || exit 1;
22 |   cp $srcdir/$x.text data/$x/text || exit 1;
23 |   cp $srcdir/$x.spk2utt data/$x/spk2utt || exit 1;
24 |   cp $srcdir/$x.utt2spk data/$x/utt2spk || exit 1;
25 |   utils/filter_scp.pl data/$x/spk2utt $srcdir/$x.spk2gender > data/$x/spk2gender || exit 1;
26 |   cp $srcdir/${x}.stm data/$x/stm
27 |   cp $srcdir/${x}.glm data/$x/glm
28 |   utils/validate_data_dir.sh --no-feats data/$x || exit 1
29 | done
30 | 
31 | # Next, for each type of language model, create the corresponding FST
32 | # and the corresponding lang_test_* directory.
33 | 
34 | echo Preparing language models for test
35 | 
36 | for lm_suffix in bg; do
37 |   test=data/lang_test_${lm_suffix}
38 |   mkdir -p $test
39 |   cp -r data/lang/* $test
40 |   # Drop n-grams with illegal <s>/</s> combinations, then compile the ARPA LM into $test/G.fst.
41 |   gunzip -c $lmdir/lm_phone_${lm_suffix}.arpa.gz | \
42 |     grep -E -v '<s> <s>|</s> <s>|</s> </s>' | \
43 |     arpa2fst - | fstprint | \
44 |     utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
45 |      --osymbols=$test/words.txt  --keep_isymbols=false --keep_osymbols=false | \
46 |     fstrmepsilon | fstarcsort --sort_type=ilabel > $test/G.fst
47 |   fstisstochastic $test/G.fst
48 |  # The output is like:
49 |  # 9.14233e-05 -0.259833
50 |  # we do expect the first of these 2 numbers to be close to zero (the second is
51 |  # nonzero because the backoff weights make the states sum to >1).
52 |  # Because of the <s> fiasco for these particular LMs, the first number is not
53 |  # as close to zero as it could be.
54 | 
55 |  # Everything below is only for diagnostic.
56 |  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
57 |  # this might cause determinization failure of CLG.
58 |  # #0 is treated as an empty word.
59 |   mkdir -p $tmpdir/g
60 |   awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
61 |     < "$lexicon"  >$tmpdir/g/select_empty.fst.txt
62 |   fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt $tmpdir/g/select_empty.fst.txt | \
63 |    fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > $tmpdir/g/empty_words.fst
64 |   fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && 
65 |     echo "Language model has cycles with empty words" && exit 1
66 |   rm -r $tmpdir/g
67 | done
68 | 
69 | utils/validate_lang.pl data/lang_test_bg || exit 1
70 | 
71 | echo "Succeeded in formatting data."
72 | rm -r $tmpdir
73 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_sclite.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 3 | #           2014  Brno University of Technology (Author: Karel Vesely)
 4 | # Apache 2.0
 5 | 
 6 | # begin configuration section.
 7 | cmd=run.pl
 8 | stage=0
 9 | min_lmwt=1
10 | max_lmwt=10
11 | mbr_scale=1.0
12 | #end configuration section.
13 | 
14 | [ -f ./path.sh ] && . ./path.sh
15 | . parse_options.sh || exit 1;
16 | 
17 | if [ $# -ne 3 ]; then  # exactly three positional arguments must remain after option parsing
18 |   echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
19 |   echo " Options:"
20 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
21 |   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
22 |   echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
23 |   echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
24 |   exit 1;
25 | fi
26 | 
27 | data=$1
28 | lang_or_graph=$2
29 | dir=$3
30 | 
31 | model=$dir/../final.mdl # assume model one level up from decoding dir.
32 | 
33 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 
34 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
35 | hubdir=`dirname $hubscr`
36 | 
37 | phonemap="conf/phones.60-48-39.map"
38 | nj=$(cat $dir/num_jobs)
39 | 
40 | symtab=$lang_or_graph/words.txt
41 | 
42 | for f in $symtab $dir/lat.1.gz $data/text; do
43 |   [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
44 | done
45 | 
46 | mkdir -p $dir/scoring/log
47 | 
48 | # Map reference to 39 phone classes, the silence is optional (.):
49 | local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 >$dir/scoring/stm_39phn
50 | cp $data/glm $dir/scoring/glm_39phn
51 | 
52 | if [ $stage -le 0 ]; then
53 |   # Get the phone-sequence on the best-path:
54 |   for LMWT in $(seq $min_lmwt $max_lmwt); do
55 |     $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \
56 |       lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \
57 |       lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1;
58 |     cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm
59 |     rm $dir/scoring/$LMWT.*.ctm
60 |   done
61 | fi
62 | 
63 | if [ $stage -le 1 ]; then
64 |   # Map ctm to 39 phone classes:
65 |   $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \
66 |      mkdir $dir/score_LMWT ';' \
67 |      cat $dir/scoring/LMWT.ctm \| \
68 |      utils/int2sym.pl -f 5 $symtab \| \
69 |      local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '>' \
70 |      $dir/scoring/LMWT.ctm_39phn || exit 1
71 | fi
72 | 
73 | # Score the set...
74 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
75 |   cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \
76 |    $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1;
77 | 
78 | exit 0;
79 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 3 | #           2014  Brno University of Technology (Author: Karel Vesely)
 4 | # Apache 2.0
 5 | 
 6 | # begin configuration section.
 7 | cmd=run.pl
 8 | stage=0
 9 | min_lmwt=1
10 | max_lmwt=10
11 | mbr_scale=1.0
12 | #end configuration section.
13 | 
14 | [ -f ./path.sh ] && . ./path.sh
15 | . parse_options.sh || exit 1;
16 | 
17 | if [ $# -ne 3 ]; then  # exactly three positional arguments must remain after option parsing
18 |   echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
19 |   echo " Options:"
20 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
21 |   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
22 |   echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
23 |   echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
24 |   exit 1;
25 | fi
26 | 
27 | data=$1
28 | lang_or_graph=$2
29 | dir=$3
30 | 
31 | model=$dir/../final.mdl # assume model one level up from decoding dir.
32 | 
33 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 
34 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
35 | hubdir=`dirname $hubscr`
36 | 
37 | phonemap="conf/phones.60-48-39.map"
38 | nj=$(cat $dir/num_jobs)
39 | 
40 | symtab=$lang_or_graph/words.txt
41 | 
42 | for f in $symtab $dir/lat.1.gz $data/text; do
43 |   [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
44 | done
45 | 
46 | mkdir -p $dir/scoring/log
47 | 
48 | # Map reference to 39 phone classes, the silence is optional (.):
49 | local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 | \
50 |  sed 's: sil: (sil):g' > $dir/scoring/stm_39phn
51 | cp $data/glm $dir/scoring/glm_39phn
52 | 
53 | if [ $stage -le 0 ]; then
54 |   # Get the phone-sequence on the best-path:
55 |   for LMWT in $(seq $min_lmwt $max_lmwt); do
56 |     $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \
57 |       lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \
58 |       lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1;
59 |     cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm
60 |     rm $dir/scoring/$LMWT.*.ctm
61 |   done
62 | fi
63 | 
64 | if [ $stage -le 1 ]; then
65 |   # Map ctm to 39 phone classes:
66 |   $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \
67 |      mkdir $dir/score_LMWT ';' \
68 |      cat $dir/scoring/LMWT.ctm \| \
69 |      utils/int2sym.pl -f 5 $symtab \| \
70 |      local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '>' \
71 |      $dir/scoring/LMWT.ctm_39phn || exit 1
72 | fi
73 | 
74 | # Score the set...
75 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
76 |   cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \
77 |    $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1;
78 | 
79 | exit 0;
80 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_phrich.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 3 | #           2014  Brno University of Technology (Author: Karel Vesely)
 4 | # Apache 2.0
 5 | 
 6 | # begin configuration section.
 7 | cmd=run.pl
 8 | stage=0
 9 | min_lmwt=1
10 | max_lmwt=15
11 | mbr_scale=1.0
12 | #end configuration section.
13 | 
14 | [ -f ./path.sh ] && . ./path.sh
15 | . parse_options.sh || exit 1;
16 | 
17 | if [ $# -ne 3 ]; then  # exactly three positional arguments must remain after option parsing
18 |   echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
19 |   echo " Options:"
20 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
21 |   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
22 |   echo "    --min_lmwt <int>                # minimum LM-weight for lattice rescoring "
23 |   echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
24 |   exit 1;
25 | fi
26 | 
27 | data=$1
28 | lang_or_graph=$2
29 | dir=$3
30 | 
31 | model=$dir/../final.mdl # assume model one level up from decoding dir.
32 | 
33 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 
34 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
35 | hubdir=`dirname $hubscr`
36 | 
37 | phonemap="conf/phones.60-48-39.map"
38 | nj=$(cat $dir/num_jobs)
39 | 
40 | symtab=$lang_or_graph/words.txt
41 | 
42 | for f in $symtab $dir/lat.1.gz $data/text; do
43 |   [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
44 | done
45 | 
46 | mkdir -p $dir/scoring/log
47 | 
48 | # Map reference to 39 phone classes, the silence is deleted (.):
49 | local/timit_norm_trans.pl -i $data/stm -m $phonemap -from 48 -to 39 | \
50 |  sed 's: sil::g' > $dir/scoring/stm_39phn
51 | cp $data/glm $dir/scoring/glm_39phn
52 | 
53 | 
54 | 
55 | if [ $stage -le 0 ]; then
56 |   # Get the phone-sequence on the best-path:
57 |   for LMWT in $(seq $min_lmwt $max_lmwt); do
58 |     $cmd JOB=1:$nj $dir/scoring/log/best_path.$LMWT.JOB.log \
59 |       lattice-align-phones $model "ark:gunzip -c $dir/lat.JOB.gz|" ark:- \| \
60 |       lattice-to-ctm-conf --acoustic-scale=$(bc <<<"scale=8; 1/$LMWT*$mbr_scale") --lm-scale=$mbr_scale ark:- $dir/scoring/$LMWT.JOB.ctm || exit 1;
61 |     cat $dir/scoring/$LMWT.*.ctm | sort > $dir/scoring/$LMWT.ctm
62 |     rm $dir/scoring/$LMWT.*.ctm
63 |   done
64 | fi
65 | 
66 | if [ $stage -le 1 ]; then
67 |   # Map ctm to 39 phone classes:
68 |   $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/map_ctm.LMWT.log \
69 |   mkdir $dir/score_LMWT ';' \
70 |   cat $dir/scoring/LMWT.ctm \| \
71 |   utils/int2sym.pl -f 5 $symtab \| \
72 |   local/timit_norm_trans.pl -i - -m $phonemap -from 48 -to 39 '|' grep -v 'sil' '>' \
73 |   $dir/scoring/LMWT.ctm_39phn || exit 1
74 | fi
75 | 
76 | 
77 | 
78 | # Score the set...
79 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
80 |   cp $dir/scoring/stm_39phn $dir/score_LMWT/stm_39phn '&&' cp $dir/scoring/LMWT.ctm_39phn $dir/score_LMWT/ctm_39phn '&&' \
81 |    $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm_39phn -r $dir/score_LMWT/stm_39phn $dir/score_LMWT/ctm_39phn || exit 1;
82 | 
83 | exit 0;
84 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/format_lm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -u
 2 | 
 3 | # Copyright 2012  Arnab Ghoshal
 4 | # Copyright 2010-2011  Microsoft Corporation
 5 | 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #  http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | set -o errexit
20 | 
21 | if [ $# -ne 4 ]; then
22 |   printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0`
23 |   echo "  Convert ARPA-format language models to FSTs.";
24 |   exit 1;
25 | fi
26 | 
27 | lang_dir=$1
28 | lm=$2
29 | lexicon=$3
30 | out_dir=$4
31 | mkdir -p $out_dir
32 | 
33 | [ -f ./path.sh ] && . ./path.sh
34 | 
35 | echo "Converting '$lm' to FST"
36 | 
37 | for f in phones.txt words.txt L.fst L_disambig.fst phones/; do
38 |   cp -r $lang_dir/$f $out_dir
39 | done
40 | 
41 | lm_base=$(basename $lm '.gz')
42 | gunzip -c $lm | utils/find_arpa_oovs.pl $out_dir/words.txt \
43 |   > $out_dir/oovs_${lm_base}.txt
44 | 
45 | # Drop every n-gram containing an "illegal" combination of <s> and </s>, which
46 | # are supposed to occur only at the beginning/end of an utterance.  These can
47 | # cause determinization failures of CLG [ends up being epsilon cycles].
48 | gunzip -c $lm \
49 |   | grep -E -v '<s> <s>|</s> <s>|</s> </s>' \
50 |   | arpa2fst - | fstprint \
51 |   | utils/remove_oovs.pl $out_dir/oovs_${lm_base}.txt \
52 |   | utils/eps2disambig.pl | utils/s2eps.pl \
53 |   | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
54 |     --keep_isymbols=false --keep_osymbols=false \
55 |   | fstrmepsilon | fstarcsort --sort_type=ilabel > $out_dir/G.fst
56 | set +e
57 | fstisstochastic $out_dir/G.fst
58 | set -e
59 | # The output is like:
60 | # 9.14233e-05 -0.259833
61 | # we do expect the first of these 2 numbers to be close to zero (the second is
62 | # nonzero because the backoff weights make the states sum to >1).
63 | 
64 | # Everything below is only for diagnostic.
65 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
66 | # this might cause determinization failure of CLG.
67 | # #0 is treated as an empty word.
68 | mkdir -p $out_dir/tmpdir.g
69 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 
70 |      END{print "0 0 #0 #0"; print "0";}' \
71 |      < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt
72 | 
73 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
74 |   $out_dir/tmpdir.g/select_empty.fst.txt \
75 |   | fstarcsort --sort_type=olabel \
76 |   | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst
77 | 
78 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \
79 |   && echo "Language model has cycles with empty words" && exit 1
80 | 
81 | rm -r $out_dir/tmpdir.g
82 | 
83 | 
84 | echo "Succeeded in formatting LM: '$lm'"
85 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/filter_scp.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Copyright 2010-2012 Microsoft Corporation
 3 | #                     Johns Hopkins University (author: Daniel Povey)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
19 | # This script takes a list of utterance-ids or any file whose first field
20 | # of each line is an utterance-id, and filters an scp
21 | # file (or any file whose "n-th" field is an utterance id), printing
22 | # out only those lines whose "n-th" field is in id_list. The index of
23 | # the "n-th" field is 1, by default, but can be changed by using
24 | # the -f <n> switch
25 | 
26 | $exclude = 0;
27 | $field = 1;
28 | $shifted = 0;
29 | 
30 | do {
31 |   $shifted=0;
32 |   if ($ARGV[0] eq "--exclude") {
33 |     $exclude = 1;
34 |     shift @ARGV;
35 |     $shifted=1;
36 |   }
37 |   if ($ARGV[0] eq "-f") {
38 |     $field = $ARGV[1];
39 |     shift @ARGV; shift @ARGV;
40 |     $shifted=1
41 |   }
42 | } while ($shifted);
43 | 
44 | if(@ARGV < 1 || @ARGV > 2) {
45 |   die "Usage: filter_scp.pl [--exclude] [-f <field-to-filter-on>] id_list [in.scp] > out.scp \n" .
46 |       "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" .
47 |       "Note: only the first field of each line in id_list matters.  With --exclude, prints\n" .
48 |       "only the lines that were *not* in id_list.\n" .
49 |       "Caution: previously, the -f option was interpreted as a zero-based field index.\n" .
50 |       "If your older scripts (written before Oct 2014) stopped working and you used the\n" .
51 |       "-f option, add 1 to the argument.\n" .
52 |       "See also: utils/filter_scp.pl .\n";
53 | }
54 | 
55 | 
56 | $idlist = shift @ARGV;
57 | open(F, "<$idlist") || die "Could not open id-list file $idlist";
58 | while(<F>) {
59 |   @A = split;
60 |   @A>=1 || die "Invalid id-list file line $_";
61 |   $seen{$A[0]} = 1;
62 | }
63 | 
64 | if ($field == 1) { # Treat this as special case, since it is common.
65 |   while(<>) {
66 |     $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field.";
67 |     # $1 is what we filter on.
68 |     if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) {
69 |       print $_;
70 |     }
71 |   }
72 | } else {
73 |   while(<>) {
74 |     @A = split;
75 |     @A > 0 || die "Invalid scp file line $_";
76 |     @A >= $field || die "Invalid scp file line $_";
77 |     if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) {
78 |       print $_;
79 |     }
80 |   }
81 | }
82 | 
83 | # tests:
84 | # the following should print "foo 1"
85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo)
86 | # the following should print "bar 2".
87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2)
88 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/gen_topo.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
 4 | 
 5 | # Generate a topology file.  This allows control of the number of states in the
 6 | # non-silence HMMs, and in the silence HMMs.
 7 | 
 8 | if (@ARGV != 4) {
 9 |   print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
10 |   print STDERR "e.g.:  utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n";
11 |   exit (1);
12 | }
13 | 
14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV;
15 | 
16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) ||
17 |   die "Unexpected number of nonsilence-model states $num_nonsil_states\n";
18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) ||
19 |   die "Unexpected number of silence-model states $num_sil_states\n";
20 | 
21 | $nonsil_phones =~ s/:/ /g; # colon-separated list -> space-separated list.
22 | $sil_phones =~ s/:/ /g;
23 | $nonsil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; # '*' so a lone one-digit phone id is accepted, as for silence.
24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
25 | 
26 | print "<Topology>\n";
27 | print "<TopologyEntry>\n";
28 | print "<ForPhones>\n";
29 | print "$nonsil_phones\n";
30 | print "</ForPhones>\n";
31 | for ($state = 0; $state < $num_nonsil_states; $state++) {
32 |   $statep1 = $state+1;
33 |   print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n";
34 | }
35 | print "<State> $num_nonsil_states </State>\n"; # non-emitting final state.
36 | print "</TopologyEntry>\n";
37 | # Now silence phones.  They have a different topology-- apart from the first and
38 | # last states, it's fully connected, as long as you have >= 3 states.
39 | 
40 | if ($num_sil_states > 1) {
41 |   $transp = 1.0 / ($num_sil_states-1); # uniform probability over the num_sil_states-1 outgoing transitions.
42 |   print "<TopologyEntry>\n";
43 |   print "<ForPhones>\n";
44 |   print "$sil_phones\n";
45 |   print "</ForPhones>\n";
46 |   print "<State> 0 <PdfClass> 0 ";
47 |   for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last
48 |     # emitting state.
49 |     print "<Transition> $nextstate $transp ";
50 |   }
51 |   print "</State>\n";
52 |   for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to
53 |     # themselves and to the last emitting state.
54 |     print "<State> $state <PdfClass> $state ";
55 |     for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) {
56 |       print "<Transition> $nextstate $transp ";
57 |     }
58 |     print "</State>\n";
59 |   }
60 |   # Final emitting state (non-skippable).
61 |   $state = $num_sil_states-1;
62 |   print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n";
63 |   # Final nonemitting state:
64 |   print "<State> $num_sil_states </State>\n";
65 |   print "</TopologyEntry>\n";
66 | } else { # single-state silence model: emitting state 0 plus the final state.
67 |   print "<TopologyEntry>\n";
68 |   print "<ForPhones>\n";
69 |   print "$sil_phones\n";
70 |   print "</ForPhones>\n";
71 |   print "<State> 0 <PdfClass> 0 ";
72 |   print "<Transition> 0 0.75 ";
73 |   print "<Transition> 1 0.25 ";
74 |   print "</State>\n";
75 |   print "<State> $num_sil_states </State>\n"; # non-emitting final state: index 1, the target of the transition above.
76 |   print "</TopologyEntry>\n";
77 | }
78 | 
79 | print "</Topology>\n";
80 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/subset_scp.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | use warnings; #sed replacement for -w perl parameter
  3 | # Copyright 2010-2011 Microsoft Corporation
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #  http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 15 | # See the Apache 2 License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | # This program selects a subset of N elements in the scp.
 19 | 
 20 | # By default, it selects them evenly from throughout the scp, in order to avoid
 21 | # selecting too many from the same speaker.  It prints them on the standard
 22 | # output.
 23 | # With the option --first, it just selects the N first utterances.
 24 | # With the option --last, it just selects the N last utterances.
 25 | 
 26 | # Last modified by JHU & HKUST @2013
 27 | 
 28 | 
 29 | $quiet = 0;
 30 | $first = 0;
 31 | $last = 0;
 32 | 
 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") {
 34 |   shift;
 35 |   $quiet = 1;
 36 | }
 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") {
 38 |   shift;
 39 |   $first = 1;
 40 | }
 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") {
 42 |   shift;
 43 |   $last = 1;
 44 | }
 45 | 
 46 | if(@ARGV < 2 ) { # N and in.scp must both remain after the options.
 47 |     die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" .
 48 |         " --quiet  causes it to not die if N > num lines in scp.\n" .
 49 |         " --first and --last make it equivalent to head or tail.\n" .
 50 |         "See also: filter_scp.pl\n";
 51 | }
 52 | 
 53 | $N = shift @ARGV;
 54 | if($N == 0) {
 55 |     die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\"";
 56 | }
 57 | $inscp = shift @ARGV;
 58 | open(I, "<$inscp") || die "Opening input scp file $inscp";
 59 | 
 60 | @F = ();
 61 | while(<I>) {
 62 |     push @F, $_;
 63 | }
 64 | $numlines = @F;
 65 | if($N > $numlines) {
 66 |   if ($quiet) {
 67 |     $N = $numlines;
 68 |   } else {
 69 |     die "You requested from subset_scp.pl more elements than available: $N > $numlines";
 70 |   }
 71 | }
 72 | 
 73 | sub select_n {
 74 |   my ($start,$end,$num_needed) = @_;
 75 |   my $diff = $end - $start;
 76 |   if ($num_needed > $diff) {
 77 |     die "select_n: code error";
 78 |   }
 79 |   if ($diff == 1 ) {
 80 |     if ($num_needed  > 0) {
 81 |       print $F[$start];
 82 |     }
 83 |   } else {
 84 |     my $halfdiff = int($diff/2);
 85 |     my $halfneeded = int($num_needed/2);
 86 |     select_n($start, $start+$halfdiff, $halfneeded);
 87 |     select_n($start+$halfdiff, $end, $num_needed - $halfneeded);
 88 |   }
 89 | }
 90 | 
 91 | if ( ! $first && ! $last) {
 92 |   if ($N > 0) {
 93 |     select_n(0, $numlines, $N);
 94 |   }
 95 | } else {
 96 |   if ($first) { # --first option: same as head.
 97 |     for ($n = 0; $n < $N; $n++) {
 98 |       print $F[$n];
 99 |     }
100 |   } else { # --last option: same as tail.
101 |     for ($n = @F - $N; $n < @F; $n++) {
102 |       print $F[$n];
103 |     }
104 |   }
105 | }
106 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/convert_ctm.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 4 | 
 5 | # This takes as standard input a ctm file that's "relative to the utterance",
 6 | # i.e. times are measured relative to the beginning of the segments, and it
 7 | # uses a "segments" file (format:
 8 | # utterance-id recording-id start-time end-time
 9 | # ) and a "reco2file_and_channel" file (format:
10 | # recording-id basename-of-file channel
11 | # ) to write to the standard output a ctm whose times are relative to the recording.
12 | $skip_unknown=undef;
13 | if ( $ARGV[0] eq "--skip-unknown" ) {
14 |   $skip_unknown=1;
15 |   shift @ARGV;
16 | }
17 | 
18 | if (@ARGV < 2 || @ARGV > 3) {
19 |   print STDERR "Usage: convert_ctm.pl <segments-file> <reco2file_and_channel-file> [<utterance-ctm>] > real-ctm\n";
20 |   exit(1);
21 | }
22 | 
23 | $segments = shift @ARGV;
24 | $reco2file_and_channel = shift @ARGV;
25 | 
26 | open(S, "<$segments") || die "opening segments file $segments";
27 | while(<S>) {
28 |   @A = split(" ", $_);
29 |   @A == 4 || die "Bad line in segments file: $_";
30 |   ($utt, $recording_id, $begin_time, $end_time) = @A;
31 |   $utt2reco{$utt} = $recording_id;
32 |   $begin{$utt} = $begin_time;
33 |   $end{$utt} = $end_time;
34 | }
35 | close(S);
36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel";
37 | while(<R>) {
38 |   @A = split(" ", $_);
39 |   @A == 3 || die "Bad line in reco2file_and_channel file: $_";
40 |   ($recording_id, $file, $channel) = @A;
41 |   $reco2file{$recording_id} = $file;
42 |   $reco2channel{$recording_id} = $channel;
43 | }
44 | 
45 | 
46 | # Now process the ctm file, which is either the standard input or the third
47 | # command-line argument.
48 | $num_done = 0;
49 | while(<>) {
50 |   @A= split(" ", $_);
51 |   ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_";
52 |   # lines look like:
53 |   # <utterance-id> 1 <begin-time> <length> <word> [ confidence ]
54 |   ($utt, $one, $wbegin, $wlen, $w, $conf) = @A;
55 |   $reco = $utt2reco{$utt};
56 |   if (!defined $reco) { 
57 |       next if defined $skip_unknown;
58 |       die "Utterance-id $utt not defined in segments file $segments"; 
59 |   }
60 |   $file = $reco2file{$reco};
61 |   $channel = $reco2channel{$reco};
62 |   if (!defined $file || !defined $channel) { 
63 |     die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 
64 |   }
65 |   $b = $begin{$utt};
66 |   $e = $end{$utt};
67 |   $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording.
68 |   $wbegin_r = sprintf("%.2f", $wbegin_r);
69 |   $wlen = sprintf("%.2f", $wlen);
70 |   if (defined $conf) {
71 |     $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 
72 |   } else {
73 |     $line = "$file $channel $wbegin_r $wlen $w\n"; 
74 |   }
75 |   if ($wbegin_r + $wlen > $e + 0.01) {
76 |     print STDERR "Warning: word appears to be past end of recording; line is $line";
77 |   }
78 |   print $line; # goes to stdout.
79 |   $num_done++;
80 | }
81 | 
82 | if ($num_done == 0) { exit 1; } else { exit 0; }
83 | 
84 | __END__
85 | 
86 | # Test example [also test it without the 0.5's]
87 | echo utt reco 10.0 20.0 > segments
88 | echo reco file A > reco2file_and_channel
89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in
90 | echo file A 18.00 1.00 word 0.5 > ctm_out
91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error
92 | rm segments reco2file_and_channel ctm_in ctm_out
93 | 
94 | 
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/tune_hyperparameters.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | ##########################################################
 3 | # pytorch-kaldi v.0.1
 4 | # Mirco Ravanelli, Titouan Parcollet
 5 | # Mila, University of Montreal
 6 | # October 2018
 7 | #
 8 | # Description:
 9 | # This script generates config files with the random hyperparameters specified by the user.
10 | # python tune_hyperparameters.py cfg_file out_folder N_exp hyperparameters_spec
11 | # e.g., python tune_hyperparameters.py cfg/TIMIT_MLP_mfcc.cfg exp/TIMIT_MLP_mfcc_tuning 10 arch_lr=randfloat(0.001,0.01) batch_size_train=randint(32,256) dnn_act=choose_str{relu,relu,relu,relu,softmax|tanh,tanh,tanh,tanh,softmax}
12 | ##########################################################
13 | 
14 | 
15 | import random
16 | import re
17 | import os
18 | import sys
19 | from random import randint
20 | 
def sample_hyperparam(spec):
    """Draw one random value for a ``name=distribution`` hyperparameter spec.

    Supported distributions (looked up in the part after the first ``=``):
      randint(lo,hi)        -> int drawn with random.randint(lo, hi)
      randfloat(lo,hi)      -> float drawn with random.uniform(lo, hi)
      choose_str{a|b|...}   -> one of the ``|``-separated options, as a string
      choose_int{a|b|...}   -> one of the options converted with int()
      choose_float{a|b|...} -> one of the options converted with float()

    Raises:
        ValueError: if the spec names no supported distribution or its
            argument list cannot be located.
    """
    # Only inspect the right-hand side, so that a hyperparameter whose *name*
    # happens to contain e.g. "randint" is not misinterpreted.
    distribution = spec.split("=", 1)[1] if "=" in spec else ""

    def _arguments(pattern):
        found = re.search(pattern, distribution)
        if found is None:
            raise ValueError("Malformed hyperparameter specification: %r" % spec)
        return found.group(1)

    if "randint" in distribution:
        lower, higher = _arguments(r"randint\((.+?)\)").split(",")
        return random.randint(int(lower), int(higher))

    if "randfloat" in distribution:
        lower, higher = _arguments(r"randfloat\((.+?)\)").split(",")
        return random.uniform(float(lower), float(higher))

    for keyword, convert in (("choose_str", str), ("choose_int", int), ("choose_float", float)):
        if keyword in distribution:
            options = _arguments(r"\{(.+?)\}").split("|")
            return convert(random.choice(options))

    raise ValueError("Unsupported hyperparameter specification: %r" % spec)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.exit("Usage: python tune_hyperparameters.py cfg_file out_folder N_exp [hyperparameters_spec ...]")

    cfg_file = sys.argv[1]
    output_folder = sys.argv[2]
    N_exp = int(sys.argv[3])
    hyperparam_list = sys.argv[4:]
    seed = 1234

    print("Generating config file for hyperparameter tuning...")

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Fixed seed: the same command line always generates the same configs.
    random.seed(seed)

    # Hyperparameter name -> full spec. A later duplicate overrides an earlier one.
    specs = {}
    for hyperparam in hyperparam_list:
        specs[hyperparam.split("=")[0].strip()] = hyperparam

    for i in range(N_exp):

        cfg_file_out = output_folder + "/exp" + str(i) + ".cfg"

        with open(cfg_file_out, "wt") as cfg_out, open(cfg_file, "rt") as cfg_in:
            for line in cfg_in:

                # Strip the key so that both "key=value" and "key = value" lines match.
                key = line.split("=")[0].strip() if "=" in line else None

                if key == "out_folder":
                    # Every generated experiment gets its own output folder.
                    line = "out_folder=" + output_folder + "/exp" + str(i) + "\n"

                if key is not None and key in specs:
                    # A new value is sampled for every matching line.
                    line_out = key + "=" + str(sample_hyperparam(specs[key])) + "\n"
                else:
                    line_out = line

                cfg_out.write(line_out)

        print("Done %s" % cfg_file_out)
84 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/rnnlm_compute_scores.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Compute scores from RNNLM.  This script takes a directory
 4 | # $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ),
 5 | # where it expects the files:
 6 | #  rnnlm  wordlist.rnn  unk.probs,
 7 | # and also an input file location where it can get the sentences to score, and
 8 | # an output file location to put the scores (negated logprobs) for each
 9 | # sentence.  This script uses the Kaldi-style "archive" format, so the input and
10 | # output files will have a first field that corresponds to some kind of
11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the
12 | # N-best list.
13 | #
14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary,
15 | # that the RNN was trained on (note, it won't include <s> or </s>),
16 | # plus <RNN_UNK> which is a kind of class where we put low-frequency
17 | # words; unk.probs gives the probs for words given this class, and it
18 | # has, on each line, "word prob".
19 | 
# Default RNNLM tool version; it is set before utils/parse_options.sh is
# sourced below.
rnnlm_ver=rnnlm-0.3e

. ./path.sh || exit 1;
. utils/parse_options.sh

# Location of the rnnlm binary inside the Kaldi tools directory.
rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm

[ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1;

if [ $# != 4 ]; then
  echo "Usage: rnnlm_compute_scores.sh <rnn-dir> <temp-dir> <input-text> <output-scores>"
  exit 1;
fi

dir=$1
tempdir=$2
text_in=$3
scores_out=$4

# All three model files must be present in the RNN directory.
for x in rnnlm wordlist.rnn unk.probs; do
  if [ ! -f $dir/$x ]; then 
    echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist."
    exit 1;
  fi
done

mkdir -p $tempdir
# Split the input into the word sequences (fields 2..NF) and the ids (field 1).
cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text
cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids.
# Replace every word that is not in wordlist.rnn by <RNN_UNK> and write the
# result to text.nounk.  For each sentence, the summed log of the unk.probs
# entries of the replaced words goes to loglikes.oov (one value per line);
# words missing from unk.probs, and empty sentences, contribute log(1.0e-07)
# and trigger a warning on stderr.
cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \
  -v logprobs=$tempdir/loglikes.oov \
 'BEGIN{ while((getline<voc)>0) { invoc[$1]=1; } while ((getline<unk)>0){ unkprob[$1]=$2;} }
  { logprob=0;
    if (NF==0) { printf "<RNN_UNK>"; logprob = log(1.0e-07);
      print "Warning: empty sequence." | "cat 1>&2"; }
    for (x=1;x<=NF;x++) { w=$x;  
    if (invoc[w]) { printf("%s ",w); } else {
      printf("<RNN_UNK> ");
      if (unkprob[w] != 0) { logprob += log(unkprob[w]); }
      else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}}
    printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk

# OK, now we compute the scores on the text with OOVs replaced
# with <RNN_UNK>
# The first output field of the tool is multiplied by log(10)
# (presumably converting log10 scores to natural log -- TODO confirm).

if [ $rnnlm_ver == "faster-rnnlm" ]; then
  $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \
     awk '{print $1*log(10);}' > $tempdir/loglikes.rnn
else
  # add the utterance_id as required by Mikolov's rnnlm
  paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk

  $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \
     awk '{print $1*log(10);}' > $tempdir/loglikes.rnn
fi

# Sanity check: there must be exactly one RNN score per OOV score line.
[ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \
  echo "rnnlm rescoring failed" && exit 1;

# Final score per sentence: the negated sum of the RNN and OOV log-likelihoods.
paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores

# scores out, with utterance-ids.
paste $tempdir/ids $tempdir/scores  > $scores_out
83 | 
84 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/decode_dnn.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | 
  4 | # Copyright 2013    Yajie Miao    Carnegie Mellon University
  5 | # Apache 2.0
  6 | 
  7 | # Decode the DNN model. The [srcdir] in this script should be the same as dir in
  8 | # build_nnet_pfile.sh. Also, the DNN model has been trained and put in srcdir.
  9 | # All these steps will be done automatically if you run the recipe file run-dnn.sh
 10 | 
 11 | # Modified 2018 Mirco Ravanelli Université de Montréal - Mila
 12 | 
 13 | 
 14 | cfg_file=$1
 15 | out_folder=$2
 16 | 
 17 | 
 18 | 
 19 | # Reading the options in the cfg file
 20 | source <(grep = $cfg_file | sed 's/ *= */=/g')
 21 | 
 22 | cd $decoding_script_folder
 23 | 
 24 | ./path.sh
 25 | ./cmd.sh
 26 | 
 27 | 
 28 | ## Begin configuration section
 29 | num_threads=1
 30 | stage=0
 31 | cmd=utils/run.pl
 32 | 
 33 | 
 34 | echo "$0 $@"  # Print the command line for logging
 35 | 
 36 | ./parse_options.sh || exit 1;
 37 | 
 38 | if [ $# != 3 ]; then
 39 |    echo "Wrong #arguments ($#, expected 5)"
 40 |    echo "Usage: steps/decode_dnn.sh [options] <graph-dir> <data-dir> <ali-dir> <decode-dir>"
 41 |    echo " e.g.: steps/decode_dnn.sh exp/tri4/graph data/test exp/tri4_ali exp/tri4_dnn/decode"
 42 |    echo "main options (for others, see top of script file)"
 43 |    echo "  --stage                                  # starts from which stage"
 44 |    echo "  --nj <nj>                                # number of parallel jobs"
 45 |    echo "  --cmd <cmd>                              # command to run in parallel with"
 46 |    echo "  --acwt <acoustic-weight>                 # default 0.1 ... used to get posteriors"
 47 |    echo "  --num-threads <n>                        # number of threads to use, default 4."
 48 |    echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
 49 |    echo "  --scoring-opts <opts>                    # options to local/score.sh"
 50 |    exit 1;
 51 | fi
 52 | 
 53 | 
 54 | 
 55 | dir=`echo $out_folder | sed 's:/$::g'` # remove any trailing slash.
 56 | featstring=$3
 57 | srcdir=`dirname $dir`; # assume model directory one level up from decoding directory.
 58 | sdata=$data/split$nj;
 59 | 
 60 | thread_string=
 61 | [ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
 62 | 
 63 | 
 64 | mkdir -p $dir/log
 65 | 
 66 | arr_ck=($(ls $featstring))
 67 | 
 68 | nj=${#arr_ck[@]}
 69 | 
 70 | echo $nj > $dir/num_jobs
 71 | 
 72 | # Some checks.  Note: we don't need $srcdir/tree but we expect
 73 | # it should exist, given the current structure of the scripts.
 74 | for f in $graphdir/HCLG.fst $data/feats.scp; do
 75 |   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
 76 | done
 77 | 
 78 | 
 79 | JOB=1
 80 | for ck_data in "${arr_ck[@]}"
 81 | do
 82 | 
 83 |     finalfeats="ark,s,cs: cat $ck_data |"
 84 |     latgen-faster-mapped$thread_string --min-active=$min_active --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt $alidir/final.mdl $graphdir/HCLG.fst "$finalfeats" "ark:|gzip -c > $dir/lat.$JOB.gz" &> $dir/log/decode.$JOB.log &
 85 |     JOB=$((JOB+1))
 86 | done
 87 | wait
 88 | 
 89 | 
 90 | 
 91 | # Copy the source model in order for scoring
 92 | cp $alidir/final.mdl $srcdir
 93 |   
 94 | 
 95 | if ! $skip_scoring ; then
 96 |   [ ! -x $scoring_script ] && \
 97 |     echo "$0: not scoring because local/score.sh does not exist or not executable." && exit 1;
 98 |   $scoring_script $scoring_opts $data $graphdir $dir
 99 | fi
100 | 
101 | exit 0;
102 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/perturb_data_dir_speed.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash 
 2 | 
 3 | # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
 4 | #           2014  Tom Ko
 5 | # Apache 2.0
 6 | 
 7 | # This script operates on a directory, such as in data/train/,
 8 | # that contains some subset of the following files:
 9 | #  wav.scp
10 | #  spk2utt
11 | #  utt2spk
12 | #  text
13 | #
14 | # It generates the files which are used for perturbing the speed of the original data.
15 | 
. utils/parse_options.sh

if [ $# != 3 ]; then
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 0.9 data/train_si284 data/train_si284p"
  exit 1
fi

export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
# Utterance, speaker and recording ids all get the same "sp<factor>-" prefix.
spk_prefix=$label$factor"-"
utt_prefix=$label$factor"-"

#check is sox on the path
if ! command -v sox >/dev/null 2>&1; then
  echo "sox: command not found"
  exit 1;
fi

# Both files are read unconditionally below, so fail early with a clear message.
for f in utt2spk spk2utt; do
  if [ ! -f $srcdir/$f ]; then
    echo "$0: no such file $srcdir/$f"
    exit 1;
  fi
done

set -e;
set -o pipefail

mkdir -p $destdir

# utt_map / spk_map: "<old-id> <prefixed-id>"; utt2uniq: "<prefixed-utt> <old-utt>".
cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map  | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
  # also apply the spk_prefix to the recording-ids.
  cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

  # Segment start/end times are divided by the speed factor.
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
    utils/apply_map.pl -f 2 $destdir/reco_map | \
      awk -v factor=$factor \
        '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments

  # Append a sox "speed" stage to each wav.scp entry: entries that already end
  # in a pipe get sox reading from stdin, plain file entries are passed to sox
  # as its input file.  After $1="" the record $0 starts with a space.
  utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
    awk -v factor=$factor \
        '{wid=$1; $1=""; if ($NF=="|") {print wid $0 " sox -t wav - -t wav - speed " factor " |"} 
          else {print wid " sox -t wav" $0 " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  if [ -f $srcdir/reco2file_and_channel ]; then
    utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
  fi
  
  rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
     awk -v factor=$factor \
       '{wid=$1; $1=""; if ($NF=="|") {print wid $0 " sox -t wav - -t wav - speed " factor " |"} 
         else {print wid " sox -t wav" $0 " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi


rm $destdir/spk_map $destdir/utt_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats $destdir
95 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/timit_prepare_dict.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2013   (Authors: Daniel Povey, Bagher BabaAli)
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | # Call this script from one level above, e.g. from the s3/ directory.  It puts
19 | # its output in data/local/.
20 | 
21 | # The parts of the output of this that will be needed are
22 | # [in data/local/dict/ ]
23 | # lexicon.txt
24 | # extra_questions.txt
25 | # nonsilence_phones.txt
26 | # optional_silence.txt
27 | # silence_phones.txt
28 | 
29 | # run this from ../
srcdir=data/local/data
dir=data/local/dict
lmdir=data/local/nist_lm
tmpdir=data/local/lm_tmp

mkdir -p $dir $lmdir $tmpdir

[ -f path.sh ] && . ./path.sh

# Both the phone inventory and the LM are derived from the training transcripts.
if [ ! -f $srcdir/train.text ]; then
  echo "$0: Error: no such file $srcdir/train.text" >&2
  exit 1
fi

#(1) Dictionary preparation:

# silence phones, one per line.
echo sil > $dir/silence_phones.txt
echo sil > $dir/optional_silence.txt

# Create the lexicon, which is just an identity mapping: every distinct symbol
# found in the training transcripts (fields 2..N of each line) maps to itself.
cut -d' ' -f2- $srcdir/train.text | tr ' ' '\n' | sort -u > $dir/phones.txt
paste $dir/phones.txt $dir/phones.txt > $dir/lexicon.txt || exit 1;
# -x: only drop phones that are exactly a silence phone, not every phone that
# merely contains "sil" as a substring.
grep -v -F -x -f $dir/silence_phones.txt $dir/phones.txt > $dir/nonsilence_phones.txt 

# A few extra questions that will be added to those obtained by automatically clustering
# the "real" phones: one line with all the silence phones, then one line per
# group of non-silence phones sharing the same trailing digits (possibly none).
cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1;
cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
  $p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
 >> $dir/extra_questions.txt || exit 1;

# (2) Create the phone bigram LM
if [ -z "$IRSTLM" ] ; then
  export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default, but." >&2
  echo "$0: Error: this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi

# LM training text: the transcripts without the utterance id, wrapped in
# sentence-boundary symbols.
cut -d' ' -f2- $srcdir/train.text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
  > $srcdir/lm_train.text

build-lm.sh -i $srcdir/lm_train.text -n 2 \
  -o $tmpdir/lm_phone_bg.ilm.gz

# Do not report success if the LM estimation produced nothing.
if [ ! -s $tmpdir/lm_phone_bg.ilm.gz ]; then
  echo "$0: Error: build-lm.sh did not produce $tmpdir/lm_phone_bg.ilm.gz" >&2
  exit 1
fi

compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz 

echo "Dictionary & language model preparation succeeded"
87 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/summarize_logs.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
  4 | 
  5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
  6 | 
  7 | sub split_hundreds { # split list of filenames into groups of 100.
  8 |   my $names = shift @_;
  9 |   my @A = split(" ", $names);
 10 |   my @ans = ();
 11 |   while (@A > 0) {
 12 |     my $group = "";
 13 |     for ($x = 0; $x < 100 && @A>0; $x++) {
 14 |       $fname = pop @A;
 15 |       $group .= "$fname ";
 16 |     }
 17 |     push @ans, $group;
 18 |   }
 19 |   return @ans;
 20 | }
 21 | 
# Parse one accounting entry made of whitespace-separated "time=<digits>" and
# "threads=<digits>" elements.
#
# Returns the list ($time, $threads).  Dies if an element matches neither
# pattern, or if either of the two attributes is missing.
#
# Note: $entry, @elems, $elem, $time and $threads are not declared with "my",
# so they are package globals that remain set after the call.
sub parse_accounting_entry {
  $entry= shift @_;

  @elems = split " ", $entry;
  
  $time=undef;
  $threads=undef;
  foreach $elem (@elems) {
    if ( $elem=~ m/time=(\d+)/ ) {
      # Strip the "time=" prefix in place; whatever surrounds the match stays.
      $elem =~ s/time=(\d+)/$1/;
      $time = $elem;
    } elsif ( $elem=~ m/threads=(\d+)/ ) {
      # Same for the "threads=" prefix.
      $elem =~ s/threads=(\d+)/$1/g;
      $threads = $elem;
    } else {
      die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
    }
  }

  if (defined($time) and defined($threads) ) {
    return ($time, $threads);
  } else {
    die "The accounting entry \"$entry\" did not contain all necessary attributes";
  }
}
 47 | 
 48 | foreach $dir (@ARGV) {
 49 | 
 50 |   #$dir = $ARGV[0];
 51 |   print $dir
 52 | 
 53 |   ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ;
 54 | 
 55 |   $dir =~ s:/$::; # Remove trailing slash.
 56 | 
 57 | 
 58 |   # Group the files into categories where all have the same base-name.
 59 |   foreach $f (glob ("$dir/*.log")) {
 60 |     $f_category = $f;
 61 |     # do next expression twice; s///g doesn't work as they overlap.
 62 |     $f_category =~ s:\.\d+\.(?!\d+):.*.:;
 63 |     #$f_category =~ s:\.\d+\.:.*.:;
 64 |     $fmap{$f_category} .= " $f";
 65 |   }
 66 | }
 67 | 
 68 | foreach $c (sort (keys %fmap) ) {
 69 |   $n = 0;
 70 |   foreach $fgroup (split_hundreds($fmap{$c})) {
 71 |     $n += `grep -w WARNING $fgroup | wc -l`;
 72 |   }
 73 |   if ($n != 0) {
 74 |     print "$n warnings in $c\n"
 75 |   }
 76 | }
 77 | foreach $c (sort (keys %fmap)) {
 78 |   $n = 0;
 79 |   foreach $fgroup (split_hundreds($fmap{$c})) {
 80 |     $n += `grep -w ERROR $fgroup | wc -l`;
 81 |   }
 82 |   if ($n != 0) {
 83 |     print "$n errors in $c\n"
 84 |   }
 85 | }
 86 | 
 87 | $supertotal_cpu_time=0.0;
 88 | $supertotal_clock_time=0.0;
 89 | $supertotal_threads=0.0;
 90 | 
 91 | foreach $c (sort (keys %fmap)) {
 92 |   $n = 0;
 93 | 
 94 |   $total_cpu_time=0.0;
 95 |   $total_clock_time=0.0;
 96 |   $total_threads=0.0;
 97 |   foreach $fgroup (split_hundreds($fmap{$c})) {
 98 |     $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`;
 99 |     
100 |     #print $lines ."\n";
101 | 
102 |     @entries = split "\n", $lines;
103 | 
104 |     foreach $line (@entries) {
105 |       $time, $threads = parse_accounting_entry($line);
106 | 
107 |       $total_cpu_time += $time * $threads;
108 |       $total_threads += $threads;
109 |       if ( $time > $total_clock_time ) {
110 |         $total_clock_time = $time;
111 |       }
112 |     }
113 |   }
114 |   print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
115 | 
116 |   $supertotal_cpu_time += $total_cpu_time;
117 |   $supertotal_clock_time += $total_clock_time;
118 |   $supertotal_threads += $total_threads;
119 | }
120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";
121 | 
122 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/timit_norm_trans.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | # Copyright 2012  Arnab Ghoshal
 4 | 
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #  http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | 
19 | # This script normalizes the TIMIT phonetic transcripts that have been 
20 | # extracted in a format where each line contains an utterance ID followed by 
21 | # the transcript, e.g.:
22 | # fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h#
23 | 
my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n
Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a 
smaller set defined by the -m option. This script assumes that the mapping is 
done in the \"standard\" fashion, i.e. to 48 or 39 phones.  The input is 
assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can
be changed using the -from option. The input format is assumed to be utterance 
ID followed by transcript on the same line.\n";

use strict;
use Getopt::Long;
die "$usage" unless(@ARGV >= 1);
my ($in_trans, $phone_map, $num_phones_out);
my $num_phones_in = 60;
GetOptions ("i=s" => \$in_trans,          # Input transcription
	    "m=s" => \$phone_map,         # File containing phone mappings
	    "from=i" => \$num_phones_in,  # Input #phones: must be 60 or 48
	    "to=i" => \$num_phones_out ); # Output #phones: must be 48 or 39

die $usage unless(defined($in_trans) && defined($phone_map) && 
		  defined($num_phones_out));
if ($num_phones_in != 60 && $num_phones_in != 48) {
  die "Can only used 60 or 48 for -from (used $num_phones_in)."
}
if ($num_phones_out != 48 && $num_phones_out != 39) {
  die "Can only used 48 or 39 for -to (used $num_phones_out)."
}
unless ($num_phones_out < $num_phones_in) {
  die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)."
}


# Read the phone map: three columns per line; which column is the source and
# which the target depends on -from and -to.
open(my $map_fh, "<", $phone_map) or die "Cannot open mappings file '$phone_map': $!";
my (%phonemap, %seen_phones);
my $num_seen_phones = 0;
while (my $line = <$map_fh>) {
  chomp $line;
  # Ignore glottal stops: only the entry whose first field is exactly "q",
  # not every line that merely starts with the letter q.
  next if ($line =~ /^q(\s|$)/);
  $line =~ m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $line";
  my $mapped_from = ($num_phones_in == 60)? $1 : $2;
  my $mapped_to = ($num_phones_out == 48)? $2 : $3;
  if (!defined($seen_phones{$mapped_to})) {
    $seen_phones{$mapped_to} = 1;
    $num_seen_phones += 1;
  }
  $phonemap{$mapped_from} = $mapped_to;
}
close($map_fh);
# The number of distinct target phones must match the requested set size.
if ($num_seen_phones != $num_phones_out) {
  die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones";
}

# Map every transcript: "<utt-id> <phone> <phone> ...".
open(my $trans_fh, "<", $in_trans) or die "Cannot open transcription file '$in_trans': $!";
while (my $line = <$trans_fh>) {
  chomp $line;
  $line =~ m:^(\S+)\s+(.+): or die "Bad line: $line";
  my $utt_id = $1;
  my $trans = $2;

  # Remove glottal stops as whole tokens (a plain s/q//g would also delete the
  # letter q inside other symbols).  split(" ") also normalizes the spaces.
  my @phones = grep { $_ ne "q" } split(" ", $trans);

  print $utt_id;
  for my $phone (@phones) {
    # Phones without an entry in the map are passed through unchanged.
    if (exists $phonemap{$phone}) { print " $phonemap{$phone}"; }
    else { print " $phone"; }
  }
  print "\n";
}
close($trans_fh);
91 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/score_wsj.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
  3 | # Apache 2.0
  4 | 
  5 | [ -f ./path.sh ] && . ./path.sh
  6 | 
  7 | # begin configuration section.
  8 | cmd=run.pl
  9 | stage=0
 10 | decode_mbr=true
 11 | reverse=false
 12 | word_ins_penalty=0.0
 13 | min_lmwt=5
 14 | max_lmwt=20
 15 | #end configuration section.
 16 | 
 17 | [ -f ./path.sh ] && . ./path.sh
 18 | . parse_options.sh || exit 1;
 19 | 
 20 | if [ $# -ne 3 ]; then
 21 |   echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
 22 |   echo " Options:"
 23 |   echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
 24 |   echo "    --stage (0|1|2)                 # start scoring script from part-way through."
 25 |   echo "    --decode_mbr (true/false)       # maximum bayes risk decoding (confusion network)."
 26 |   echo "    --min_lmwt <int>                # minumum LM-weight for lattice rescoring "
 27 |   echo "    --max_lmwt <int>                # maximum LM-weight for lattice rescoring "
 28 |   echo "    --reverse (true/false)          # score with time reversed features "
 29 |   exit 1;
 30 | fi
 31 | 
 32 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 
 33 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
 34 | hubdir=`dirname $hubscr`
 35 | 
 36 | data=$1
 37 | lang_or_graph=$2
 38 | dir=$3
 39 | 
 40 | symtab=$lang_or_graph/words.txt
 41 | 
 42 | hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl 
 43 | [ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
 44 | hubdir=`dirname $hubscr`
 45 | 
 46 | 
 47 | for f in $symtab $dir/lat.1.gz $data/text; do
 48 |   [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
 49 | done
 50 | 
 51 | mkdir -p $dir/scoring/log
 52 | 
 53 | cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
 54 | 
 55 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
 56 |   lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
 57 |   lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
 58 |   lattice-best-path --word-symbol-table=$symtab \
 59 |     ark:- ark,t:$dir/scoring/LMWT.tra || exit 1;
 60 | 
 61 | if $reverse; then
 62 |   for lmwt in `seq $min_lmwt $max_lmwt`; do
 63 |     mv $dir/scoring/$lmwt.tra $dir/scoring/$lmwt.tra.orig
 64 |     awk '{ printf("%s ",$1); for(i=NF; i>1; i--){ printf("%s ",$i); } printf("\n"); }' \
 65 |        <$dir/scoring/$lmwt.tra.orig >$dir/scoring/$lmwt.tra
 66 |   done
 67 | fi
 68 | 
 69 | # Note: the double level of quoting for the sed command
 70 | #$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
 71 | #   cat $dir/scoring/LMWT.tra \| \
 72 | #    utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
 73 | #    compute-wer --text --mode=present \
 74 | #     ark:$dir/scoring/test_filt.txt  ark,p:- ">&" $dir/wer_LMWT || exit 1;
 75 | 
 76 | 
 77 | # glm file
 78 | echo ";; empty.glm" > $dir/scoring/glm
 79 | echo "  [FAKE]     =>  %HESITATION     / [ ] __ [ ] ;; hesitation token" >> $dir/scoring/glm
 80 | echo "" >> $dir/scoring/glm
 81 | 
 82 | 
 83 | # Creare scoring folders
 84 | for lmwt in `seq $min_lmwt $max_lmwt`; do
 85 |  mkdir -p $dir/score_$lmwt/
 86 | done
 87 | 
 88 | 
 89 | # ctm file (for sclite)
 90 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
 91 |    cat $dir/scoring/LMWT.tra \| \
 92 |     utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' "|" awk '{for (i = 2; i <= NF; i++) {printf "%s 1 0.000 0.000 %s\n",$1,$i}}' "|" \
 93 |     tr -d . ">&" $dir/score_LMWT/ctm || exit 1
 94 | 
 95 | 
 96 | # Score the set...
 97 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
 98 |    $hubscr -p $hubdir -V -l english -h hub5 -g $dir/scoring/glm -r $data/stm $dir/score_LMWT/ctm || exit 1;
 99 | 
100 | 
101 | exit 0;
102 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/sym2int.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
  3 | 
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #  http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 14 | # See the Apache 2 License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | 
 18 | $ignore_oov = 0;
 19 | 
 20 | for($x = 0; $x < 2; $x++) {
 21 |   if ($ARGV[0] eq "--map-oov") {
 22 |     shift @ARGV; 
 23 |     $map_oov = shift @ARGV;
 24 |     if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") {
 25 |       # disallow '-f', the empty string and anything ending in words.txt as the
 26 |       # OOV symbol because these are likely command-line errors.
 27 |       die "the --map-oov option requires an argument";
 28 |     }
 29 |   }
 30 |   if ($ARGV[0] eq "-f") {
 31 |     shift @ARGV;
 32 |     $field_spec = shift @ARGV; 
 33 |     if ($field_spec =~ m/^\d+$/) {
 34 |       $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
 35 |     }
 36 |     if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
 37 |       if ($1 ne "") {
 38 |         $field_begin = $1 - 1;  # Change to zero-based indexing.
 39 |       }
 40 |       if ($2 ne "") {
 41 |         $field_end = $2 - 1;    # Change to zero-based indexing.
 42 |       }
 43 |     }
 44 |     if (!defined $field_begin && !defined $field_end) {
 45 |       die "Bad argument to -f option: $field_spec"; 
 46 |     }
 47 |   }
 48 | }
 49 | 
 50 | $symtab = shift @ARGV;
 51 | if (!defined $symtab) {
 52 |   print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
 53 |     "options: [--map-oov <oov-symbol> ]  [-f <field-range> ]\n" .
 54 |       "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n";
 55 | }
 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
 57 | while(<F>) {
 58 |     @A = split(" ", $_);
 59 |     @A == 2 || die "bad line in symbol table file: $_";
 60 |     $sym2int{$A[0]} = $A[1] + 0;
 61 | }
 62 | 
 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up
 64 |   if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
 65 |   $map_oov = $sym2int{$map_oov};
 66 | }
 67 | 
 68 | $num_warning = 0;
 69 | $max_warning = 20;
 70 | 
 71 | while (<>) {
 72 |   @A = split(" ", $_);
 73 |   @B = ();
 74 |   for ($n = 0; $n < @A; $n++) {
 75 |     $a = $A[$n];
 76 |     if ( (!defined $field_begin || $n >= $field_begin)
 77 |          && (!defined $field_end || $n <= $field_end)) {
 78 |       $i = $sym2int{$a};
 79 |       if (!defined ($i)) {
 80 |         if (defined $map_oov) {
 81 |           if ($num_warning++ < $max_warning) {
 82 |             print STDERR "sym2int.pl: replacing $a with $map_oov\n";
 83 |             if ($num_warning == $max_warning) {
 84 |               print STDERR "sym2int.pl: not warning for OOVs any more times\n";
 85 |             }
 86 |           }
 87 |           $i = $map_oov;
 88 |         } else {
 89 |           $pos = $n+1;
 90 |           die "sym2int.pl: undefined symbol $a (in position $pos)\n";
 91 |         }
 92 |       }
 93 |       $a = $i;
 94 |     }
 95 |     push @B, $a;
 96 |   }
 97 |   print join(" ", @B);
 98 |   print "\n";
 99 | }
100 | if ($num_warning > 0) {
101 |   print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 
102 | }
103 | 
104 | exit(0);
105 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/copy_data_dir.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
  4 | # Apache 2.0
  5 | 
  6 | # This script operates on a directory, such as in data/train/,
  7 | # that contains some subset of the following files:
  8 | #  feats.scp
  9 | #  wav.scp
 10 | #  spk2utt
 11 | #  utt2spk
 12 | #  text
 13 | #
 14 | # It copies to another directory, possibly adding a specified prefix or a suffix
 15 | # to the utterance and/or speaker names.  Note, the recording-ids stay the same.
 16 | #
 17 | 
 18 | 
 19 | # begin configuration section
 20 | spk_prefix=
 21 | utt_prefix=
 22 | spk_suffix=
 23 | utt_suffix=
 24 | validate_opts=   # should rarely be needed.
 25 | # end configuration section
 26 | 
 27 | . utils/parse_options.sh
 28 | 
 29 | if [ $# != 2 ]; then  # exactly <srcdir> and <destdir> must remain once the options are parsed
 30 |   echo "Usage: "
 31 |   echo "  $0 [options] <srcdir> <destdir>"
 32 |   echo "e.g.:"
 33 |   echo " $0 --spk-prefix 1- --utt-prefix 1- data/train data/train_1"
 34 |   echo "Options"
 35 |   echo "   --spk-prefix <prefix>     # Prefix for speaker ids, default empty"
 36 |   echo "   --utt-prefix <prefix>     # Prefix for utterance ids, default empty"
 37 |   echo "   --spk-suffix <suffix>     # Suffix for speaker ids, default empty"
 38 |   echo "   --utt-suffix <suffix>     # Suffix for utterance ids, default empty"
 39 |   exit 1;
 40 | fi
 41 | 
 42 | 
 43 | export LC_ALL=C
 44 | 
 45 | srcdir=$1
 46 | destdir=$2
 47 | 
 48 | if [ ! -f $srcdir/utt2spk ]; then
 49 |   echo "copy_data_dir.sh: no such file $srcdir/utt2spk" 
 50 |   exit 1;
 51 | fi
 52 | 
 53 | set -e;
 54 | 
 55 | mkdir -p $destdir
 56 | 
 57 | cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
 58 | cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map
 59 | 
 60 | if [ ! -f $srcdir/utt2uniq ]; then
 61 |   if [[ ! -z $utt_prefix  ||  ! -z $utt_suffix ]]; then
 62 |     cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
 63 |   fi
 64 | else
 65 |   cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
 66 | fi
 67 | 
 68 | cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map  | \
 69 |   utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk
 70 | 
 71 | utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt
 72 | 
 73 | if [ -f $srcdir/feats.scp ]; then
 74 |   utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
 75 | fi
 76 | 
 77 | 
 78 | if [ -f $srcdir/segments ]; then
 79 |   utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
 80 |   cp $srcdir/wav.scp $destdir
 81 |   if [ -f $srcdir/reco2file_and_channel ]; then
 82 |     cp $srcdir/reco2file_and_channel $destdir/
 83 |   fi
 84 | else # no segments->wav indexed by utt.
 85 |   if [ -f $srcdir/wav.scp ]; then 
 86 |     utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
 87 |   fi
 88 | fi
 89 | 
 90 | if [ -f $srcdir/text ]; then
 91 |   utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
 92 | fi
 93 | if [ -f $srcdir/spk2gender ]; then
 94 |   utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
 95 | fi
 96 | if [ -f $srcdir/cmvn.scp ]; then
 97 |   utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
 98 | fi
 99 | for f in stm glm ctm; do
100 |   if [ -f $srcdir/$f ]; then
101 |     cp $srcdir/$f $destdir
102 |   fi
103 | done
104 | 
105 | rm $destdir/spk_map $destdir/utt_map
106 | 
107 | echo "$0: copied data from $srcdir to $destdir"
108 | 
109 | [ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
110 | [ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"
111 | 
112 | utils/validate_data_dir.sh $validate_opts $destdir
113 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/parse_options.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
 4 | #                 Arnab Ghoshal, Karel Vesely
 5 | 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #  http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | 
20 | # Parse command-line options.
21 | # To be sourced by another script (as in ". parse_options.sh").
22 | # Option format is: --option-name arg
23 | # and shell variable "option_name" gets set to value "arg."
24 | # The exception is --help, which takes no arguments, but prints the 
25 | # $help_message variable (if defined).
26 | 
27 | 
28 | ###
29 | ### The --config file options have lower priority to command line 
30 | ### options, so we need to import them first...
31 | ###
32 | 
33 | # Now import all the configs specified by command-line, in left-to-right order
34 | for ((argpos=1; argpos<$#; argpos++)); do  # stops before the last argument: --config needs a value after it
35 |   if [ "${!argpos}" == "--config" ]; then
36 |     argpos_plus1=$((argpos+1))
37 |     config=${!argpos_plus1}
38 |     [ ! -r "$config" ] && echo "$0: missing config '$config'" 1>&2 && exit 1  # quoted, so an empty name is rejected too
39 |     . "$config"  # source the config file.
40 |   fi
41 | done
42 | 
43 | 
44 | ###
45 | ### Now we process the command line options
46 | ###
47 | while true; do
48 |   [ -z "${1:-}" ] && break;  # break if there are no arguments
49 |   case "$1" in
50 |     # If the enclosing script is called with --help option, print the help
51 |     # message and exit.  Scripts should put help messages in $help_message
52 |   --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
53 |       else printf "$help_message\n" 1>&2 ; fi;
54 |       exit 0 ;;
55 |   --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
56 |        exit 1 ;;
57 |     # If the first command-line argument begins with "--" (e.g. --foo-bar),
58 |     # then work out the variable name as $name, which will equal "foo_bar".
59 |   --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
60 |     # Next we test whether the variable in question is undefined-- if so it's
61 |     # an invalid option and we die.  Note: $0 evaluates to the name of the
62 |     # enclosing script.  The test [ -z ${foo_bar+xxx} ] is true if foo_bar is
63 |     # undefined; it is wrapped in "eval" because foo_bar is itself inside $name.
64 |       eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
65 |     # Every option takes a value.  Without this check a trailing "--name" would
66 |     # make "shift 2" below fail without shifting, and the loop would never end.
67 |       [ $# -lt 2 ] && echo "$0: option $1 requires an argument" 1>&2 && exit 1;
68 |       oldval="`eval echo \\$$name`";
69 |     # Work out whether we seem to be expecting a Boolean argument.
70 |       if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
71 |         was_bool=true;
72 |       else
73 |         was_bool=false;
74 |       fi
75 | 
76 |     # Set the variable to the right value-- the escaped quotes make it work if
77 |     # the option had spaces, like --cmd "queue.pl -sync y"
78 |       eval $name=\"$2\";
79 | 
80 |     # Check that Boolean-valued arguments are really Boolean.
81 |       if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
82 |         echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
83 |         exit 1;
84 |       fi
85 |       shift 2;
86 |       ;;
87 |   *) break;
88 |   esac
89 | done
90 | 
91 | 
92 | # Check for an empty argument to the --cmd option, which can easily occur as a 
93 | # result of scripting errors.
94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
95 | 
96 | 
97 | true; # so this script returns exit code 0.
98 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/parse_options.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
 4 | #                 Arnab Ghoshal, Karel Vesely
 5 | 
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #  http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | 
20 | # Parse command-line options.
21 | # To be sourced by another script (as in ". parse_options.sh").
22 | # Option format is: --option-name arg
23 | # and shell variable "option_name" gets set to value "arg."
24 | # The exception is --help, which takes no arguments, but prints the 
25 | # $help_message variable (if defined).
26 | 
27 | 
28 | ###
29 | ### The --config file options have lower priority to command line 
30 | ### options, so we need to import them first...
31 | ###
32 | 
33 | # Now import all the configs specified by command-line, in left-to-right order
34 | for ((argpos=1; argpos<$#; argpos++)); do  # stops before the last argument: --config needs a value after it
35 |   if [ "${!argpos}" == "--config" ]; then
36 |     argpos_plus1=$((argpos+1))
37 |     config=${!argpos_plus1}
38 |     [ ! -r "$config" ] && echo "$0: missing config '$config'" 1>&2 && exit 1  # quoted, so an empty name is rejected too
39 |     . "$config"  # source the config file.
40 |   fi
41 | done
42 | 
43 | 
44 | ###
45 | ### Now we process the command line options
46 | ###
47 | while true; do
48 |   [ -z "${1:-}" ] && break;  # break if there are no arguments
49 |   case "$1" in
50 |     # If the enclosing script is called with --help option, print the help
51 |     # message and exit.  Scripts should put help messages in $help_message
52 |   --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
53 |       else printf "$help_message\n" 1>&2 ; fi;
54 |       exit 0 ;;
55 |   --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
56 |        exit 1 ;;
57 |     # If the first command-line argument begins with "--" (e.g. --foo-bar),
58 |     # then work out the variable name as $name, which will equal "foo_bar".
59 |   --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
60 |     # Next we test whether the variable in question is undefined-- if so it's
61 |     # an invalid option and we die.  Note: $0 evaluates to the name of the
62 |     # enclosing script.  The test [ -z ${foo_bar+xxx} ] is true if foo_bar is
63 |     # undefined; it is wrapped in "eval" because foo_bar is itself inside $name.
64 |       eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
65 |     # Every option takes a value.  Without this check a trailing "--name" would
66 |     # make "shift 2" below fail without shifting, and the loop would never end.
67 |       [ $# -lt 2 ] && echo "$0: option $1 requires an argument" 1>&2 && exit 1;
68 |       oldval="`eval echo \\$$name`";
69 |     # Work out whether we seem to be expecting a Boolean argument.
70 |       if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
71 |         was_bool=true;
72 |       else
73 |         was_bool=false;
74 |       fi
75 | 
76 |     # Set the variable to the right value-- the escaped quotes make it work if
77 |     # the option had spaces, like --cmd "queue.pl -sync y"
78 |       eval $name=\"$2\";
79 | 
80 |     # Check that Boolean-valued arguments are really Boolean.
81 |       if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
82 |         echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
83 |         exit 1;
84 |       fi
85 |       shift 2;
86 |       ;;
87 |   *) break;
88 |   esac
89 | done
90 | 
91 | 
92 | # Check for an empty argument to the --cmd option, which can easily occur as a 
93 | # result of scripting errors.
94 | [ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
95 | 
96 | 
97 | true; # so this script returns exit code 0.
98 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/reverse_lm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Brno University of Technology (Author: Mirko Hannemann)
 4 | # JHU (Author: Dan Povey)
 5 | # Apache 2.0
 6 | 
 7 | # configuration section (each variable below can be overridden as --<name> <value>)
 8 | tmpdir=data/local/lm_tmp  # only for OOVs and checks
 9 | lexicon=data/local/lang_tmp.reverse/lexicon.txt # only for checks
10 | # end config section
11 | 
12 | echo "$0 $@"  # Print the command line for logging
13 | 
14 | [ -f ./path.sh ] && . ./path.sh; # source the path.
15 | . parse_options.sh || exit 1;
16 | 
17 | # Create the temporary directory only after option parsing, so a --tmpdir override is the one created.
18 | mkdir -p "$tmpdir"
19 | if [ $# != 3 ]; then
20 |    echo "Usage: utils/reverse_lm.sh [options] <arpa-gz-file> <lang-dir> <out-dir>"
21 |    echo "e.g.: utils/reverse_lm.sh data/local/nist_lm/lm_tgpr_5k.arpa.gz data/lang.reverse data/lang_test_tgpr_5k.reverse"
22 |    echo "... where files from <lang-dir> are copied into <out-dir>"
23 |    echo "options:"
24 |    echo " --lexicon <lexicon-file>   reversed lexicon (only for checks)"
25 |    exit 1;
26 | fi
27 | 
28 | lm=$1 # gzipped arpa file
29 | langdir=$2
30 | outdir=$3 # output directory
31 | 
32 | # create the corresponding FST for the language model
33 | # and the corresponding lang_test_* directory.
34 | 
35 | echo Preparing reverse language model from $lm into $outdir
36 | echo "Finding OOVs and strange silences"
37 | mkdir -p $outdir
38 | for f in phones.txt words.txt L.fst L_disambig.fst phones/; do
39 |   cp -r $langdir/$f $outdir
40 | done
41 | gunzip -c $lm | utils/find_arpa_oovs.pl $outdir/words.txt  > $tmpdir/oovs.txt
42 | 
43 | # grep -v '<s> <s>' because the LM seems to have some strange and useless
44 | # stuff in it with multiple <s>'s in the history.  Encountered some other similar
45 | # things in a LM from Geoff.  Removing all "illegal" combinations of <s> and </s>,
46 | # which are supposed to occur only at the beginning/end of an utterance.  These can cause
47 | # determinization failures of CLG [ends up being epsilon cycles].
48 | gunzip -c $lm | \
49 |   grep -v '<s> <s>' | \
50 |   grep -v '</s> <s>' | \
51 |   grep -v '</s> </s>' > $outdir/forward.arpa
52 | echo "Mapping ARPA to reverse ARPA"
53 | python utils/reverse_arpa.py $outdir/forward.arpa > $outdir/reverse.arpa
54 | arpa2fst $outdir/reverse.arpa | fstprint | \
55 |   grep -v "230258.5" | \
56 |   utils/remove_oovs.pl $tmpdir/oovs.txt | \
57 |   utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$outdir/words.txt \
58 |     --osymbols=$outdir/words.txt  --keep_isymbols=false --keep_osymbols=false \
59 |     | fstrmepsilon > $outdir/G_org.fst
60 | #--arc_type=log
61 | 
62 | echo "Push weights to make it stochastic (log semi-ring)"
63 | # delta must be very small otherwise weight pushing won't succeed
64 | #fstpush --push_weights=true --push_labels=true --delta=1E-7 $outdir/G_log.fst >$outdir/G_log_pushed.fst
65 | fstpushspecial --delta=1E-5 $outdir/G_org.fst |\
66 |   fstarcsort --sort_type=ilabel >$outdir/G.fst
67 | 
68 | fstisstochastic $outdir/G.fst
69 | # The output is like:
70 | # 9.14233e-05 -0.259833
71 | # we do expect the first of these 2 numbers to be close to zero (the second is
72 | # nonzero because the backoff weights make the states sum to >1).
73 | # Because of the <s> fiasco for these particular LMs, the first number is not
74 | # as close to zero as it could be.
75 | 
76 | # Everything below is only for diagnostic.
77 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
78 | # this might cause determinization failure of CLG.
79 | # #0 is treated as an empty word.
80 | 
81 | if [ -f $lexicon ]; then
82 |   mkdir -p $tmpdir/g
83 |   awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
84 |     < "$lexicon"  >$tmpdir/g/select_empty.fst.txt
85 |   fstcompile --isymbols=$outdir/words.txt --osymbols=$outdir/words.txt $tmpdir/g/select_empty.fst.txt | \
86 |     fstarcsort --sort_type=olabel | fstcompose - $outdir/G.fst > $tmpdir/g/empty_words.fst
87 |   fstinfo $tmpdir/g/empty_words.fst | grep cyclic | grep -w 'y' && 
88 |   echo "Language model has cycles with empty words" && exit 1
89 |   rm -r $tmpdir/g
90 | fi
91 | echo "Succeeded in creating reversed language model."
92 | rm -r $tmpdir
93 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/make_lstm_proto.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2015  Brno University of Technology (author: Karel Vesely)
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #  http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 15 | # See the Apache 2 License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'.
 19 | from __future__ import print_function
 20 | 
 21 | import sys
 22 | 
 23 | ###
 24 | ### Parse options
 25 | ###
 26 | from optparse import OptionParser
 27 | 
 28 | usage = "%prog [options] <feat-dim> <num-leaves> >nnet-proto-file"
 29 | parser = OptionParser(usage)
 30 | #
 31 | parser.add_option(
 32 |     "--num-cells", dest="num_cells", type="int", default=800, help="Number of LSTM cells [default: %default]"
 33 | )
 34 | parser.add_option(
 35 |     "--num-recurrent",
 36 |     dest="num_recurrent",
 37 |     type="int",
 38 |     default=512,
 39 |     help="Number of LSTM recurrent units [default: %default]",
 40 | )
 41 | parser.add_option(
 42 |     "--num-layers", dest="num_layers", type="int", default=2, help="Number of LSTM layers [default: %default]"
 43 | )
 44 | parser.add_option(
 45 |     "--lstm-stddev-factor",
 46 |     dest="lstm_stddev_factor",
 47 |     type="float",
 48 |     default=0.01,
 49 |     help="Standard deviation of initialization [default: %default]",
 50 | )
 51 | parser.add_option(
 52 |     "--param-stddev-factor",
 53 |     dest="param_stddev_factor",
 54 |     type="float",
 55 |     default=0.04,
 56 |     help="Standard deviation in output layer [default: %default]",
 57 | )
 58 | parser.add_option(
 59 |     "--clip-gradient",
 60 |     dest="clip_gradient",
 61 |     type="float",
 62 |     default=5.0,
 63 |     help="Clipping constant applied to gradients [default: %default]",
 64 | )
 65 | #
 66 | (o, args) = parser.parse_args()
 67 | if len(args) != 2:
 68 |     parser.print_help()
 69 |     sys.exit(1)
 70 | 
 71 | (feat_dim, num_leaves) = list(map(int, args))
 72 | 
 73 | # Original prototype from Jiayu,
 74 | # <NnetProto>
 75 | # <Transmit> <InputDim> 40 <OutputDim> 40
 76 | # <LstmProjectedStreams> <InputDim> 40 <OutputDim> 512 <CellDim> 800 <ParamScale> 0.01 <NumStream> 4
 77 | # <AffineTransform> <InputDim> 512 <OutputDim> 8000 <BiasMean> 0.000000 <BiasRange> 0.000000 <ParamStddev> 0.04
 78 | # <Softmax> <InputDim> 8000 <OutputDim> 8000
 79 | # </NnetProto>
 80 | 
 81 | print("<NnetProto>")
 82 | # normally we won't use more than 2 layers of LSTM
 83 | if o.num_layers == 1:
 84 |     print(
 85 |         "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 86 |         % (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 87 |     )
 88 | elif o.num_layers == 2:
 89 |     print(
 90 |         "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 91 |         % (feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 92 |     )
 93 |     print(
 94 |         "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 95 |         % (o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 96 |     )
 97 | else:
 98 |     sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
 99 |     sys.exit(1)
100 | print(
101 |     "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f"
102 |     % (o.num_recurrent, num_leaves, o.param_stddev_factor)
103 | )
104 | print("<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves))
105 | print("</NnetProto>")
106 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/subset_data_dir_tr_cv.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | # Copyright 2013  Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin);
  3 | #                 Brno University of Technology (Author: Karel Vesely);
  4 | #                 Johns Hopkins University (Author: Daniel Povey);
  5 | # Apache 2.0
  6 | 
  7 | # This script splits dataset to two parts : 
  8 | # training set from (100-P)% of speakers/utterances and 
  9 | # held-out set (or cross-validation) from P% of remaining speakers/remaining utterances,
 10 | # which will be later on used for neural network training
 11 | #
 12 | # There are two options for choosing held-out (or cross-validation) set, either by
 13 | # --cv-spk-percent P , which will give you CV set based on random chosen P% of speakers, or
 14 | # --cv-utt-percent P , which will give you CV set based on last P% utterances in the dataset
 15 | # 
 16 | # If you don't apply the above two options, by default the script will use --cv-utt-percent option,
 17 | # and the default cross validation percentage portion is equal to 10% (i.e. P=10)
 18 | #
 19 | # The --cv-spk-percent option is useful if you would like to have subset chosen from random speakers order, 
 20 | # especially for the cases where dataset contains multiple different corpora,
 21 | # where type of speakers or recording channels may be quite different 
 22 | 
 23 | # Begin configuration.
 24 | cv_spk_percent= # % of speakers is parsed by option
 25 | cv_utt_percent=10 # default 10% of total utterances 
 26 | seed=777 # use seed for speaker shuffling
 27 | # End configuration.
 28 | 
 29 | echo "$0 $@"  # Print the command line for logging
 30 | 
 31 | [ -f path.sh ] && . ./path.sh; 
 32 | 
 33 | . parse_options.sh || exit 1;
 34 | 
 35 | # Select the split mode only after option parsing, so that --cv-spk-percent is
 36 | # honoured wherever it appears on the command line (or in a --config file).
 37 | uttbase=true; # by default, we choose the last P% utterances for CV
 38 | if [ -n "$cv_spk_percent" ]; then
 39 |   uttbase=false; # a speaker percentage was given -> split by speakers instead
 40 | fi
 41 | 
 42 | if [ $# != 3 ]; then
 43 |   echo "Usage: $0 [--cv-spk-percent P|--cv-utt-percent P] <srcdir> <traindir> <crossvaldir>"
 44 |   echo "  --cv-spk-percent P  Cross Validation portion of the total speakers, recommend value is 10% (i.e. P=10)"
 45 |   echo "  --cv-utt-percent P  Cross Validation portion of the total utterances, default is 10% (i.e. P=10)"
 46 |   echo "  "
 47 |   exit 1;
 48 | fi
 49 | 
 50 | srcdir=$1
 51 | trndir=$2
 52 | cvdir=$3
 53 | 
 54 | ## use simple last P% utterance for CV
 55 | if $uttbase; then
 56 |   if [ ! -f "$srcdir/utt2spk" ]; then
 57 |     echo "$0: no such file $srcdir/utt2spk"
 58 |     exit 1;
 59 |   fi
 60 | 
 61 |   # total number of utterances (one per line of utt2spk)
 62 |   N=$(cat "$srcdir/utt2spk" | wc -l)
 63 |   # tentative boundary: the first (100-P)% of the lines go to training
 64 |   P_utt=$((N * cv_utt_percent / 100))
 65 |   N_head=$((N -P_utt))
 66 |   # move the boundary back onto a speaker change; "n+0" prints 0 (not an empty string) if no speaker fits
 67 |   N_head=$(cat "$srcdir/utt2spk" | uniq -f1 -c | awk '{ if(n+$1<='$N_head') { n += $1 } else { nextfile } } END{ print n+0 }')
 68 |   # everything after the boundary becomes the cross-validation part
 69 |   N_tail=$((N-N_head))
 70 | 
 71 |   # now call the subset_data_dir.sh and fix the directories
 72 |   subset_data_dir.sh --first "$srcdir" $N_head "$trndir"
 73 |   subset_data_dir.sh --last "$srcdir" $N_tail "$cvdir"
 74 | 
 75 |   exit 0;
 76 | fi
 77 | 
 78 | ## use random chosen P% speakers for CV
 79 | if [ ! -f $srcdir/spk2utt ]; then
 80 |   echo "$0: no such file $srcdir/spk2utt" 
 81 |   exit 1;
 82 | fi
 83 | 
 84 | #total, cv, train number of speakers
 85 | N=$(cat $srcdir/spk2utt | wc -l)
 86 | N_spk_cv=$((N * cv_spk_percent / 100))
 87 | N_spk_trn=$((N - N_spk_cv))
 88 | 
 89 | mkdir -p $cvdir $trndir
 90 | 
 91 | #shuffle the speaker list
 92 | awk '{print $1}' $srcdir/spk2utt | shuffle_list.pl --srand $seed > $trndir/_tmpf_randspk
 93 | 
 94 | #split the train/cv
 95 | head -n $N_spk_cv $trndir/_tmpf_randspk > $cvdir/_tmpf_cvspk
 96 | tail -n $N_spk_trn $trndir/_tmpf_randspk > $trndir/_tmpf_trainspk
 97 | 
 98 | #now call the subset_data_dir.sh 
 99 | subset_data_dir.sh --spk-list $trndir/_tmpf_trainspk $srcdir $trndir
100 | subset_data_dir.sh --spk-list $cvdir/_tmpf_cvspk $srcdir $cvdir
101 | 
102 | #clean-up
103 | rm -f $trndir/_tmpf_randspk $trndir/_tmpf_trainspk $cvdir/_tmpf_cvspk
104 | 
105 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/map_arpa_lm.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | # Copyright 2014  Guoguo Chen
  4 | #           2014  Johns Hopkins University (author: Daniel Povey)
  5 | # Apache 2.0.
  6 | #
  7 | use strict;
  8 | use warnings;
  9 | use Getopt::Long;
 10 | 
 11 | my $Usage = <<EOU;
 12 | This script reads the Arpa format language model, and maps the words into
 13 | integers or vice versa. N-gram lines containing words that are not in the
 14 | symbol table are dropped; the header section is copied through unchanged.
 15 | 
 16 | It will be used jointly with lmbin/arpa-to-const-arpa to build ConstArpaLm format
 17 | language model. We first map the words in an Arpa format language model to
 18 | integers, and then use lmbin/arpa-to-const-arpa to build a ConstArpaLm format
 19 | language model.
 20 | 
 21 | Usage: utils/map_arpa_lm.pl [options] <vocab-file> < input-arpa >output-arpa
 22 |  e.g.: utils/map_arpa_lm.pl words.txt <arpa_lm.txt >arpa_lm.int
 23 | 
 24 | Allowed options:
 25 |   --sym2int   : If true, maps words to integers, otherwise maps integers to
 26 |                 words. (boolean, default = true)
 27 | 
 28 | EOU
 29 | 
 30 | my $sym2int = "true";
 31 | GetOptions('sym2int=s' => \$sym2int);
 32 | 
 33 | ($sym2int eq "true" || $sym2int eq "false") ||
 34 |   die "$0: Bad value for option --sym2int\n";
 35 | 
 36 | if (@ARGV != 1) {
 37 |   die $Usage;
 38 | }
 39 | 
 40 | # Gets parameters.
 41 | my $symtab = shift @ARGV;
 42 | # The Arpa LM itself is read from STDIN and written to STDOUT, so the symbol
 43 | # table is the only positional argument.
 44 | 
 45 | # Opens files.
 46 | open(M, "<$symtab") || die "$0: Fail to open $symtab\n";
 47 | 
 48 | # Reads in the mapper.
 49 | my %mapper;
 50 | while (<M>) {
 51 |   chomp;
 52 |   my @col = split(/[\s]+/, $_);
 53 |   @col == 2 || die "$0: Bad line in mapper file \"$_\"\n";
 54 |   if ($sym2int eq "true") {
 55 |     if (defined($mapper{$col[0]})) {
 56 |       die "$0: Duplicate entry \"$col[0]\"\n";
 57 |     }
 58 |     $mapper{$col[0]} = $col[1];
 59 |   } else {
 60 |     if (defined($mapper{$col[1]})) {
 61 |       die "$0: Duplicate entry \"$col[1]\"\n";
 62 |     }
 63 |     $mapper{$col[1]} = $col[0];
 64 |   }
 65 | }
 66 | 
 67 | my $num_oov_lines = 0;
 68 | my $max_oov_warn = 20;
 69 | 
 70 | # Parses Arpa n-gram language model.
 71 | my $arpa = "";
 72 | my $current_order = -1;
 73 | my %head_ngram_count;
 74 | my %actual_ngram_count;
 75 | while (<STDIN>) {
 76 |   chomp;
 77 |   my @col = split(" ", $_);
 78 | 
 79 |   if ($current_order == -1 and ! m/^\\data\\$/) {
 80 |     next;  # skip everything that precedes the \data\ marker
 81 |   }
 82 | 
 83 |   if (m/^\\data\\$/) {
 84 |     print STDERR "$0: Processing \"\\data\\\"\n";
 85 |     print "$_\n";
 86 |     $current_order = 0;  # order 0 means: inside the header section
 87 |   } elsif (m/^\\[0-9]*-grams:$/) {
 88 |     $current_order = $_;  # strip the marker down to the bare n-gram order
 89 |     $current_order =~ s/-grams:$//g;
 90 |     $current_order =~ s/^\\//g;
 91 |     print "$_\n";
 92 |     print STDERR "$0: Processing \"\\$current_order-grams:\"\n";
 93 |   } elsif (m/^\\end\\/) {
 94 |     print "$_\n";
 95 |   } elsif ($_ eq "") {
 96 |     if ($current_order >= 1) {  # blank lines are kept only inside n-gram sections
 97 |       print "\n";
 98 |     }
 99 |   } else {
100 |     if ($current_order == 0) {
101 |       # echo head section.
102 |       print "$_\n";
103 |     } else {
104 |       # Parses n-gram section: <prob> <word>*order [<backoff>].
105 |       if (@col > 2 + $current_order || @col < 1 + $current_order) {
106 |         die "$0: Bad line in arpa lm \"$_\"\n";
107 |       }
108 |       my $prob = shift @col;
109 |       my $is_oov = 0;
110 |       for (my $i = 0; $i < $current_order; $i++) {
111 |         my $temp = $mapper{$col[$i]};
112 |         if (!defined($temp)) {  # word missing from the symbol table: drop the whole line
113 |           $is_oov = 1;
114 |           $num_oov_lines++;
115 |           last;
116 |         } else {
117 |           $col[$i] = $temp;
118 |         }
119 |       }
120 |       if (!$is_oov) {
121 |         my $rest_of_line = join(" ", @col);
122 |         print "$prob\t$rest_of_line\n";
123 |       } else {
124 |         if ($num_oov_lines < $max_oov_warn) {  # cap the number of per-line warnings
125 |           print STDERR "$0: Warning: OOV line $_\n";
126 |         }
127 |       }
128 |     }
129 |   }
130 | }
131 | 
132 | if ($num_oov_lines > 0) {
133 |   print STDERR "$0: $num_oov_lines lines of the Arpa file contained OOVs and ";
134 |   print STDERR "were not printed.\n";
135 | }
136 | 
137 | close(M);
138 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/nnet/make_blstm_proto.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Copyright 2015  Brno University of Technology (author: Karel Vesely)
  4 | 
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #  http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 15 | # See the Apache 2 License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'.
 19 | from __future__ import print_function
 20 | 
 21 | import sys
 22 | 
 23 | ###
 24 | ### Parse options
 25 | ###
 26 | from optparse import OptionParser
 27 | 
 28 | usage = "%prog [options] <feat-dim> <num-leaves> >nnet-proto-file"
 29 | parser = OptionParser(usage)
 30 | #
 31 | parser.add_option(
 32 |     "--num-cells", dest="num_cells", type="int", default=800, help="Number of LSTM cells [default: %default]"
 33 | )
 34 | parser.add_option(
 35 |     "--num-recurrent",
 36 |     dest="num_recurrent",
 37 |     type="int",
 38 |     default=512,
 39 |     help="Number of LSTM recurrent units [default: %default]",
 40 | )
 41 | parser.add_option(
 42 |     "--num-layers", dest="num_layers", type="int", default=2, help="Number of LSTM layers [default: %default]"
 43 | )
 44 | parser.add_option(
 45 |     "--lstm-stddev-factor",
 46 |     dest="lstm_stddev_factor",
 47 |     type="float",
 48 |     default=0.01,
 49 |     help="Standard deviation of initialization [default: %default]",
 50 | )
 51 | parser.add_option(
 52 |     "--param-stddev-factor",
 53 |     dest="param_stddev_factor",
 54 |     type="float",
 55 |     default=0.04,
 56 |     help="Standard deviation in output layer [default: %default]",
 57 | )
 58 | parser.add_option(
 59 |     "--clip-gradient",
 60 |     dest="clip_gradient",
 61 |     type="float",
 62 |     default=5.0,
 63 |     help="Clipping constant applied to gradients [default: %default]",
 64 | )
 65 | #
 66 | (o, args) = parser.parse_args()
 67 | if len(args) != 2:
 68 |     parser.print_help()
 69 |     sys.exit(1)
 70 | 
 71 | (feat_dim, num_leaves) = list(map(int, args))
 72 | 
 73 | # Original prototype from Jiayu,
 74 | # <NnetProto>
 75 | # <Transmit> <InputDim> 40 <OutputDim> 40
 76 | # <LstmProjectedStreams> <InputDim> 40 <OutputDim> 512 <CellDim> 800 <ParamScale> 0.01 <NumStream> 4
 77 | # <AffineTransform> <InputDim> 512 <OutputDim> 8000 <BiasMean> 0.000000 <BiasRange> 0.000000 <ParamStddev> 0.04
 78 | # <Softmax> <InputDim> 8000 <OutputDim> 8000
 79 | # </NnetProto>
 80 | 
 81 | print("<NnetProto>")
 82 | # normally we won't use more than 2 layers of LSTM
 83 | if o.num_layers == 1:
 84 |     print(
 85 |         "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 86 |         % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 87 |     )
 88 | elif o.num_layers == 2:
 89 |     print(
 90 |         "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 91 |         % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 92 |     )
 93 |     print(
 94 |         "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f"
 95 |         % (2 * o.num_recurrent, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
 96 |     )
 97 | else:
 98 |     sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
 99 |     sys.exit(1)
100 | print(
101 |     "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f"
102 |     % (2 * o.num_recurrent, num_leaves, o.param_stddev_factor)
103 | )
104 | print("<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves))
105 | print("</NnetProto>")
106 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/pinyin_map.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use warnings; #sed replacement for -w perl parameter
 3 | use strict;
 4 | 
 5 | # Expands the pinyin syllables of a lexicon into phones.
 6 | #
 7 | # The single command-line argument is a map file; each of its lines holds a
 8 | # key followed by the phones that key maps to.  Each line read from STDIN is
 9 | # "<word> <syllable> <syllable> ..."; the word followed by the phones of all
10 | # its syllables is written to STDOUT.
11 | #
12 | # A syllable that starts with one of the initials CH, SH, ZH or
13 | # B C D F G H J K L M N P Q R S T W X Y Z and has at least one more character
14 | # is split into initial + final, which are looked up separately; any other
15 | # syllable is looked up as a whole.  The tone digit is removed before the
16 | # lookup and appended to every phone of the final / whole syllable.
17 | 
18 | if (@ARGV != 1) {
19 |   # Usage errors go to STDERR with a non-zero status so that they cannot be
20 |   # mistaken for (empty) lexicon output.
21 |   print STDERR "\nUsage: pinyin_map.pl <pinyin2phone-map> < pinyin-lexicon > phone-lexicon\n";
22 |   exit(1);
23 | }
24 | 
25 | my $map_file = $ARGV[0];
26 | open(my $maps_fh, '<', $map_file)
27 |   or die("Could not open pinyin map file $map_file: $!\n");
28 | my %py2ph;
29 | while (my $line = <$maps_fh>) {
30 |   my ($key, @phones) = split(" ", $line);
31 |   next unless defined $key;  # blank line: nothing to map
32 |   $py2ph{$key} = [@phones];
33 | }
34 | close($maps_fh);
35 | 
36 | # Splits a final (or whole syllable) into its toneless form and its tone.
37 | # NOTE(review): when the unit carries no digit, both substitutions leave it
38 | # untouched, so the "tone" returned is the unit itself -- this mirrors the
39 | # historical behaviour; confirm that toneless input is not expected.
40 | sub split_tone {
41 |   my ($unit) = @_;
42 |   my ($base, $tone) = ($unit, $unit);
43 |   $base =~ s:([A-Z]+)[0-9]:$1:;
44 |   $tone =~ s:[A-Z]+([0-9]):$1:;
45 |   return ($base, $tone);
46 | }
47 | 
48 | while (my $line = <STDIN>) {
49 |   my ($word, @syllables) = split(" ", $line);
50 |   if (!defined $word) {  # blank input line -> blank output line
51 |     print "\n";
52 |     next;
53 |   }
54 |   my @entry = ($word);
55 |   foreach my $syl (@syllables) {
56 |     # Two-letter initials are tried first; "CH" alone still falls back to
57 |     # initial "C" + final "H" through regex backtracking.
58 |     if ($syl =~ /^(CH|SH|ZH|[BCDFGHJKLMNPQRSTWXYZ])([A-Z0-9]+)$/) {
59 |       my ($initial, $rest) = ($1, $2);
60 |       my ($final, $tone) = split_tone($rest);
61 |       if (!exists $py2ph{$initial} or !exists $py2ph{$final}) {
62 |         # Report on STDERR and exit non-zero instead of writing the message
63 |         # into the lexicon on STDOUT and exiting with status 0.
64 |         die "1: no entry find for $syl $initial $final\n";
65 |       }
66 |       push(@entry, @{$py2ph{$initial}});
67 |       push(@entry, map { $_ . $tone } @{$py2ph{$final}});
68 |     } else {
69 |       my ($base, $tone) = split_tone($syl);
70 |       if (!exists $py2ph{$base}) {
71 |         die "2: no entry find for $base\n";
72 |       }
73 |       push(@entry, map { $_ . $tone } @{$py2ph{$base}});
74 |     }
75 |   }
76 |   print "@entry\n";
77 | }
78 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/filter_scps.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | # Copyright 2010-2012 Microsoft Corporation
  3 | #                     Johns Hopkins University (author: Daniel Povey)
  4 | #           2015      Xiaohui Zhang
  5 | 
  6 | # Licensed under the Apache License, Version 2.0 (the "License");
  7 | # you may not use this file except in compliance with the License.
  8 | # You may obtain a copy of the License at
  9 | #
 10 | #  http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 16 | # See the Apache 2 License for the specific language governing permissions and
 17 | # limitations under the License.
 18 | 
 19 | 
 20 | # This script takes multiple lists of utterance-ids or any file whose first field
 21 | # of each line is an utterance-id, as filters, and filters an scp
 22 | # file (or any file whose "n-th" field is an utterance id), printing
 23 | # out only those lines whose "n-th" field is in filter. The index of
 24 | # the "n-th" field is 1, by default, but can be changed by using
 25 | # the -f <n> switch
 26 | 
 27 | 
 28 | use strict;
 29 | use warnings;
 30 | 
 31 | my $usage =
 32 |   "Usage: utils/filter_scps.pl [-f <field-to-filter-on>] <job-range-specifier> <filter-pattern> <input-scp> <output-scp-pattern>\n" .
 33 |   "e.g.:  utils/filter_scps.pl [-f <field-to-filter-on>] JOB=1:10 data/train/split10/JOB/spk2utt data/train/feats.scp data/train/split10/JOB/feats.scp\n" .
 34 |   "similar to utils/filter_scp.pl, but it uses multiple filters and output multiple filtered files.\n" .
 35 |   "The -f option specifies the field in <input-scp> that we filter on (default: 1).\n" .
 36 |   "See also: utils/filter_scp.pl\n";
 37 | 
 38 | my $field = 1;
 39 | 
 40 | # Strips leading "-f <n>" pairs off @ARGV and records the last <n> in $field.
 41 | sub consume_field_option {
 42 |   while (@ARGV >= 2 && $ARGV[0] eq "-f") {
 43 |     shift @ARGV;
 44 |     $field = shift @ARGV;
 45 |   }
 46 | }
 47 | 
 48 | # "-f <n>" is accepted both before the job-range specifier (where the usage
 49 | # example puts it) and right after it (where this script used to look for it).
 50 | # The argument count is checked only after the option has been removed;
 51 | # checking it first made every invocation that used -f fail.
 52 | consume_field_option();
 53 | @ARGV >= 4 || die $usage;
 54 | 
 55 | my ($jobname, $jobstart, $jobend);
 56 | my $jobspec = shift @ARGV;  # kept so that error messages show the right argument
 57 | if ($jobspec =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10
 58 |   ($jobname, $jobstart, $jobend) = ($1, $2, $3);
 59 |   if ($jobstart > $jobend) {
 60 |     die "filter_scps.pl: invalid job range $jobspec";
 61 |   }
 62 | } else {
 63 |   die "filter_scps.pl: bad job-range specifier $jobspec: expected e.g. JOB=1:10";
 64 | }
 65 | 
 66 | consume_field_option();
 67 | @ARGV == 3 || die $usage;
 68 | $field =~ m/^[1-9][0-9]*$/ ||
 69 |   die "filter_scps.pl: bad field index '$field' given to -f: expected a positive integer\n";
 70 | 
 71 | my $idlist = shift @ARGV;
 72 | 
 73 | if ($idlist !~ m/$jobname/ && $jobend > $jobstart) {
 74 |   print STDERR "filter_scps.pl: you are trying to use multiple filter files as filter patterns but "
 75 |     . "you are providing just one filter file ($idlist)\n";
 76 |   exit(1);
 77 | }
 78 | 
 79 | # Two-argument open is kept on purpose so that the file names are interpreted
 80 | # exactly as before.
 81 | my $infile = shift @ARGV;
 82 | open(my $in_fh, "< $infile") or die "Can't open $infile for read: $!";
 83 | my @inlines = <$in_fh>;
 84 | close($in_fh);
 85 | 
 86 | my $outfile = shift @ARGV;
 87 | 
 88 | if ($outfile !~ m/$jobname/ && $jobend > $jobstart) {
 89 |   print STDERR "filter_scps.pl: you are trying to create multiple filtered files but "
 90 |     . "you are providing just one output file ($outfile)\n";
 91 |   exit(1);
 92 | }
 93 | 
 94 | # Extract the field we filter on once per input line instead of once per
 95 | # line per job; $keys[$i] belongs to $inlines[$i].
 96 | my @keys;
 97 | foreach my $line (@inlines) {
 98 |   if ($field == 1) { # Treat this as special case, since it is common.
 99 |     $line =~ m/\s*(\S+)\s*/ || die "Bad line $line, could not get first field.";
100 |     push(@keys, $1);
101 |   } else {
102 |     my @A = split(" ", $line);
103 |     @A > 0 || die "Invalid scp file line $line";
104 |     @A >= $field || die "Invalid scp file line $line";
105 |     push(@keys, $A[$field-1]);
106 |   }
107 | }
108 | 
109 | for (my $jobid = $jobstart; $jobid <= $jobend; $jobid++) {
110 |   # Substitute the job id for the job name in both patterns.
111 |   (my $idlist_n = $idlist) =~ s/$jobname/$jobid/g;
112 |   (my $outfile_n = $outfile) =~ s/$jobname/$jobid/g;
113 | 
114 |   # The first field of every line of the id-list is an id to keep.
115 |   open(my $id_fh, "<$idlist_n") || die "Could not open id-list file $idlist_n: $!";
116 |   my %seen;
117 |   while (<$id_fh>) {
118 |     my @A = split;
119 |     @A >= 1 || die "Invalid line $_ in id-list file $idlist_n";
120 |     $seen{$A[0]} = 1;
121 |   }
122 |   close($id_fh);
123 | 
124 |   open(my $out_fh, ">$outfile_n") || die "Could not open output file $outfile_n: $!";
125 |   for my $i (0 .. $#inlines) {
126 |     print {$out_fh} $inlines[$i] if $seen{$keys[$i]};
127 |   }
128 |   # A failed close is how buffered write errors (e.g. disk full) show up.
129 |   close($out_fh) || die "Error closing output file $outfile_n: $!";
130 | }
131 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/create_data_link.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | # Copyright 2013  Guoguo Chen
  4 | #           2014  Johns Hopkins University (author: Daniel Povey)
  5 | # Apache 2.0.
  6 | #
  7 | # This script distributes data onto different file systems by making symbolic
  8 | # links. It is supposed to use together with utils/create_split_dir.pl, which
  9 | # creates a "storage" directory that links to different file systems.
 10 | #
 11 | # If a sub-directory egs/storage does not exist, it does nothing. If it exists,
 12 | # then it selects pseudo-randomly a number from those available in egs/storage/*
 13 | # creates a link such as
 14 | #
 15 | #   egs/egs.3.4.ark -> storage/4/egs.3.4.ark
 16 | #
 17 | use strict;
 18 | use warnings;
 19 | use File::Basename;
 20 | use File::Spec;
 21 | use Getopt::Long;
 22 | 
 23 | # Greatest common divisor of two non-negative integers; GetGCD($x, 0) is $x.
 24 | sub GetGCD {
 25 |   my ($m, $n) = @_;
 26 |   # Euclid's algorithm: replace ($m, $n) by ($n, $m mod $n) until the
 27 |   # remainder is zero.  Unlike repeated subtraction this also terminates
 28 |   # when one of the arguments is 0.
 29 |   while ($n != 0) {
 30 |     ($m, $n) = ($n, $m % $n);
 31 |   }
 32 |   return $m;
 33 | }
 34 | 
 35 | my $Usage = <<EOU;
 36 | This script distributes data onto different file systems by making symbolic
 37 | links. It is supposed to use together with utils/create_split_dir.pl, which
 38 | creates a "storage" directory that links to different file systems.
 39 | 
 40 | If a sub-directory foo/storage does not exist, it does nothing. If it exists,
 41 | then it selects pseudo-randomly a number from those available in foo/storage/*
 42 | creates a link such as
 43 | 
 44 |   foo/egs.3.4.ark -> storage/4/egs.3.4.ark
 45 | 
 46 | Usage: utils/create_data_link.pl <data-archive1> [<data-archive2> ... ]
 47 |  e.g.: utils/create_data_link.pl foo/bar/egs.3.4.ark foo/bar/egs.3.5.ark
 48 |  (note: the dirname, e.g. foo/bar/, must be the same in all cases).
 49 | 
 50 | See also utils/remove_data_links.sh
 51 | EOU
 52 | 
 53 | GetOptions();
 54 | 
 55 | if (@ARGV == 0) {
 56 |   die $Usage;
 57 | }
 58 | 
 59 | my $example_fullpath = $ARGV[0];
 60 | 
 61 | # Check if the storage has been created. If not, do nothing.
 62 | my $dirname = dirname($example_fullpath);
 63 | if (! -d "$dirname/storage") {
 64 |   exit(0);
 65 | }
 66 | 
 67 | # Storage exists, create symbolic links in the next few steps.
 68 | 
 69 | # First, get a list of the available storage directories, and check if they are
 70 | # properly created.
 71 | opendir(my $dh, "$dirname/storage/") || die "$0: Fail to open $dirname/storage/\n";
 72 | my @storage_dirs = grep(/^[0-9]+$/, readdir($dh));
 73 | closedir($dh);
 74 | my $num_storage = scalar(@storage_dirs);
 75 | if ($num_storage == 0) {
 76 |   # Without this check the modulus below fails with "Illegal modulus zero".
 77 |   die "$0: no numbered sub-directories found in $dirname/storage/\n";
 78 | }
 79 | for (my $x = 1; $x <= $num_storage; $x++) {
 80 |   (-d "$dirname/storage/$x") || die "$0: $dirname/storage/$x does not exist\n";
 81 | }
 82 | 
 83 | # Second, get the coprime list.  The upper bound is inclusive so that a single
 84 | # storage directory yields the list (1) instead of an empty list; for any
 85 | # larger count, $n == $num_storage has GCD $num_storage and is not added.
 86 | my @coprimes;
 87 | for (my $n = 1; $n <= $num_storage; $n++) {
 88 |   if (GetGCD($n, $num_storage) == 1) {
 89 |     push(@coprimes, $n);
 90 |   }
 91 | }
 92 | 
 93 | my $ret = 0;
 94 | 
 95 | foreach my $fullpath (@ARGV) {
 96 |   if ($dirname ne dirname($fullpath)) {
 97 |     die "Mismatch in directory names of arguments: $example_fullpath versus $fullpath";
 98 |   }
 99 | 
100 |   # Finally, work out the directory index where we should put the data to:
101 |   # the numbers embedded in the file name are weighted by the coprimes
102 |   # (cycling through them) and the sum is reduced modulo the storage count.
103 |   my $basename = basename($fullpath);
104 |   my $filename_numbers = $basename;
105 |   $filename_numbers =~ s/[^0-9]+/ /g;
106 |   my @filename_numbers = split(" ", $filename_numbers);
107 |   my $total = 0;
108 |   my $index = 0;
109 |   foreach my $x (@filename_numbers) {
110 |     if ($index >= scalar(@coprimes)) {
111 |       $index = 0;
112 |     }
113 |     $total += $x * $coprimes[$index];
114 |     $index++;
115 |   }
116 |   my $dir_index = $total % $num_storage + 1;
117 | 
118 |   # Make the symbolic link.  -e follows links, so it is false for a link whose
119 |   # target has not been written yet; -l catches that case, otherwise a stale
120 |   # link would be left in place and symlink() below would fail.
121 |   if (-e $fullpath || -l $fullpath) {
122 |     unlink($fullpath) || warn "$0: could not remove existing $fullpath: $!\n";
123 |   }
124 |   if (!symlink("storage/$dir_index/$basename", $fullpath)) { # failure
125 |     warn "$0: could not link $fullpath -> storage/$dir_index/$basename: $!\n";
126 |     $ret = 1;  # will exit with error status.
127 |   }
128 | }
129 | 
130 | exit($ret);
131 | 
132 | ## testing:
133 | # rm -rf foo bar
134 | # mkdir -p bar/{1,2,3,4}
135 | # mkdir -p foo/storage
136 | # for x in 1 2 3 4; do ln -s ../../bar/$x foo/storage/$x; done
137 | # utils/create_data_link.pl foo/1.3.ark  foo/2.3.ark
138 | # ls -l foo
139 | # total 0
140 | # lrwxrwxrwx 1 dpovey fax 17 Sep  2 17:41 1.3.ark -> storage/3/1.3.ark
141 | # lrwxrwxrwx 1 dpovey fax 17 Sep  2 17:41 2.3.ark -> storage/4/2.3.ark
142 | # drwxr-xr-x 2 dpovey fax 38 Sep  2 17:40 storage
143 | 


--------------------------------------------------------------------------------
/cfg/TIMIT_baselines/TIMIT_MLP_mfcc_basic.cfg:
--------------------------------------------------------------------------------
  1 | [cfg_proto]
  2 | cfg_proto = proto/global.proto
  3 | cfg_proto_chunk = proto/global_chunk.proto
  4 | 
  5 | [exp]
  6 | cmd = 
  7 | run_nn_script = run_nn
  8 | out_folder = exp/TIMIT_MLP_basic
  9 | seed = 1234
 10 | use_cuda = True
 11 | multi_gpu = False
 12 | save_gpumem = False
 13 | n_epochs_tr = 24
 14 | 
 15 | [dataset1]
 16 | data_name = TIMIT_tr
 17 | fea = fea_name=mfcc
 18 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/feats.scp
 19 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/train/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 20 | 	cw_left=5
 21 | 	cw_right=5
 22 | 	
 23 | 
 24 | lab = lab_name=lab_cd
 25 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali
 26 | 	lab_opts=ali-to-pdf
 27 | 	lab_count_file=auto
 28 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/
 29 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 30 | 	
 31 | 
 32 | n_chunks = 5
 33 | 
 34 | [dataset2]
 35 | data_name = TIMIT_dev
 36 | fea = fea_name=mfcc
 37 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/feats.scp
 38 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 39 | 	cw_left=5
 40 | 	cw_right=5
 41 | 	
 42 | 
 43 | lab = lab_name=lab_cd
 44 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_dev
 45 | 	lab_opts=ali-to-pdf
 46 | 	lab_count_file=auto
 47 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/
 48 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 49 | 	
 50 | 
 51 | n_chunks = 1
 52 | 
 53 | [dataset3]
 54 | data_name = TIMIT_test
 55 | fea = fea_name=mfcc
 56 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/feats.scp
 57 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/test/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 58 | 	cw_left=5
 59 | 	cw_right=5
 60 | 	
 61 | 
 62 | lab = lab_name=lab_cd
 63 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test
 64 | 	lab_opts=ali-to-pdf
 65 | 	lab_count_file=auto
 66 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/
 67 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 68 | 	
 69 | 
 70 | n_chunks = 1
 71 | 
 72 | [data_use]
 73 | train_with = TIMIT_tr
 74 | valid_with = TIMIT_dev
 75 | forward_with = TIMIT_test
 76 | 
 77 | [batches]
 78 | batch_size_train = 128
 79 | max_seq_length_train = 1000
 80 | increase_seq_length_train = False
 81 | start_seq_len_train = 100
 82 | multply_factor_seq_len_train = 2
 83 | batch_size_valid = 128
 84 | max_seq_length_valid = 1000
 85 | 
 86 | [architecture1]
 87 | arch_name = MLP_layers1
 88 | arch_proto = proto/MLP.proto
 89 | arch_library = neural_networks
 90 | arch_class = MLP
 91 | arch_pretrain_file = none
 92 | arch_freeze = False
 93 | arch_seq_model = False
 94 | dnn_lay = 1024,1024,1024,1024,N_out_lab_cd
 95 | dnn_drop = 0.15,0.15,0.15,0.15,0.0
 96 | dnn_use_laynorm_inp = False
 97 | dnn_use_batchnorm_inp = False
 98 | dnn_use_batchnorm = True,True,True,True,False
 99 | dnn_use_laynorm = False,False,False,False,False
100 | dnn_act = relu,relu,relu,relu,softmax
101 | arch_lr = 0.08
102 | arch_halving_factor = 0.5
103 | arch_improvement_threshold = 0.001
104 | arch_opt = sgd
105 | opt_momentum = 0.0
106 | opt_weight_decay = 0.0
107 | opt_dampening = 0.0
108 | opt_nesterov = False
109 | 
110 | [model]
111 | model_proto = proto/model.proto
112 | model = out_dnn1=compute(MLP_layers1,mfcc)
113 | 	loss_final=cost_nll(out_dnn1,lab_cd)
114 | 	err_final=cost_err(out_dnn1,lab_cd)
115 | 
116 | [forward]
117 | forward_out = out_dnn1
118 | normalize_posteriors = True
119 | normalize_with_counts_from = lab_cd
120 | save_out_file = False
121 | require_decoding = True
122 | 
123 | [decoding]
124 | decoding_script_folder = kaldi_decoding_scripts/
125 | decoding_script = decode_dnn.sh
126 | decoding_proto = proto/decoding.proto
127 | min_active = 200
128 | max_active = 7000
129 | max_mem = 50000000
130 | beam = 13.0
131 | latbeam = 8.0
132 | acwt = 0.2
133 | max_arcs = -1
134 | skip_scoring = false
135 | scoring_script = local/score.sh
136 | scoring_opts = "--min-lmwt 1 --max-lmwt 10"
137 | norm_vars = False
138 | 
139 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/reverse_lm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Copyright 2012  Brno University of Technology (Author: Mirko Hannemann)
 4 | # Apache 2.0
 5 | 
 6 | # configuration section (defaults for the options listed in the usage text below)
 7 | utterances=4  # number of random test utterances
 8 | maxlen=30  # max number of arcs (words) in utterance
 9 | nbest=10  # compare n best paths
10 | # end config section
11 | 
12 | echo "$0 $@"  # Print the command line for logging
13 | 
14 | [ -f ./path.sh ] && . ./path.sh; # source the path.
15 | . parse_options.sh || exit 1;
16 | 
17 | if [ $# != 2 ]; then
18 |    echo "Usage: utils/reverse_lm_test.sh [options] <fwd-lm-dir> <bwd-lm-dir>"
19 |    echo "example: utils/reverse_lm_test.sh data/lang_test_tgpr_5k data/lang_test_tgpr_5k.reverse"
20 |    echo "options:"
21 |    echo "  --utterances <int>   number of random test utterances"
22 |    echo "  --maxlen <int>       max number of arcs (words) in utterance"
23 |    echo "  --nbest <int>        compare n best paths"
24 |    exit 1;
25 | fi
26 | 
27 | test_fwd=$1
28 | test_bwd=$2
29 | nb=`echo $nbest | awk '{print $1-1;}'`
30 | 
31 | # For each language model the corresponding FST in lang_test_* directory.
32 | 
33 | echo "compare LM scores using "$test_fwd/G.fst" and "$test_bwd/G.fst
34 | 
35 | for utt in `seq 1 $utterances`
36 | do
37 |   # generate random sentence with forward language model
38 |   len=1000 # big number
39 |   while [ $len -gt $maxlen ]
40 |   do
41 |     fstrandgen --npath=1 $test_fwd/G.fst | fstprint --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt > sent$utt
42 |     len=`cat sent$utt | wc -l`
43 |   done
44 |   cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{printf "utterance:"; for(i=1;i<=length(a);i++) {printf " %s",a[i];} printf "\n";}'  
45 |   
46 |   # get n best paths with forward language model
47 |   cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[i];} print length(a);}' > sent$utt.forward
48 |   fstcompile --acceptor --isymbols=$test_fwd/words.txt  --osymbols=$test_fwd/words.txt sent$utt.forward > sent$utt.forward.fst
49 |   fstcompose $test_fwd/G.fst sent$utt.forward.fst > sent$utt.composed.forward.fst
50 |   fstshortestpath --nshortest=$nbest sent$utt.composed.forward.fst | fstprint > sent$utt.composed.forward.n
51 | 
52 |   rm sent$utt.forward.scores 2>/dev/null
53 |   for n in `seq 0 $nb`
54 |   do
55 |     # select path with rank n
56 |     cat sent$utt.composed.forward.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.forward.$n.fst
57 |     fstprint sent$utt.composed.forward.$n.fst > sent$utt.composed.forward.$n
58 |     # compute shortest distance to final states
59 |     fstshortestdistance sent$utt.composed.forward.$n.fst | \
60 |       awk -v list=sent$utt.composed.forward.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \
61 |       { if (final[$1]) { cost=$2+final[$1]; if (cost<mincost) {mincost=cost;} };} END {print mincost;}' \
62 |       >> sent$utt.forward.scores
63 |   done
64 |   
65 |   # get n best paths with reverse language model
66 |   cat sent$utt | awk '(NF>1){if ($3!="#0") {a[length(a)+1]=$3;}} END{for(i=1;i<=length(a);i++) {print i-1,i,a[length(a)-i+1];} print length(a);}' > sent$utt.reverse
67 |   fstcompile --acceptor --isymbols=$test_fwd/words.txt --osymbols=$test_fwd/words.txt sent$utt.reverse > sent$utt.reverse.fst
68 |   fstcompose $test_bwd/G.fst sent$utt.reverse.fst > sent$utt.composed.reverse.fst
69 |   fstshortestpath --nshortest=$nbest sent$utt.composed.reverse.fst | fstprint > sent$utt.composed.reverse.n
70 | 
71 |   rm sent$utt.reverse.scores 2>/dev/null
72 |   for n in `seq 0 $nb`
73 |   do
74 |     # select path with rank n
75 |     cat sent$utt.composed.reverse.n | awk '(NR>'$n' || $1!="0"){print;}' | fstcompile | fstconnect > sent$utt.composed.reverse.$n.fst
76 |     fstprint sent$utt.composed.reverse.$n.fst > sent$utt.composed.reverse.$n
77 |     # compute shortest distance to final states
78 |     fstshortestdistance sent$utt.composed.reverse.$n.fst | \
79 |       awk -v list=sent$utt.composed.reverse.$n 'BEGIN{mincost=1E5; while (getline < list > 0){if (NF==2) final[$1]=$2; if (NF==1) final[$1]=0.00001;}} \
80 |       { if (final[$1]) { cost=$2+final[$1]; if (cost<mincost) {mincost=cost;} };} END {print mincost;}' \
81 |       >> sent$utt.reverse.scores
82 |   done
83 | 
84 |   # present results
85 |   paste sent$utt.forward.scores sent$utt.reverse.scores | \
86 |     awk '{diff=$1-$2; if ( (diff<0?-diff:diff) > 0.001 ) print NR,$1,$2,"!!!"; else print NR,$1,$2;}'
87 |   # clean up
88 |   rm sent$utt
89 |   rm sent$utt.*
90 | done
91 | 


--------------------------------------------------------------------------------
/cfg/TIMIT_baselines/TIMIT_MLP_mfcc_basic_flex.cfg:
--------------------------------------------------------------------------------
  1 | [cfg_proto]
  2 | cfg_proto = proto/global.proto
  3 | cfg_proto_chunk = proto/global_chunk.proto
  4 | 
  5 | [exp]
  6 | cmd = 
  7 | run_nn_script = run_nn
  8 | out_folder = exp/TIMIT_MLP_basic_flex
  9 | seed = 1234
 10 | use_cuda = True
 11 | multi_gpu = False
 12 | save_gpumem = False
 13 | n_epochs_tr = 24
 14 | 
 15 | [dataset1]
 16 | data_name = TIMIT_tr
 17 | fea = fea_name=mfcc
 18 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/feats.scp
 19 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/train/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 20 | 	cw_left=5
 21 | 	cw_right=5
 22 | 	
 23 | 
 24 | lab = lab_name=lab_cd
 25 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali
 26 | 	lab_opts=ali-to-pdf
 27 | 	lab_count_file=auto
 28 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/train/
 29 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 30 | 	
 31 | 
 32 | n_chunks = 5
 33 | 
 34 | [dataset2]
 35 | data_name = TIMIT_dev
 36 | fea = fea_name=mfcc
 37 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/feats.scp
 38 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 39 | 	cw_left=5
 40 | 	cw_right=5
 41 | 	
 42 | 
 43 | lab = lab_name=lab_cd
 44 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_dev
 45 | 	lab_opts=ali-to-pdf
 46 | 	lab_count_file=auto
 47 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/dev/
 48 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 49 | 	
 50 | 
 51 | n_chunks = 1
 52 | 
 53 | [dataset3]
 54 | data_name = TIMIT_test
 55 | fea = fea_name=mfcc
 56 | 	fea_lst=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/feats.scp
 57 | 	fea_opts=apply-cmvn --utt2spk=ark:/home/mirco/kaldi-trunk/egs/timit/s5/data/test/utt2spk  ark:/home/mirco/kaldi-trunk/egs/timit/s5/mfcc/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
 58 | 	cw_left=5
 59 | 	cw_right=5
 60 | 	
 61 | 
 62 | lab = lab_name=lab_cd
 63 | 	lab_folder=/home/mirco/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test
 64 | 	lab_opts=ali-to-pdf
 65 | 	lab_count_file=auto
 66 | 	lab_data_folder=/home/mirco/kaldi-trunk/egs/timit/s5/data/test/
 67 | 	lab_graph=/home/mirco/kaldi-trunk/egs/timit/s5/exp/tri3/graph
 68 | 	
 69 | 
 70 | n_chunks = 1
 71 | 
 72 | [data_use]
 73 | train_with = TIMIT_tr
 74 | valid_with = TIMIT_dev
 75 | forward_with = TIMIT_test
 76 | 
 77 | [batches]
 78 | batch_size_train = 128*12 | 64*10 | 32*2
 79 | max_seq_length_train = 1000*18 | 500*6
 80 | increase_seq_length_train = False
 81 | start_seq_len_train = 100
 82 | multply_factor_seq_len_train = 2
 83 | batch_size_valid = 128
 84 | max_seq_length_valid = 1000
 85 | 
 86 | [architecture1]
 87 | arch_name = MLP_layers1
 88 | arch_proto = proto/MLP.proto
 89 | arch_library = neural_networks
 90 | arch_class = MLP
 91 | arch_pretrain_file = none
 92 | arch_freeze = False
 93 | arch_seq_model = False
 94 | dnn_lay = 1024,1024,1024,1024,N_out_lab_cd
 95 | dnn_drop = 0.15*12|0.20*12,0.15,0.15*10|0.20*14,0.15,0.0
 96 | dnn_use_laynorm_inp = False
 97 | dnn_use_batchnorm_inp = False
 98 | dnn_use_batchnorm = True,True,True,True,False
 99 | dnn_use_laynorm = False,False,False,False,False
100 | dnn_act = relu,relu,relu,relu,softmax
101 | arch_lr = 0.08*10|0.04*5|0.02*3|0.01*2|0.005*2|0.0025*2
102 | arch_halving_factor = 0.5
103 | arch_improvement_threshold = 0.001
104 | arch_opt = sgd
105 | opt_momentum = 0.0
106 | opt_weight_decay = 0.0
107 | opt_dampening = 0.0
108 | opt_nesterov = False
109 | 
110 | [model]
111 | model_proto = proto/model.proto
112 | model = out_dnn1=compute(MLP_layers1,mfcc)
113 | 	loss_final=cost_nll(out_dnn1,lab_cd)
114 | 	err_final=cost_err(out_dnn1,lab_cd)
115 | 
116 | [forward]
117 | forward_out = out_dnn1
118 | normalize_posteriors = True
119 | normalize_with_counts_from = lab_cd
120 | save_out_file = False
121 | require_decoding = True
122 | 
123 | [decoding]
124 | decoding_script_folder = kaldi_decoding_scripts/
125 | decoding_script = decode_dnn.sh
126 | decoding_proto = proto/decoding.proto
127 | min_active = 200
128 | max_active = 7000
129 | max_mem = 50000000
130 | beam = 13.0
131 | latbeam = 8.0
132 | acwt = 0.2
133 | max_arcs = -1
134 | skip_scoring = false
135 | scoring_script = local/score.sh
136 | scoring_opts = "--min-lmwt 1 --max-lmwt 10"
137 | norm_vars = False
138 | 
139 | 


--------------------------------------------------------------------------------
/cfg/TIMIT_baselines/TIMIT_MLP_fbank_autoencoder.cfg:
--------------------------------------------------------------------------------
  1 | [cfg_proto]
  2 | cfg_proto = proto/global.proto
  3 | cfg_proto_chunk = proto/global_chunk.proto
  4 | 
  5 | [exp]
  6 | cmd = 
  7 | run_nn_script = run_nn.py
  8 | out_folder = exp/TIMIT_MLP_fbank_autoencoder
  9 | seed = 2234
 10 | use_cuda = True
 11 | multi_gpu = False
 12 | save_gpumem = False
 13 | n_epochs_tr = 10
 14 | 
 15 | [dataset1]
 16 | data_name = TIMIT_tr
 17 | fea = fea_name=fbank
 18 | 	fea_lst=quick_test/data/train/feats_fbank.scp
 19 | 	fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/train/utt2spk  ark:quick_test/fbank/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 20 | 	cw_left=5
 21 | 	cw_right=5
 22 | 
 23 | lab = lab_name=lab_cd
 24 | 	lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali
 25 | 	lab_opts=ali-to-pdf
 26 | 	lab_count_file=auto
 27 | 	lab_data_folder=quick_test/data/train/
 28 | 	lab_graph=quick_test/graph
 29 | 	
 30 | n_chunks = 5
 31 | 
 32 | [dataset2]
 33 | data_name = TIMIT_dev
 34 | fea = fea_name=fbank
 35 | 	fea_lst=quick_test/data/dev/feats_fbank.scp
 36 | 	fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/dev/utt2spk  ark:quick_test/fbank/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 37 | 	cw_left=5
 38 | 	cw_right=5
 39 | 	
 40 | 	
 41 | lab = lab_name=lab_cd
 42 | 	lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali_dev
 43 | 	lab_opts=ali-to-pdf
 44 | 	lab_count_file=auto
 45 | 	lab_data_folder=quick_test/data/dev/
 46 | 	lab_graph=quick_test/graph
 47 | 	
 48 | n_chunks = 1
 49 | 
 50 | [dataset3]
 51 | data_name = TIMIT_test
 52 | fea = fea_name=fbank
 53 | 	fea_lst=quick_test/data/test/feats_fbank.scp
 54 | 	fea_opts=apply-cmvn --utt2spk=ark:quick_test/data/test/utt2spk  ark:quick_test/fbank/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 55 | 	cw_left=5
 56 | 	cw_right=5
 57 | 
 58 | lab = lab_name=lab_cd
 59 | 	lab_folder=quick_test/dnn4_pretrain-dbn_dnn_ali_test
 60 | 	lab_opts=ali-to-pdf
 61 | 	lab_count_file=auto
 62 | 	lab_data_folder=quick_test/data/test/
 63 | 	lab_graph=quick_test/graph
 64 | 	
 65 | n_chunks = 1
 66 | 
 67 | [data_use]
 68 | train_with = TIMIT_tr
 69 | valid_with = TIMIT_dev
 70 | forward_with = TIMIT_test
 71 | 
 72 | [batches]
 73 | batch_size_train = 128
 74 | max_seq_length_train = 1000
 75 | increase_seq_length_train = False
 76 | start_seq_len_train = 100
 77 | multply_factor_seq_len_train = 2
 78 | batch_size_valid = 128
 79 | max_seq_length_valid = 1000
 80 | 
 81 | [architecture1]
 82 | arch_name = MLP_encoder
 83 | arch_proto = proto/MLP.proto
 84 | arch_library = neural_networks
 85 | arch_class = MLP
 86 | arch_pretrain_file = none
 87 | arch_freeze = False
 88 | arch_seq_model = False
 89 | dnn_lay = 1024,100
 90 | dnn_drop = 0.15,0.15
 91 | dnn_use_laynorm_inp = False
 92 | dnn_use_batchnorm_inp = False
 93 | dnn_use_batchnorm = True,True
 94 | dnn_use_laynorm = False,False
 95 | dnn_act = relu,linear
 96 | arch_lr = 0.08
 97 | arch_halving_factor = 0.5
 98 | arch_improvement_threshold = 0.001
 99 | arch_opt = sgd
100 | opt_momentum = 0.0
101 | opt_weight_decay = 0.0
102 | opt_dampening = 0.0
103 | opt_nesterov = False
104 | 
105 | [architecture2]
106 | arch_name = MLP_decoder
107 | arch_proto = proto/MLP.proto
108 | arch_library = neural_networks
109 | arch_class = MLP
110 | arch_pretrain_file = none
111 | arch_freeze = False
112 | arch_seq_model = False
113 | dnn_lay = 1024,440
114 | dnn_drop = 0.15,0.0
115 | dnn_use_laynorm_inp = False
116 | dnn_use_batchnorm_inp = False
117 | dnn_use_batchnorm = True,False
118 | dnn_use_laynorm = False,False
119 | dnn_act = relu,linear
120 | arch_lr = 0.08
121 | arch_halving_factor = 0.5
122 | arch_improvement_threshold = 0.001
123 | arch_opt = sgd
124 | opt_momentum = 0.0
125 | opt_weight_decay = 0.0
126 | opt_dampening = 0.0
127 | opt_nesterov = False
128 | 
129 | 
130 | [model]
131 | model_proto = proto/model.proto
132 | model = enc_out=compute(MLP_encoder,fbank)
133 | 	dec_out=compute(MLP_decoder,enc_out)
134 | 	loss_final=mse(dec_out,fbank)
135 | 	err_final=cost_err(dec_out,lab_cd)
136 | 
137 | [forward]
138 | forward_out = enc_out
139 | normalize_posteriors = False
140 | normalize_with_counts_from = None
141 | save_out_file = True
142 | require_decoding = False
143 | 
144 | [decoding]
145 | decoding_script_folder = kaldi_decoding_scripts/
146 | decoding_script = decode_dnn.sh
147 | decoding_proto = proto/decoding.proto
148 | min_active = 200
149 | max_active = 7000
150 | max_mem = 50000000
151 | beam = 13.0
152 | latbeam = 8.0
153 | acwt = 0.2
154 | max_arcs = -1
155 | skip_scoring = false
156 | scoring_script = local/score.sh
157 | scoring_opts = "--min-lmwt 1 --max-lmwt 10"
158 | norm_vars = False
159 | 
160 | 


--------------------------------------------------------------------------------
/save_raw_fea.py:
--------------------------------------------------------------------------------
  1 | ##########################################################
  2 | # pytorch-kaldi v.0.1
  3 | # Mirco Ravanelli, Titouan Parcollet
  4 | # Mila, University of Montreal
  5 | # October 2018
  6 | #
  7 | # Description: This script generates kaldi ark files containing raw features.
  8 | # The file list must be a file containing "snt_id file.wav".
  9 | # Note that only wav files are supported here (sphere and other formats are not supported).
 10 | ##########################################################
 11 | 
 12 | 
 13 | import scipy.io.wavfile
 14 | import math
 15 | import numpy as np
 16 | import os
 17 | from data_io import read_vec_int_ark, write_mat
 18 | 
 19 | 
 20 | # Run this script once per data split (e.g., train, dev, test): uncomment the matching block of settings below.
 21 | 
 22 | lab_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/exp/dnn4_pretrain-dbn_dnn_ali_test"
 23 | lab_opts = "ali-to-pdf"
 24 | out_folder = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test"
 25 | wav_lst = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/test/wav.lst"
 26 | scp_file_out = "/users/parcollet/KALDI/kaldi-trunk/egs/timit/s5/data/raw_TIMIT_200ms/test/feats_raw.scp"
 27 | 
 28 | # lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_dev'
 29 | # lab_opts='ali-to-pdf'
 30 | # out_folder='raw_TIMIT_200ms/dev'
 31 | # wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/dev/wav_lst.scp'
 32 | # scp_file_out='quick_test/data/dev/feats_raw.scp'
 33 | 
 34 | # lab_folder='quick_test/dnn4_pretrain-dbn_dnn_ali_test'
 35 | # lab_opts='ali-to-pdf'
 36 | # out_folder='raw_TIMIT_200ms/test'
 37 | # wav_lst='/home/mirco/pytorch-kaldi-new/quick_test/data/test/wav_lst.scp'
 38 | # scp_file_out='quick_test/data/test/feats_raw.scp'
 39 | 
 40 | 
 41 | sig_fs = 16000  # Hz
 42 | sig_wlen = 200  # ms
 43 | 
 44 | lab_fs = 16000  # Hz
 45 | lab_wlen = 25  # ms
 46 | lab_wshift = 10  # ms
 47 | 
 48 | sig_wlen_samp = int((sig_fs * sig_wlen) / 1000)
 49 | lab_wlen_samp = int((lab_fs * lab_wlen) / 1000)
 50 | lab_wshift_samp = int((lab_fs * lab_wshift) / 1000)
 51 | 
 52 | 
 53 | # Create the output folder
 54 | try:
 55 |     os.stat(out_folder)
 56 | except:
 57 |     os.makedirs(out_folder)
 58 | 
 59 | 
 60 | # Creare the scp file
 61 | scp_file = open(scp_file_out, "w")
 62 | 
 63 | # reading the labels
 64 | lab = {
 65 |     k: v
 66 |     for k, v in read_vec_int_ark(
 67 |         "gunzip -c " + lab_folder + "/ali*.gz | " + lab_opts + " " + lab_folder + "/final.mdl ark:- ark:-|", out_folder
 68 |     )
 69 | }
 70 | 
 71 | # reading the list file
 72 | with open(wav_lst) as f:
 73 |     sig_lst = f.readlines()
 74 | 
 75 | sig_lst = [x.strip() for x in sig_lst]
 76 | 
 77 | for sig_file in sig_lst:
 78 |     sig_id = sig_file.split(" ")[0]
 79 |     sig_path = sig_file.split(" ")[1]
 80 |     [fs, signal] = scipy.io.wavfile.read(sig_path)
 81 |     signal = signal.astype(float) / 32768
 82 |     signal = signal / np.max(np.abs(signal))
 83 | 
 84 |     cnt_fr = 0
 85 |     beg_samp = 0
 86 |     frame_all = []
 87 | 
 88 |     while beg_samp + lab_wlen_samp < signal.shape[0]:
 89 |         sample_fr = np.zeros(sig_wlen_samp)
 90 |         central_sample_lab = int(((beg_samp + lab_wlen_samp / 2) - 1))
 91 |         central_fr_index = int(((sig_wlen_samp / 2) - 1))
 92 | 
 93 |         beg_signal_fr = int(central_sample_lab - (sig_wlen_samp / 2))
 94 |         end_signal_fr = int(central_sample_lab + (sig_wlen_samp / 2))
 95 | 
 96 |         if beg_signal_fr >= 0 and end_signal_fr <= signal.shape[0]:
 97 |             sample_fr = signal[beg_signal_fr:end_signal_fr]
 98 |         else:
 99 |             if beg_signal_fr < 0:
100 |                 n_left_samples = central_sample_lab
101 |                 sample_fr[central_fr_index - n_left_samples + 1 :] = signal[0:end_signal_fr]
102 |             if end_signal_fr > signal.shape[0]:
103 |                 n_right_samples = signal.shape[0] - central_sample_lab
104 |                 sample_fr[0 : central_fr_index + n_right_samples + 1] = signal[beg_signal_fr:]
105 | 
106 |         frame_all.append(sample_fr)
107 |         cnt_fr = cnt_fr + 1
108 |         beg_samp = beg_samp + lab_wshift_samp
109 | 
110 |     frame_all = np.asarray(frame_all)
111 | 
112 |     # Save the matrix into a kaldi ark
113 |     out_file = out_folder + "/" + sig_id + ".ark"
114 |     write_mat(out_folder, out_file, frame_all, key=sig_id)
115 |     print(sig_id)
116 |     scp_file.write(sig_id + " " + out_folder + "/" + sig_id + ".ark:" + str(len(sig_id) + 1) + "\n")
117 | 
118 |     N_fr_comp = 1 + math.floor((signal.shape[0] - 400) / 160)
119 |     # print("%s %i %i "%(lab[sig_id].shape[0],N_fr_comp,cnt_fr))
120 | 
121 | scp_file.close()
122 | 


--------------------------------------------------------------------------------
/cfg/Librispeech_baselines/libri_MLP_fmllr.cfg:
--------------------------------------------------------------------------------
  1 | [cfg_proto]
  2 | cfg_proto=proto/global.proto
  3 | cfg_proto_chunk=proto/global_chunk.proto
  4 | 
  5 | [exp]
  6 | cmd=
  7 | run_nn_script=run_nn
  8 | out_folder=exp/libri_MLP_fmllr
  9 | seed=1234
 10 | use_cuda=True
 11 | multi_gpu=False
 12 | save_gpumem=False
 13 | N_epochs_tr=24
 14 | 
 15 | [dataset1]
 16 | data_name=train_clean_100
 17 | fea:fea_name=fmllr
 18 |     fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/feats.scp
 19 |     fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/utt2spk  ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 20 |     cw_left=5
 21 |     cw_right=5
 22 | 
 23 |     
 24 | lab:lab_name=lab_cd
 25 |     lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/
 26 |     lab_opts=ali-to-pdf 
 27 |     lab_count_file=auto
 28 |     lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/train_clean_100/
 29 |     lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/
 30 | 
 31 | N_chunks=50
 32 |         
 33 | [dataset2]
 34 | data_name=dev_clean
 35 | fea:fea_name=fmllr
 36 |     fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/feats.scp
 37 |     fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/utt2spk  ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 38 |     cw_left=5
 39 |     cw_right=5
 40 | 
 41 | 
 42 | lab:lab_name=lab_cd
 43 |     lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b_ali_dev_clean_100
 44 |     lab_opts=ali-to-pdf 
 45 |     lab_count_file=auto
 46 |     lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/dev_clean/
 47 |     lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/
 48 | 
 49 | N_chunks=4
 50 | 
 51 | [dataset3]
 52 | data_name=test_clean
 53 | fea:fea_name=fmllr
 54 |     fea_lst=/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/feats.scp
 55 |     fea_opts=apply-cmvn --utt2spk=ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/utt2spk  ark:/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/data/cmvn_speaker.ark ark:- ark:- | add-deltas --delta-order=0 ark:- ark:- |
 56 |     cw_left=5
 57 |     cw_right=5
 58 | 
 59 | 
 60 | lab:lab_name=lab_cd
 61 |     lab_folder=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b_ali_test_clean_100
 62 |     lab_opts=ali-to-pdf 
 63 |     lab_count_file=auto
 64 |     lab_data_folder=/scratch/ravanelm/exp/librispeech/s5/fmllr/test_clean/
 65 |     lab_graph=/scratch/ravanelm/exp/librispeech/s5/exp/tri4b/graph_tgsmall/
 66 | 
 67 | N_chunks=8
 68 | 
 69 |         
 70 | [data_use]
 71 | train_with=train_clean_100
 72 | valid_with=dev_clean
 73 | forward_with=test_clean
 74 | 
 75 | 
 76 | [batches]
 77 | batch_size_train=128
 78 | max_seq_length_train=1000
 79 | increase_seq_length_train=False
 80 | start_seq_len_train=100
 81 | multply_factor_seq_len_train=2
 82 | batch_size_valid=128
 83 | max_seq_length_valid=1000
 84 | 
 85 | 
 86 | [architecture1]
 87 | arch_name=MLP_layers
 88 | arch_proto=proto/MLP.proto
 89 | arch_library=neural_networks
 90 | arch_class=MLP
 91 | arch_pretrain_file=none
 92 | arch_freeze=False
 93 | arch_seq_model=False
 94 | 
 95 | dnn_lay=1024,1024,1024,1024,1024,N_out_lab_cd
 96 | dnn_drop=0.15,0.15,0.15,0.15,0.15,0.0
 97 | dnn_use_laynorm_inp=False
 98 | dnn_use_batchnorm_inp=False
 99 | dnn_use_batchnorm=True,True,True,True,True,False
100 | dnn_use_laynorm=False,False,False,False,False,False
101 | dnn_act=relu,relu,relu,relu,relu,softmax
102 | 
103 | arch_lr=0.08
104 | arch_halving_factor=0.5
105 | arch_improvement_threshold=0.001
106 | arch_opt=sgd
107 | opt_momentum=0.0
108 | opt_weight_decay=0.0
109 | opt_dampening=0.0
110 | opt_nesterov=False
111 | 
112 | 
113 | [model]
114 | model_proto=proto/model.proto
115 | model:out_dnn1=compute(MLP_layers,fmllr)
116 |       loss_final=cost_nll(out_dnn1,lab_cd)
117 |       err_final=cost_err(out_dnn1,lab_cd)
118 |  
119 |       
120 | [forward]
121 | forward_out=out_dnn1
122 | normalize_posteriors=True
123 | normalize_with_counts_from=lab_cd
124 | save_out_file=False
125 | require_decoding=True
126 | 
127 | 
128 | [decoding]
129 | decoding_script_folder=kaldi_decoding_scripts/
130 | decoding_script=decode_dnn.sh
131 | decoding_proto=proto/decoding.proto
132 | min_active=200
133 | max_active=7000
134 | max_mem=50000000
135 | beam=20.0
136 | latbeam=12.0
137 | acwt=0.10
138 | max_arcs=-1
139 | skip_scoring=false
140 | scoring_script=/scratch/ravanelm/exp/librispeech/s5/local/score.sh
141 | scoring_opts="--min-lmwt 4 --max-lmwt 23"
142 | norm_vars=False
143 | 
144 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/utils/format_lm_sri.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Copyright 2012  Arnab Ghoshal
  4 | # Copyright 2010-2011  Microsoft Corporation
  5 | 
  6 | # Licensed under the Apache License, Version 2.0 (the "License");
  7 | # you may not use this file except in compliance with the License.
  8 | # You may obtain a copy of the License at
  9 | #
 10 | #  http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
 16 | # See the Apache 2 License for the specific language governing permissions and
 17 | # limitations under the License.
 18 | 
 19 | # Begin configuration section.
 20 | srilm_opts="-subset -prune-lowprobs -unk -tolower"
 21 | # end configuration sections
 22 | 
 23 | 
 24 | . utils/parse_options.sh
 25 | 
 26 | if [ $# -ne 4 ] && [ $# -ne 3 ]; then
 27 |   echo "Usage: $0 [options] <lang-dir> <arpa-LM> [<lexicon>] <out-dir>"
 28 |   echo "The <lexicon> argument is no longer needed but is supported for back compatibility"
 29 |   echo "E.g.: utils/format_lm_sri.sh data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test"
 30 |   echo "Converts ARPA-format language models to FSTs. Change the LM vocabulary using SRILM."
 31 |   echo "Note: if you want to just convert ARPA LMs to FSTs, there is a simpler way to do this"
 32 |   echo "that doesn't require SRILM: see examples in egs/wsj/s5/local/wsj_format_local_lms.sh"
 33 |   echo "options:"
 34 |   echo " --help                 # print this message and exit"
 35 |   echo " --srilm-opts STRING      # options to pass to SRILM tools (default: '$srilm_opts')"
 36 |   exit 1;
 37 | fi
 38 | 
 39 | 
 40 | if [ $# -eq 4 ] ; then
 41 |   lang_dir=$1
 42 |   lm=$2
 43 |   lexicon=$3  # accepted for back compatibility only; never read below
 44 |   out_dir=$4
 45 | else
 46 |   lang_dir=$1
 47 |   lm=$2
 48 |   out_dir=$3
 49 | fi
 50 | 
 51 | # Paths are quoted from here on so that directories containing blanks or glob
 52 | # characters are handled correctly.
 53 | mkdir -p "$out_dir"
 54 | 
 55 | for f in "$lm" "$lang_dir/words.txt"; do
 56 |   if [ ! -f "$f" ]; then
 57 |     echo "$0: expected input file $f to exist."
 58 |     exit 1;
 59 |   fi
 60 | done
 61 | 
 62 | [ -f ./path.sh ] && . ./path.sh
 63 | 
 64 | # Look for SRILM's change-lm-vocab on the PATH; otherwise fall back to the
 65 | # SRILM binaries under ../../../tools/srilm relative to the current directory.
 66 | loc=$(command -v change-lm-vocab)
 67 | if [ -z "$loc" ]; then
 68 |   if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
 69 |     sdir=$(pwd)/../../../tools/srilm/bin/i686-m64
 70 |   else
 71 |     sdir=$(pwd)/../../../tools/srilm/bin/i686
 72 |   fi
 73 |   if [ -f "$sdir/../change-lm-vocab" ]; then
 74 |     echo Using SRILM tools from $sdir
 75 |     export PATH="$PATH:$sdir:$sdir/.."
 76 |   else
 77 |     echo You appear to not have SRILM tools installed, either on your path,
 78 |     echo or installed in $sdir.  See tools/install_srilm.sh for installation
 79 |     echo instructions.
 80 |     exit 1
 81 |   fi
 82 | fi
 83 | 
 84 | echo "Converting '$lm' to FST"
 85 | # Scratch directory, removed on exit. Abort if it cannot be created so that the
 86 | # "$tmpdir/..." paths below never resolve to the filesystem root.
 87 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX) || exit 1;
 88 | trap 'rm -rf "$tmpdir"' EXIT
 89 | 
 90 | # $out_dir already exists at this point (created right after argument parsing).
 91 | cp -r "$lang_dir"/* "$out_dir" || exit 1;
 92 | 
 93 | # NOTE(review): "|| exit 1" only sees the status of the last command of each
 94 | # pipeline. "set -o pipefail" is deliberately not enabled: it would also report
 95 | # gunzip being cut short if a downstream reader stops early -- confirm before
 96 | # changing.
 97 | lm_base=$(basename "$lm" '.gz')
 98 | gunzip -c "$lm" | utils/find_arpa_oovs.pl "$out_dir/words.txt" \
 99 |   > "$out_dir/oovs_${lm_base}.txt" || exit 1;
100 | 
101 | # Removing all "illegal" combinations of <s> and </s>, which are supposed to 
102 | # occur only at beginning/end of utt.  These can cause determinization failures 
103 | # of CLG [ends up being epsilon cycles].
104 | gunzip -c "$lm" \
105 |   | grep -E -v '<s> <s>|</s> <s>|</s> </s>' \
106 |   | gzip -c > "$tmpdir/lm.gz" || exit 1;
107 | 
108 | awk '{print $1}' "$out_dir/words.txt" > "$tmpdir/voc" || exit 1;
109 | 
110 | # Change the LM vocabulary to be the intersection of the current LM vocabulary
111 | # and the set of words in the pronunciation lexicon. This also renormalizes the 
112 | # LM by recomputing the backoff weights, and remove those ngrams whose 
113 | # probabilities are lower than the backed-off estimates.
114 | # $srilm_opts is intentionally left unquoted: it holds several options that
115 | # must be split into separate words.
116 | change-lm-vocab -vocab "$tmpdir/voc" -lm "$tmpdir/lm.gz" -write-lm "$tmpdir/out_lm" \
117 |   $srilm_opts || exit 1;
118 | 
119 | arpa2fst "$tmpdir/out_lm" | fstprint \
120 |   | utils/eps2disambig.pl | utils/s2eps.pl \
121 |   | fstcompile --isymbols="$out_dir/words.txt" --osymbols="$out_dir/words.txt" \
122 |     --keep_isymbols=false --keep_osymbols=false \
123 |   | fstrmepsilon | fstarcsort --sort_type=ilabel > "$out_dir/G.fst" || exit 1;
124 | 
125 | fstisstochastic "$out_dir/G.fst"
126 | 
127 | # The output is like:
128 | # 9.14233e-05 -0.259833
129 | # we do expect the first of these 2 numbers to be close to zero (the second is
130 | # nonzero because the backoff weights make the states sum to >1).
131 | 
132 | echo "Succeeded in formatting LM '$lm' -> '$out_dir/G.fst'"
133 | 


--------------------------------------------------------------------------------
/kaldi_decoding_scripts/local/nnet/run_dnn.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Copyright 2012-2014  Brno University of Technology (Author: Karel Vesely)
  4 | # Apache 2.0
  5 | 
  6 | # This example script trains a DNN on top of fMLLR features. 
  7 | # The training is done in 3 stages,
  8 | #
  9 | # 1) RBM pre-training:
 10 | #    in this unsupervised stage we train stack of RBMs, 
 11 | #    a good starting point for frame cross-entropy training.
 12 | # 2) frame cross-entropy training:
 13 | #    the objective is to classify frames to correct pdfs.
 14 | # 3) sequence-training optimizing sMBR: 
 15 | #    the objective is to emphasize state-sequences with better 
 16 | #    frame accuracy w.r.t. reference alignment.
 17 | 
 18 | . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
 19 |            ## This relates to the queue.
 20 | 
 21 | . ./path.sh ## Source the tools/utils (import the queue.pl)
 22 | 
 23 | # Config:
 24 | gmmdir=exp/tri3
 25 | data_fmllr=data-fmllr-tri3
 26 | stage=2 # default skips stages 0-1 below; resume elsewhere with --stage=N
 27 | # End of config.
 28 | . utils/parse_options.sh || exit 1;
 29 | #
 30 | 
 31 | if [ $stage -le 0 ]; then
 32 |   # Store fMLLR features, so we can train on them easily,
 33 |   # test
 34 |   dir=$data_fmllr/test
 35 |   steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
 36 |      --transform-dir $gmmdir/decode_test \
 37 |      $dir data/test $gmmdir $dir/log $dir/data || exit 1
 38 |   # dev
 39 |   dir=$data_fmllr/dev
 40 |   steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
 41 |      --transform-dir $gmmdir/decode_dev \
 42 |      $dir data/dev $gmmdir $dir/log $dir/data || exit 1
 43 |   # train
 44 |   dir=$data_fmllr/train
 45 |   steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
 46 |      --transform-dir ${gmmdir}_ali \
 47 |      $dir data/train $gmmdir $dir/log $dir/data || exit 1
 48 |   # split the data : 90% train 10% cross-validation (held-out)
 49 |   utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 || exit 1
 50 | fi
 51 | 
 52 | if [ $stage -le 1 ]; then
 53 |   # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN)
 54 |   dir=exp/dnn4_pretrain-dbn
 55 |   (tail --pid=$$ -F $dir/log/pretrain_dbn.log 2>/dev/null)& # forward log
 56 |  # $cuda_cmd $dir/log/pretrain_dbn.log \
 57 |  #   steps/nnet/pretrain_dbn.sh --hid-dim 1024 --rbm-iter 20 $data_fmllr/train $dir || exit 1;
 58 | fi
 59 | 
 60 | if [ $stage -le 2 ]; then
 61 |   # Train the DNN optimizing per-frame cross-entropy.
 62 |   dir=exp/dnn4_pretrain-dbn_dnn
 63 |   ali=${gmmdir}_ali
 64 |   feature_transform=exp/dnn4_pretrain-dbn/final.feature_transform
 65 |   dbn=exp/dnn4_pretrain-dbn/6.dbn
 66 |   (tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log
 67 |   # Train (disabled). The launcher line is commented out together with its arguments: left live, it ran "$cuda_cmd <log>" with no command.
 68 |   # $cuda_cmd $dir/log/train_nnet.log \
 69 |   #  steps/nnet/train.sh --feature-transform $feature_transform --hid-layers 6 --learn-rate 0.008 \
 70 |   #  $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir || exit 1;
 71 |   # Decode (reuse HCLG graph)
 72 |   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.2 \
 73 |     $gmmdir/graph $data_fmllr/test $dir/decode_test || exit 1;
 74 |   steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --acwt 0.2 \
 75 |     $gmmdir/graph $data_fmllr/dev $dir/decode_dev || exit 1;
 76 | fi
 77 | 
 78 | 
 79 | # Sequence training using sMBR criterion, we do Stochastic-GD 
 80 | # with per-utterance updates. acwt 0.1 is usually a good value; 0.2 is set below.
 81 | dir=exp/dnn4_pretrain-dbn_dnn_smbr
 82 | srcdir=exp/dnn4_pretrain-dbn_dnn
 83 | acwt=0.2
 84 | 
 85 | if [ $stage -le 3 ]; then
 86 |   # First we generate lattices and alignments:
 87 |   steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
 88 |     $data_fmllr/train data/lang $srcdir ${srcdir}_ali || exit 1;
 89 |   steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --acwt $acwt \
 90 |     --lattice-beam 10.0 --beam 18.0 \
 91 |     $data_fmllr/train data/lang $srcdir ${srcdir}_denlats || exit 1;
 92 | fi
 93 | 
 94 | if [ $stage -le 4 ]; then
 95 |   # Re-train the DNN by 6 iterations of sMBR 
 96 |   steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt \
 97 |     --do-smbr true \
 98 |     $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
 99 |   # Decode
100 |   for ITER in 1 6; do
101 |     steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" \
102 |       --nnet $dir/${ITER}.nnet --acwt $acwt \
103 |       $gmmdir/graph $data_fmllr/test $dir/decode_test_it${ITER} || exit 1
104 |     steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" \
105 |       --nnet $dir/${ITER}.nnet --acwt $acwt \
106 |       $gmmdir/graph $data_fmllr/dev $dir/decode_dev_it${ITER} || exit 1
107 |   done 
108 | fi
109 | 
110 | echo Success
111 | exit 0
112 | 
113 | # Getting results [see RESULTS file]
114 | # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
115 | 


--------------------------------------------------------------------------------