├── .gitignore
├── LICENSE
├── README.md
├── conf
│   ├── mfcc.conf
│   └── topo_orig.proto
├── data_prep.py
├── lm
│   ├── prepare_lm.sh
│   └── yesno-unigram.arpabo
├── local
│   └── score.sh
├── path.sh
├── steps
│   ├── align_basis_fmllr.sh
│   ├── align_fmllr.sh
│   ├── align_fmllr_lats.sh
│   ├── align_lvtln.sh
│   ├── align_raw_fmllr.sh
│   ├── align_sgmm.sh
│   ├── align_sgmm2.sh
│   ├── align_si.sh
│   ├── append_feats.sh
│   ├── cleanup
│   │   ├── clean_and_segment_data.sh
│   │   ├── combine_short_segments.py
│   │   ├── create_segments_from_ctm.pl
│   │   ├── debug_lexicon.sh
│   │   ├── decode_segmentation.sh
│   │   ├── find_bad_utts.sh
│   │   ├── find_bad_utts_nnet.sh
│   │   ├── get_ctm_edits.py
│   │   ├── get_non_scored_words.py
│   │   ├── internal
│   │   │   ├── get_ctm_edits.py
│   │   │   ├── get_non_scored_words.py
│   │   │   ├── make_one_biased_lm.py
│   │   │   ├── modify_ctm_edits.py
│   │   │   ├── segment_ctm_edits.py
│   │   │   └── taint_ctm_edits.py
│   │   ├── lattice_oracle_align.sh
│   │   ├── make_biased_lm_graphs.sh
│   │   ├── make_biased_lms.py
│   │   ├── make_one_biased_lm.py
│   │   ├── make_segmentation_data_dir.sh
│   │   ├── make_segmentation_graph.sh
│   │   ├── make_utterance_fsts.pl
│   │   ├── make_utterance_graph.sh
│   │   ├── modify_ctm_edits.py
│   │   ├── segment_ctm_edits.py
│   │   ├── split_long_utterance.sh
│   │   └── taint_ctm_edits.py
│   ├── combine_ali_dirs.sh
│   ├── compute_cmvn_stats.sh
│   ├── conf
│   │   ├── append_eval_to_ctm.py
│   │   ├── append_prf_to_ctm.py
│   │   ├── apply_calibration.sh
│   │   ├── convert_ctm_to_tra.py
│   │   ├── lattice_depth_per_frame.sh
│   │   ├── parse_arpa_unigrams.py
│   │   ├── prepare_calibration_data.py
│   │   ├── prepare_word_categories.py
│   │   └── train_calibration.sh
│   ├── data
│   │   ├── data_dir_manipulation_lib.py
│   │   └── reverberate_data_dir.py
│   ├── decode.sh
│   ├── decode_basis_fmllr.sh
│   ├── decode_biglm.sh
│   ├── decode_combine.sh
│   ├── decode_fmllr.sh
│   ├── decode_fmllr_extra.sh
│   ├── decode_fmmi.sh
│   ├── decode_fromlats.sh
│   ├── decode_lvtln.sh
│   ├── decode_nnet.sh
│   ├── decode_nolats.sh
│   ├── decode_raw_fmllr.sh
│   ├── decode_sgmm.sh
│   ├── decode_sgmm2.sh
│   ├── decode_sgmm2_fromlats.sh
│   ├── decode_sgmm2_rescore.sh
│   ├── decode_sgmm2_rescore_project.sh
│   ├── decode_sgmm_fromlats.sh
│   ├── decode_sgmm_rescore.sh
│   ├── decode_si.sh
│   ├── decode_with_map.sh
│   ├── diagnostic
│   │   ├── analyze_alignments.sh
│   │   ├── analyze_lats.sh
│   │   ├── analyze_lattice_depth_stats.py
│   │   └── analyze_phone_length_stats.py
│   ├── get_ctm.sh
│   ├── get_fmllr_basis.sh
│   ├── get_lexicon_probs.sh
│   ├── get_prons.sh
│   ├── get_train_ctm.sh
│   ├── info
│   │   ├── chain_dir_info.pl
│   │   ├── gmm_dir_info.pl
│   │   ├── nnet2_dir_info.pl
│   │   └── nnet3_dir_info.pl
│   ├── kl_hmm
│   │   ├── build_tree.sh
│   │   ├── decode_kl_hmm.sh
│   │   └── train_kl_hmm.sh
│   ├── lmrescore.sh
│   ├── lmrescore_const_arpa.sh
│   ├── lmrescore_rnnlm_lat.sh
│   ├── make_denlats.sh
│   ├── make_denlats_sgmm.sh
│   ├── make_denlats_sgmm2.sh
│   ├── make_fbank.sh
│   ├── make_fbank_pitch.sh
│   ├── make_index.sh
│   ├── make_mfcc.sh
│   ├── make_mfcc_pitch.sh
│   ├── make_mfcc_pitch_online.sh
│   ├── make_phone_graph.sh
│   ├── make_plp.sh
│   ├── make_plp_pitch.sh
│   ├── mixup.sh
│   ├── nnet
│   │   ├── align.sh
│   │   ├── decode.sh
│   │   ├── make_bn_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_fmllr_feats.sh
│   │   ├── make_fmmi_feats.sh
│   │   ├── make_priors.sh
│   │   ├── pretrain_dbn.sh
│   │   ├── train.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mpe.sh
│   │   └── train_scheduler.sh
│   ├── nnet2
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── convert_lda_to_raw.sh
│   │   ├── convert_nnet1_to_nnet2.sh
│   │   ├── create_appended_model.sh
│   │   ├── decode.sh
│   │   ├── dump_bottleneck_features.sh
│   │   ├── get_egs.sh
│   │   ├── get_egs2.sh
│   │   ├── get_egs_discriminative2.sh
│   │   ├── get_lda.sh
│   │   ├── get_lda_block.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_perturbed_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_multisplice_configs.py
│   │   ├── relabel_egs.sh
│   │   ├── relabel_egs2.sh
│   │   ├── remove_egs.sh
│   │   ├── retrain_fast.sh
│   │   ├── retrain_simple2.sh
│   │   ├── retrain_tanh.sh
│   │   ├── train_block.sh
│   │   ├── train_convnet_accel2.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_discriminative2.sh
│   │   ├── train_discriminative_multilang2.sh
│   │   ├── train_more.sh
│   │   ├── train_more2.sh
│   │   ├── train_multilang2.sh
│   │   ├── train_multisplice_accel2.sh
│   │   ├── train_multisplice_ensemble.sh
│   │   ├── train_pnorm.sh
│   │   ├── train_pnorm_accel2.sh
│   │   ├── train_pnorm_bottleneck_fast.sh
│   │   ├── train_pnorm_ensemble.sh
│   │   ├── train_pnorm_fast.sh
│   │   ├── train_pnorm_multisplice.sh
│   │   ├── train_pnorm_multisplice2.sh
│   │   ├── train_pnorm_simple.sh
│   │   ├── train_pnorm_simple2.sh
│   │   ├── train_tanh.sh
│   │   ├── train_tanh_bottleneck.sh
│   │   ├── train_tanh_fast.sh
│   │   └── update_nnet.sh
│   ├── nnet3
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── chain
│   │   │   ├── build_tree.sh
│   │   │   ├── gen_topo.pl
│   │   │   ├── gen_topo.py
│   │   │   ├── gen_topo2.py
│   │   │   ├── gen_topo3.py
│   │   │   ├── gen_topo4.py
│   │   │   ├── gen_topo5.py
│   │   │   ├── get_egs.sh
│   │   │   ├── nnet3_chain_lib.py
│   │   │   ├── train.py
│   │   │   └── train_tdnn.sh
│   │   ├── components.py
│   │   ├── decode.sh
│   │   ├── dot
│   │   │   ├── descriptor_parser.py
│   │   │   └── nnet3_to_dot.py
│   │   ├── get_egs.sh
│   │   ├── get_egs_discriminative.sh
│   │   ├── get_egs_targets.sh
│   │   ├── get_successful_models.py
│   │   ├── lstm
│   │   │   ├── make_configs.py
│   │   │   └── train.sh
│   │   ├── make_denlats.sh
│   │   ├── make_jesus_configs.py
│   │   ├── make_tdnn_configs.py
│   │   ├── nnet3_to_dot.sh
│   │   ├── nnet3_train_lib.py
│   │   ├── report
│   │   │   ├── generate_plots.py
│   │   │   └── nnet3_log_parse_lib.py
│   │   ├── tdnn
│   │   │   ├── make_configs.py
│   │   │   ├── train.sh
│   │   │   └── train_raw_nnet.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_dnn.py
│   │   ├── train_rnn.py
│   │   └── train_tdnn.sh
│   ├── online
│   │   ├── decode.sh
│   │   ├── nnet2
│   │   │   ├── align.sh
│   │   │   ├── copy_data_dir.sh
│   │   │   ├── decode.sh
│   │   │   ├── dump_nnet_activations.sh
│   │   │   ├── extract_ivectors.sh
│   │   │   ├── extract_ivectors_online.sh
│   │   │   ├── get_egs.sh
│   │   │   ├── get_egs2.sh
│   │   │   ├── get_egs_discriminative2.sh
│   │   │   ├── make_denlats.sh
│   │   │   ├── prepare_online_decoding.sh
│   │   │   ├── prepare_online_decoding_retrain.sh
│   │   │   ├── prepare_online_decoding_transfer.sh
│   │   │   ├── train_diag_ubm.sh
│   │   │   └── train_ivector_extractor.sh
│   │   ├── nnet3
│   │   │   ├── decode.sh
│   │   │   └── prepare_online_decoding.sh
│   │   └── prepare_online_decoding.sh
│   ├── oracle_wer.sh
│   ├── paste_feats.sh
│   ├── resegment_data.sh
│   ├── resegment_text.sh
│   ├── rnnlmrescore.sh
│   ├── score_kaldi.sh
│   ├── score_kaldi_compare.sh
│   ├── search_index.sh
│   ├── select_feats.sh
│   ├── shift_feats.sh
│   ├── tandem
│   │   ├── align_fmllr.sh
│   │   ├── align_sgmm.sh
│   │   ├── align_sgmm2.sh
│   │   ├── align_si.sh
│   │   ├── decode.sh
│   │   ├── decode_fmllr.sh
│   │   ├── decode_sgmm.sh
│   │   ├── decode_sgmm2.sh
│   │   ├── decode_si.sh
│   │   ├── make_denlats.sh
│   │   ├── make_denlats_sgmm.sh
│   │   ├── make_denlats_sgmm2.sh
│   │   ├── mk_aslf_lda_mllt.sh
│   │   ├── mk_aslf_sgmm2.sh
│   │   ├── train_deltas.sh
│   │   ├── train_lda_mllt.sh
│   │   ├── train_mllt.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mmi_sgmm.sh
│   │   ├── train_mmi_sgmm2.sh
│   │   ├── train_mono.sh
│   │   ├── train_sat.sh
│   │   ├── train_sgmm.sh
│   │   ├── train_sgmm2.sh
│   │   └── train_ubm.sh
│   ├── train_deltas.sh
│   ├── train_diag_ubm.sh
│   ├── train_lda_mllt.sh
│   ├── train_lvtln.sh
│   ├── train_map.sh
│   ├── train_mmi.sh
│   ├── train_mmi_fmmi.sh
│   ├── train_mmi_fmmi_indirect.sh
│   ├── train_mmi_sgmm.sh
│   ├── train_mmi_sgmm2.sh
│   ├── train_mono.sh
│   ├── train_mpe.sh
│   ├── train_nnet.sh
│   ├── train_quick.sh
│   ├── train_raw_sat.sh
│   ├── train_sat.sh
│   ├── train_sat_basis.sh
│   ├── train_segmenter.sh
│   ├── train_sgmm.sh
│   ├── train_sgmm2.sh
│   ├── train_sgmm2_group.sh
│   ├── train_smbr.sh
│   ├── train_ubm.sh
│   └── word_align_lattices.sh
├── utils
│   ├── add_disambig.pl
│   ├── add_lex_disambig.pl
│   ├── analyze_segments.pl
│   ├── apply_map.pl
│   ├── best_wer.sh
│   ├── build_const_arpa_lm.sh
│   ├── combine_data.sh
│   ├── convert_ctm.pl
│   ├── convert_slf.pl
│   ├── convert_slf_parallel.sh
│   ├── copy_data_dir.sh
│   ├── create_data_link.pl
│   ├── create_split_dir.pl
│   ├── data
│   │   ├── combine_data.sh
│   │   ├── combine_short_segments.sh
│   │   ├── copy_data_dir.sh
│   │   ├── extend_segment_times.py
│   │   ├── fix_data_dir.sh
│   │   ├── get_frame_shift.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_segments_for_data.sh
│   │   ├── get_utt2dur.sh
│   │   ├── internal
│   │   │   ├── choose_utts_to_combine.py
│   │   │   └── modify_speaker_info.py
│   │   ├── modify_speaker_info.sh
│   │   ├── normalize_data_range.pl
│   │   ├── perturb_data_dir_speed.sh
│   │   ├── perturb_data_dir_speed_3way.sh
│   │   ├── perturb_data_dir_volume.sh
│   │   ├── remove_dup_utts.sh
│   │   ├── split_data.sh
│   │   ├── subsegment_data_dir.sh
│   │   ├── subset_data_dir.sh
│   │   └── validate_data_dir.sh
│   ├── dict_dir_add_pronprobs.sh
│   ├── eps2disambig.pl
│   ├── filt.py
│   ├── filter_scp.pl
│   ├── filter_scps.pl
│   ├── find_arpa_oovs.pl
│   ├── fix_ctm.sh
│   ├── fix_data_dir.sh
│   ├── format_lm.sh
│   ├── format_lm_sri.sh
│   ├── gen_topo.pl
│   ├── int2sym.pl
│   ├── kwslist_post_process.pl
│   ├── lang
│   │   ├── add_lex_disambig.pl
│   │   ├── check_g_properties.pl
│   │   ├── check_phones_compatible.sh
│   │   ├── prepare_lang.sh
│   │   └── validate_lang.pl
│   ├── ln.pl
│   ├── make_lexicon_fst.pl
│   ├── make_lexicon_fst_silprob.pl
│   ├── make_phone_bigram_lang.sh
│   ├── make_unigram_grammar.pl
│   ├── map_arpa_lm.pl
│   ├── mkgraph.sh
│   ├── nnet-cpu
│   │   ├── make_nnet_config.pl
│   │   ├── make_nnet_config_block.pl
│   │   ├── make_nnet_config_preconditioned.pl
│   │   └── update_learning_rates.pl
│   ├── nnet
│   │   ├── gen_dct_mat.py
│   │   ├── gen_hamm_mat.py
│   │   ├── gen_splice.py
│   │   ├── make_blstm_proto.py
│   │   ├── make_cnn2d_proto.py
│   │   ├── make_cnn_proto.py
│   │   ├── make_lstm_proto.py
│   │   └── make_nnet_proto.py
│   ├── parse_options.sh
│   ├── pbs.pl
│   ├── perturb_data_dir_speed.sh
│   ├── pinyin_map.pl
│   ├── prepare_lang.sh
│   ├── prepare_online_nnet_dist_build.sh
│   ├── queue.pl
│   ├── reduce_data_dir.sh
│   ├── reduce_data_dir_by_reclist.sh
│   ├── remove_data_links.sh
│   ├── remove_oovs.pl
│   ├── reverse_arpa.py
│   ├── rnnlm_compute_scores.sh
│   ├── run.pl
│   ├── s2eps.pl
│   ├── scoring
│   │   ├── wer_ops_details.pl
│   │   ├── wer_per_spk_details.pl
│   │   ├── wer_per_utt_details.pl
│   │   └── wer_report.pl
│   ├── segmentation.pl
│   ├── show_lattice.sh
│   ├── shuffle_list.pl
│   ├── slurm.pl
│   ├── spk2utt_to_utt2spk.pl
│   ├── split_data.sh
│   ├── split_scp.pl
│   ├── ssh.pl
│   ├── subset_data_dir.sh
│   ├── subset_data_dir_tr_cv.sh
│   ├── subset_scp.pl
│   ├── summarize_logs.pl
│   ├── summarize_warnings.pl
│   ├── sym2int.pl
│   ├── utt2spk_to_spk2utt.pl
│   ├── validate_data_dir.sh
│   ├── validate_dict_dir.pl
│   ├── validate_lang.pl
│   └── write_kwslist.pl
└── waves_yesno
    ├── 0_0_0_0_1_1_1_1.wav
    ├── 0_0_0_1_0_0_0_1.wav
    ├── 0_0_0_1_0_1_1_0.wav
    ├── 0_0_1_0_0_0_1_0.wav
    ├── 0_0_1_0_0_1_1_0.wav
    ├── 0_0_1_0_0_1_1_1.wav
    ├── 0_0_1_0_1_0_0_0.wav
    ├── 0_0_1_0_1_0_0_1.wav
    ├── 0_0_1_0_1_0_1_1.wav
    ├── 0_0_1_1_0_0_0_1.wav
    ├── 0_0_1_1_0_1_0_0.wav
    ├── 0_0_1_1_0_1_1_0.wav
    ├── 0_0_1_1_0_1_1_1.wav
    ├── 0_0_1_1_1_0_0_0.wav
    ├── 0_0_1_1_1_0_0_1.wav
    ├── 0_0_1_1_1_1_0_0.wav
    ├── 0_0_1_1_1_1_1_0.wav
    ├── 0_1_0_0_0_1_0_0.wav
    ├── 0_1_0_0_0_1_1_0.wav
    ├── 0_1_0_0_1_0_1_0.wav
    ├── 0_1_0_0_1_0_1_1.wav
    ├── 0_1_0_1_0_0_0_0.wav
    ├── 0_1_0_1_1_0_1_0.wav
    ├── 0_1_0_1_1_1_0_0.wav
    ├── 0_1_1_0_0_1_1_0.wav
    ├── 0_1_1_0_0_1_1_1.wav
    ├── 0_1_1_1_0_0_0_0.wav
    ├── 0_1_1_1_0_0_1_0.wav
    ├── 0_1_1_1_0_1_0_1.wav
    ├── 0_1_1_1_1_0_1_0.wav
    ├── 0_1_1_1_1_1_1_1.wav
    ├── 1_0_0_0_0_0_0_0.wav
    ├── 1_0_0_0_0_0_0_1.wav
    ├── 1_0_0_0_0_0_1_1.wav
    ├── 1_0_0_0_1_0_0_1.wav
    ├── 1_0_0_1_0_1_1_1.wav
    ├── 1_0_1_0_1_0_0_1.wav
    ├── 1_0_1_1_0_1_1_1.wav
    ├── 1_0_1_1_1_0_1_0.wav
    ├── 1_0_1_1_1_1_0_1.wav
    ├── 1_1_0_0_0_0_0_1.wav
    ├── 1_1_0_0_0_1_1_1.wav
    ├── 1_1_0_0_1_0_1_0.wav
    ├── 1_1_0_0_1_0_1_1.wav
    ├── 1_1_0_0_1_1_1_0.wav
    ├── 1_1_0_1_0_1_0_0.wav
    ├── 1_1_0_1_0_1_1_0.wav
    ├── 1_1_0_1_1_0_0_1.wav
    ├── 1_1_0_1_1_0_1_1.wav
    ├── 1_1_0_1_1_1_1_0.wav
    ├── 1_1_1_0_0_0_0_1.wav
    ├── 1_1_1_0_0_1_0_1.wav
    ├── 1_1_1_0_0_1_1_1.wav
    ├── 1_1_1_0_1_0_1_0.wav
    ├── 1_1_1_0_1_0_1_1.wav
    ├── 1_1_1_1_0_0_1_0.wav
    ├── 1_1_1_1_0_1_0_0.wav
    ├── 1_1_1_1_1_0_0_0.wav
    ├── 1_1_1_1_1_1_0_0.wav
    ├── 1_1_1_1_1_1_1_1.wav
    └── README
/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false # only non-default option.
2 | --sample-frequency=8000 # the yesno recordings are sampled at 8kHz
3 |
--------------------------------------------------------------------------------
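
Note: this config is consumed by compute-mfcc-feats via steps/make_mfcc.sh. A minimal sketch of feature extraction for this recipe (directory names are illustrative; --nj 1 because the yesno data is tiny):

  for x in train_yesno test_yesno; do
    steps/make_mfcc.sh --nj 1 --mfcc-config conf/mfcc.conf data/$x exp/make_mfcc/$x mfcc
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc
  done
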
/conf/topo_orig.proto:
--------------------------------------------------------------------------------
1 | <Topology>
2 | <TopologyEntry>
3 | <ForPhones>
4 | NONSILENCEPHONES
5 | </ForPhones>
6 | <State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
7 | <State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
8 | <State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
9 | <State> 3 </State>
10 | </TopologyEntry>
11 | <TopologyEntry>
12 | <ForPhones>
13 | SILENCEPHONES
14 | </ForPhones>
15 | <State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
16 | <State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
17 | <State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
18 | <State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
19 | <State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
20 | <State> 5 </State>
21 | </TopologyEntry>
22 | </Topology>
23 |
--------------------------------------------------------------------------------
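
Note: NONSILENCEPHONES and SILENCEPHONES are placeholders for integer phone ids. A minimal sketch of one way they might be filled in, assuming a lang directory with the usual colon-separated lists phones/nonsilence.csl and phones/silence.csl (the exact substitution step is up to the recipe):

  nonsil=$(tr ':' ' ' < data/lang/phones/nonsilence.csl)
  sil=$(tr ':' ' ' < data/lang/phones/silence.csl)
  sed -e "s/NONSILENCEPHONES/$nonsil/" -e "s/SILENCEPHONES/$sil/" \
    conf/topo_orig.proto > data/lang/topo
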
/data_prep.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | import os
4 | import os.path
5 | import sys
6 |
7 | zeroes = []
8 | ones = []
9 | for fn in os.listdir('waves_yesno'):
10 |     if fn.startswith('0'):
11 |         zeroes.append(fn)  # => training set
12 |     elif fn.startswith('1'):
13 |         ones.append(fn)  # => test set
14 |
15 | # make sure the target data directories exist before writing into them
16 | for d in ('data/train_yesno', 'data/test_yesno'):
17 |     if not os.path.exists(d):
18 |         os.makedirs(d)
19 |
20 | def text(filenames):
21 |     # "<utt_id> <transcript>", e.g. "0_0_1_... NO NO YES ..."
22 |     results = []
23 |     for filename in filenames:
24 |         basename = filename.split('.')[0]
25 |         transcript = basename.replace('1', 'YES').replace('0', 'NO').replace('_', ' ')
26 |         results.append('{} {}'.format(basename, transcript))
27 |     return '\n'.join(sorted(results))
28 |
29 | with open('data/train_yesno/text', 'w') as train_text, open('data/test_yesno/text', 'w') as test_text:
30 |     train_text.write(text(zeroes))
31 |     test_text.write(text(ones))
32 |
33 | def wav_scp(filenames):
34 |     # "<utt_id> <path-to-wav>", relative to the directory the run is started from
35 |     results = ['{} waves_yesno/{}'.format(fn.split('.')[0], fn) for fn in filenames]
36 |     return '\n'.join(sorted(results))
37 |
38 | with open('data/train_yesno/wav.scp', 'w') as train_wav, open('data/test_yesno/wav.scp', 'w') as test_wav:
39 |     train_wav.write(wav_scp(zeroes))
40 |     test_wav.write(wav_scp(ones))
41 |
42 | def utt2spk(filenames):
43 |     # "<utt_id> <speaker_id>"; the yesno corpus has no speaker labels, so we
44 |     # use the utterance id as its own speaker id (a common Kaldi convention)
45 |     results = ['{0} {0}'.format(fn.split('.')[0]) for fn in filenames]
46 |     return '\n'.join(sorted(results))
47 |
48 | with open('data/train_yesno/utt2spk', 'w') as train_utt, open('data/test_yesno/utt2spk', 'w') as test_utt:
49 |     train_utt.write(utt2spk(zeroes))
50 |     test_utt.write(utt2spk(ones))
51 |
52 | # note: spk2utt is not written here; it can be generated from utt2spk with the
53 | # Kaldi util utils/utt2spk_to_spk2utt.pl
--------------------------------------------------------------------------------
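
Note: once data_prep.py has run, spk2utt can be derived from utt2spk with the utility mentioned in the script, and the directories sanity-checked; a sketch (--no-feats because features have not been extracted yet):

  python data_prep.py
  for d in data/train_yesno data/test_yesno; do
    utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt
    utils/fix_data_dir.sh $d
    utils/validate_data_dir.sh --no-feats $d
  done
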
/lm/prepare_lm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | . path.sh
4 |
5 | echo Preparing language models for test
6 |
7 | for lm_suffix in tg; do
8 | test=data/lang_test_${lm_suffix}
9 |
10 | rm -rf data/lang_test_${lm_suffix}
11 | cp -r data/lang data/lang_test_${lm_suffix}
12 |
13 | arpa2fst --disambig-symbol=#0 --read-symbol-table=$test/words.txt lm/task.arpabo $test/G.fst
14 |
15 | fstisstochastic $test/G.fst
16 |
17 | # The output is like:
18 | # 9.14233e-05 -0.259833
19 | # we do expect the first of these 2 numbers to be close to zero (the second is
20 | # nonzero because the backoff weights make the states sum to >1).
21 | # Because of the <s> fiasco for these particular LMs, the first number is not
22 | # as close to zero as it could be.
23 |
24 | # Everything below is only for diagnostic.
25 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
26 | # this might cause determinization failure of CLG.
27 | # #0 is treated as an empty word.
28 | mkdir -p tmpdir.g
29 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
30 | < dict/lexicon.txt >tmpdir.g/select_empty.fst.txt
31 | fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt tmpdir.g/select_empty.fst.txt | \
32 | fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > tmpdir.g/empty_words.fst
33 | fstinfo tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' &&
34 | echo "Language model has cycles with empty words" && exit 1
35 | rm -r tmpdir.g
36 | done
37 |
38 | echo "Succeeded in formatting data."
39 |
--------------------------------------------------------------------------------
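
Note: after prepare_lm.sh has run, the compiled grammar FST can be inspected with the OpenFst tools already on the path; a quick sanity-check sketch (lang_test_tg matches the lm_suffix used above):

  fstinfo data/lang_test_tg/G.fst | grep -E '# of (states|arcs)'
  fstprint --isymbols=data/lang_test_tg/words.txt --osymbols=data/lang_test_tg/words.txt \
    data/lang_test_tg/G.fst | head
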
/lm/yesno-unigram.arpabo:
--------------------------------------------------------------------------------
1 |
2 | \data\
3 | ngram 1=4
4 |
5 | \1-grams:
6 | -1 NO
7 | -1 YES
8 | -99 <s>
9 | -1 </s>
10 |
11 | \end\
12 |
--------------------------------------------------------------------------------
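
Note: ARPA files store log10 probabilities; the -99 on <s> is the conventional stand-in for "no unigram mass". A small awk sketch to view the 1-grams in linear probability:

  awk '/\\1-grams:/{u=1; next} /\\end\\/{u=0}
       u && NF>=2 {printf "%-5s p=%g\n", $2, 10^$1}' lm/yesno-unigram.arpabo
  # e.g. "-1 NO" prints as "NO p=0.1"; "<s>" comes out as p~=1e-99
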
/local/score.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
3 | # Apache 2.0
4 |
5 | [ -f ./path.sh ] && . ./path.sh
6 |
7 | # begin configuration section.
8 | cmd=run.pl
9 | stage=0
10 | decode_mbr=true
11 | word_ins_penalty=0.0
12 | min_lmwt=7
13 | max_lmwt=11
14 | #end configuration section.
15 |
16 | [ -f ./path.sh ] && . ./path.sh
17 | . parse_options.sh || exit 1;
18 |
19 | if [ $# -ne 3 ]; then
20 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
21 | echo " Options:"
22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
23 | echo " --stage (0|1|2) # start scoring script from part-way through."
24 | echo " --decode_mbr (true/false) # minimum Bayes risk decoding (confusion network)."
25 | echo " --min_lmwt # minimum LM-weight for lattice rescoring "
26 | echo " --max_lmwt # maximum LM-weight for lattice rescoring "
27 | exit 1;
28 | fi
29 |
30 | data=$1
31 | lang_or_graph=$2
32 | dir=$3
33 |
34 | symtab=$lang_or_graph/words.txt
35 |
36 | for f in $symtab $dir/lat.1.gz $data/text; do
37 | [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
38 | done
39 |
40 | mkdir -p $dir/scoring/log
41 |
42 | cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > $dir/scoring/test_filt.txt
43 |
44 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \
45 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
46 | lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \
47 | lattice-best-path --word-symbol-table=$symtab \
48 | ark:- ark,t:$dir/scoring/LMWT.tra || exit 1;
49 |
50 | # Note: the double level of quoting for the sed command
51 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
52 | cat $dir/scoring/LMWT.tra \| \
53 | utils/int2sym.pl -f 2- $symtab \| sed 's:\<UNK\>::g' \| \
54 | compute-wer --text --mode=present \
55 | ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1;
56 |
57 | exit 0;
58 |
--------------------------------------------------------------------------------
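
Note: score.sh is normally called from the decoding scripts, but it can be run by hand; a sketch with illustrative directory names, followed by picking the best WER across LM weights:

  local/score.sh --cmd run.pl data/test_yesno data/lang_test_tg exp/mono/decode_test_yesno
  grep WER exp/mono/decode_test_yesno/wer_* | utils/best_wer.sh
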
/path.sh:
--------------------------------------------------------------------------------
1 | export KALDI_ROOT=YOUR_KALDI_PATH
2 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
3 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
4 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
5 | . $KALDI_ROOT/tools/config/common_path.sh
6 | export LC_ALL=C
7 |
8 |
9 |
--------------------------------------------------------------------------------
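
Note: path.sh must be edited before anything else is run. A sketch of the one required change and a quick check that the Kaldi and OpenFst binaries resolve:

  sed -i 's#YOUR_KALDI_PATH#/path/to/kaldi#' path.sh   # or edit the first line by hand
  . ./path.sh
  command -v compute-mfcc-feats arpa2fst fstcompile    # should all resolve under $KALDI_ROOT
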
/steps/append_feats.sh:
--------------------------------------------------------------------------------
1 | paste_feats.sh
--------------------------------------------------------------------------------
/steps/cleanup/make_utterance_fsts.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | use warnings; #sed replacement for -w perl parameter
3 |
4 | # makes unigram decoding-graph FSTs specific to each utterance, where the
5 | # supplied top-n-words list together with the supervision text of the utterance are
6 | # combined.
7 |
8 | if (@ARGV != 1) {
9 |   print STDERR "** Warning: this script is deprecated and will be removed. See\n" .
10 |     "** steps/cleanup/make_biased_lm_graphs.sh.\n" .
11 |     "Usage: make_utterance_fsts.pl top-words-file.txt < text-archive > fsts-archive\n" .
12 |     "e.g.: utils/sym2int.pl -f 2- data/lang/words.txt data/train/text | \\\n" .
13 |     "  make_utterance_fsts.pl exp/foo/top_words.int | compile-train-graphs-fsts ... \n";
14 |   exit(1);
15 | }
16 |
17 | ($top_words_file) = @ARGV;
18 |
19 | open(F, "<$top_words_file") || die "opening $top_words_file";
20 |
21 | %top_word_probs = ( );
22 |
23 | while(<F>) {
24 |   @A = split;
25 |   (@A == 2 && $A[0] > 0.0) || die "Bad line $_ in $top_words_file";
26 |   $A[1] =~ m/^[0-9]+$/ || die "Expecting numeric word-ids in $top_words_file: $_\n";
27 |   $top_word_probs{$A[1]} += $A[0];
28 | }
29 |
30 | while (<STDIN>) {
31 |   @A = split;
32 |   $utterance_id = shift @A;
33 |   print "$utterance_id\n";
34 |   $num_words = @A + 0; # length of array @A
35 |   %word_probs = %top_word_probs;
36 |   foreach $w (@A) {
37 |     $w =~ m/^[0-9]+$/ || die "Expecting numeric word-ids as stdin: $_";
38 |     $word_probs{$w} += 1.0 / $num_words;
39 |   }
40 |   foreach $w (keys %word_probs) {
41 |     $prob = $word_probs{$w};
42 |     $prob > 0.0 || die "Word $w with bad probability $prob, utterance-id = $utterance_id\n";
43 |     $cost = -log($prob);
44 |     print "0 0 $w $w $cost\n";
45 |   }
46 |   $final_cost = -log(1.0 / $num_words);
47 |   print "0 $final_cost\n";
48 |   print "\n"; # Empty line terminates the FST in the text-archive format.
49 | }
50 |
--------------------------------------------------------------------------------
/steps/combine_ali_dirs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016 Xiaohui Zhang Apache 2.0.
3 |
4 | # This script operates on alignment directories, such as exp/tri4a_ali.
5 | # The output is a new ali dir which has alignments from all the input ali dirs.
6 |
7 | # Begin configuration section.
8 | cmd=run.pl
9 | extra_files=
10 | num_jobs=4
11 | # End configuration section.
12 | echo "$0 $@" # Print the command line for logging
13 |
14 | if [ -f path.sh ]; then . ./path.sh; fi
15 | . parse_options.sh || exit 1;
16 |
17 | if [[ $# -lt 3 ]]; then
18 | echo "Usage: $0 [options] <data> <dest-ali-dir> <src-ali-dir1> <src-ali-dir2> ..."
19 | echo "e.g.: $0 --num-jobs 32 data/train exp/tri3_ali_combined exp/tri3_ali_1 exp/tri3_ali_2"
20 | echo "Options:"
21 | echo " --extra-files # specify additional files in 'src-ali-dir1' to copy"
22 | echo " --num-jobs # number of jobs used to split the data directory."
23 | echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones."
24 | echo " Other than alignments, only files from the first src ali dir are copied."
25 | exit 1;
26 | fi
27 |
28 | data=$1;
29 | shift;
30 | dest=$1;
31 | shift;
32 | first_src=$1;
33 |
34 | mkdir -p $dest;
35 | rm $dest/{ali.*.gz,num_jobs} 2>/dev/null
36 |
37 | cp $first_src/phones.txt $dest || exit 1;
38 |
39 | export LC_ALL=C
40 |
41 | for dir in $*; do
42 | if [ ! -f $dir/ali.1.gz ]; then
43 | echo "$0: check if alignments (ali.*.gz) are present in $dir."
44 | exit 1;
45 | fi
46 | done
47 |
48 | for dir in $*; do
49 | for f in tree; do
50 | diff $first_src/$f $dir/$f 1>/dev/null 2>&1
51 | if [ $? -ne 0 ]; then
52 | echo "$0: Cannot combine alignment directories with different $f files." && exit 1;
53 | fi
54 | done
55 | done
56 |
57 | for f in final.mdl tree cmvn_opts num_jobs $extra_files; do
58 | if [ ! -f $first_src/$f ]; then
59 | echo "$0: no such file $first_src/$f"
60 | exit 1;
61 | fi
62 | cp $first_src/$f $dest/
63 | done
64 |
65 | src_id=0
66 | temp_dir=$dest/temp
67 | [ -d $temp_dir ] && rm -r $temp_dir;
68 | mkdir -p $temp_dir
69 | echo "$0: dumping alignments in each source directory as single archive and index."
70 | for dir in $*; do
71 | src_id=$((src_id + 1))
72 | cur_num_jobs=$(cat $dir/num_jobs) || exit 1;
73 | alis=$(for n in $(seq $cur_num_jobs); do echo -n "$dir/ali.$n.gz "; done)
74 | $cmd $dir/log/copy_alignments.log \
75 | copy-int-vector "ark:gunzip -c $alis|" \
76 | ark,scp:$temp_dir/ali.$src_id.ark,$temp_dir/ali.$src_id.scp || exit 1;
77 | done
78 | sort -m $temp_dir/ali.*.scp > $temp_dir/ali.scp || exit 1;
79 |
80 | echo "$0: splitting data to get reference utt2spk for individual ali.JOB.gz files."
81 | utils/split_data.sh $data $num_jobs || exit 1;
82 |
83 | echo "$0: splitting the alignments to appropriate chunks according to the reference utt2spk files."
84 | utils/filter_scps.pl JOB=1:$num_jobs \
85 | $data/split$num_jobs/JOB/utt2spk $temp_dir/ali.scp $temp_dir/ali.JOB.scp
86 |
87 | for i in `seq 1 $num_jobs`; do
88 | copy-int-vector scp:$temp_dir/ali.${i}.scp "ark:|gzip -c >$dest/ali.$i.gz" || exit 1;
89 | done
90 |
91 | echo $num_jobs > $dest/num_jobs || exit 1
92 |
93 | echo "$0: checking that the generated alignment files contain at least 90% of the utterances."
94 | for i in `seq 1 $num_jobs`; do
95 | num_lines=`cat $temp_dir/ali.$i.scp | wc -l` || exit 1;
96 | num_lines_tot=`cat $data/split$num_jobs/$i/utt2spk | wc -l` || exit 1;
97 | python -c "import sys;
98 | percent = 100.0 * float($num_lines) / $num_lines_tot
99 | if percent < 90 :
100 |   print ('$dest/ali.$i.gz {0}% utterances missing.'.format(percent))" || exit 1;
101 | done
102 | rm -r $temp_dir 2>/dev/null
103 |
104 | echo "Combined alignments and stored in $dest"
105 | exit 0
106 |
--------------------------------------------------------------------------------
/steps/conf/append_eval_to_ctm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely)
4 | # Apache 2.0
5 |
6 | import sys,operator
7 |
8 | # Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
9 | # (i.e. the output of 'align-text' post-processed by 'wer_per_utt_details.pl')
10 |
11 | # The tags in the appended column are:
12 | # 'C' = correct
13 | # 'S' = substitution
14 | # 'I' = insertion
15 | # 'U' = unknown (not part of scored segment)
16 |
17 | if len(sys.argv) != 4:
18 |     print 'Usage: %s eval-in ctm-in ctm-eval-out' % __file__
19 |     sys.exit(1)
20 | dummy, eval_in, ctm_in, ctm_eval_out = sys.argv
21 |
22 | if ctm_eval_out == '-': ctm_eval_out = '/dev/stdout'
23 |
24 | # Read the evaluation,
25 | eval_vec = dict()
26 | with open(eval_in, 'r') as f:
27 |     while True:
28 |         # Reading 4 lines encoding one utterance,
29 |         ref = f.readline()
30 |         hyp = f.readline()
31 |         op = f.readline()
32 |         csid = f.readline()
33 |         if not ref: break
34 |         # Parse the input,
35 |         utt,tag,hyp_vec = hyp.split(' ',2)
36 |         assert(tag == 'hyp')
37 |         utt,tag,op_vec = op.split(' ',2)
38 |         assert(tag == 'op')
39 |         hyp_vec = hyp_vec.split()
40 |         op_vec = op_vec.split()
41 |         # Create the eval vector with symbols 'C', 'S', 'I',
42 |         assert(utt not in eval_vec)
43 |         eval_vec[utt] = []
44 |         for op,hyp in zip(op_vec, hyp_vec):
45 |             if hyp != '<eps>': eval_vec[utt].append(op)
46 |
47 | # Load the 'ctm' into dictionary,
48 | ctm = dict()
49 | with open(ctm_in) as f:
50 |     for l in f:
51 |         utt, ch, beg, dur, wrd, conf = l.split()
52 |         if not utt in ctm: ctm[utt] = []
53 |         ctm[utt].append((utt, ch, float(beg), float(dur), wrd, float(conf)))
54 |
55 | # Build the 'ctm' with 'eval' column added,
56 | ctm_eval = []
57 | for utt,ctm_part in ctm.iteritems():
58 |     ctm_part.sort(key = operator.itemgetter(2)) # Sort by 'beg' time,
59 |     # extending the 'tuple' by '+':
60 |     merged = [ tup + (evl,) for tup,evl in zip(ctm_part,eval_vec[utt]) ]
61 |     ctm_eval.extend(merged)
62 |
63 | # Sort again,
64 | ctm_eval.sort(key = operator.itemgetter(0,1,2))
65 |
66 | # Store,
67 | with open(ctm_eval_out,'w') as f:
68 |     for tup in ctm_eval:
69 |         f.write('%s %s %f %f %s %f %s\n' % tup)
70 |
71 |
--------------------------------------------------------------------------------
/steps/conf/append_prf_to_ctm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely)
4 | # Apache 2.0
5 |
6 | import sys
7 |
8 | # Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
9 | # (parsed from the 'prf' output of 'sclite')
10 |
11 | # The tags in appended column are:
12 | # 'C' = correct
13 | # 'S' = substitution
14 | # 'I' = insertion
15 | # 'U' = unknown (not part of scored segment)
16 |
17 | # Parse options,
18 | if len(sys.argv) != 4:
19 |     print "Usage: %s prf ctm_in ctm_out" % __file__
20 |     sys.exit(1)
21 | prf_file, ctm_file, ctm_out_file = sys.argv[1:]
22 |
23 | if ctm_out_file == '-': ctm_out_file = '/dev/stdout'
24 |
25 | # Load the prf file,
26 | prf = []
27 | with open(prf_file) as f:
28 |     for l in f:
29 |         # Store the data,
30 |         if l[:5] == 'File:':
31 |             file_id = l.split()[1]
32 |         if l[:8] == 'Channel:':
33 |             chan = l.split()[1]
34 |         if l[:5] == 'H_T1:':
35 |             h_t1 = l
36 |         if l[:5] == 'Eval:':
37 |             evl = l
38 |             prf.append((file_id,chan,h_t1,evl))
39 |
40 | # Parse the prf records into dictionary,
41 | prf_dict = dict()
42 | for (f,c,t,e) in prf:
43 |     t_pos = 0 # position in the 't' string,
44 |     while t_pos < len(t):
45 |         t1 = t[t_pos:].split(' ',1)[0] # get 1st token at 't_pos'
46 |         try:
47 |             # get word evaluation letter 'C,S,I',
48 |             evl = e[t_pos] if e[t_pos] != ' ' else 'C'
49 |             # add to dictionary,
50 |             key='%s,%s' % (f,c) # file,channel
51 |             if key not in prf_dict: prf_dict[key] = dict()
52 |             prf_dict[key][float(t1)] = evl
53 |         except ValueError:
54 |             pass
55 |         t_pos += len(t1)+1 # advance position for parsing,
56 |
57 | # Load the ctm file (with confidences),
58 | with open(ctm_file) as f:
59 |     ctm = [ l.split() for l in f ]
60 |
61 | # Append the sclite alignment tags to ctm,
62 | ctm_out = []
63 | for f, chan, beg, dur, wrd, conf in ctm:
64 |     # U = unknown, C = correct, S = substitution, I = insertion,
65 |     sclite_tag = 'U'
66 |     try:
67 |         sclite_tag = prf_dict[('%s,%s'%(f,chan)).lower()][float(beg)]
68 |     except KeyError:
69 |         pass
70 |     ctm_out.append([f,chan,beg,dur,wrd,conf,sclite_tag])
71 |
72 | # Save the augmented ctm file,
73 | with open(ctm_out_file, 'w') as f:
74 |     f.writelines([' '.join(ctm_record)+'\n' for ctm_record in ctm_out])
75 |
76 |
--------------------------------------------------------------------------------
/steps/conf/apply_calibration.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2015, Brno University of Technology (Author: Karel Vesely). Apache 2.0.
3 |
4 | # Applies a trained logistic-regression model that calibrates the per-word
5 | # confidences extracted by Minimum Bayes Risk decoding.
6 |
7 | # begin configuration section.
8 | cmd=
9 | stage=0
10 | # end configuration section.
11 |
12 | [ -f ./path.sh ] && . ./path.sh
13 | . parse_options.sh || exit 1;
14 |
15 | if [ $# -ne 5 ]; then
16 | echo "Usage: $0 [opts] <data-dir> <lang-dir|graph-dir> <decode-dir> <calibration-dir> <output-dir>"
17 | echo " Options:"
18 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
19 | exit 1;
20 | fi
21 |
22 | set -euo pipefail
23 |
24 | data=$1
25 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
26 | latdir=$3
27 | caldir=$4
28 | dir=$5
29 |
30 | model=$latdir/../final.mdl # assume model one level up from decoding dir.
31 | calibration=$caldir/calibration.mdl
32 | word_feats=$caldir/word_feats
33 | word_categories=$caldir/word_categories
34 |
35 | for f in $lang/words.txt $word_feats $word_categories $latdir/lat.1.gz $calibration $model; do
36 | [ ! -f $f ] && echo "$0: Missing file $f" && exit 1
37 | done
38 | [ -z "$cmd" ] && echo "$0: Missing --cmd '...'" && exit 1
39 |
40 | [ -d $dir/log ] || mkdir -p $dir/log
41 | nj=$(cat $latdir/num_jobs)
42 | lmwt=$(cat $caldir/lmwt)
43 | decode_mbr=$(cat $caldir/decode_mbr)
44 |
45 | # Store the setup,
46 | echo $lmwt >$dir/lmwt
47 | echo $decode_mbr >$dir/decode_mbr
48 | cp $calibration $dir/calibration.mdl
49 | cp $word_feats $dir/word_feats
50 | cp $word_categories $dir/word_categories
51 |
52 | # Create the ctm with raw confidences,
53 | # - we keep the timing relative to the utterance,
54 | if [ $stage -le 0 ]; then
55 | $cmd JOB=1:$nj $dir/log/get_ctm.JOB.log \
56 | lattice-scale --inv-acoustic-scale=$lmwt "ark:gunzip -c $latdir/lat.JOB.gz|" ark:- \| \
57 | lattice-limit-depth ark:- ark:- \| \
58 | lattice-push --push-strings=false ark:- ark:- \| \
59 | lattice-align-words-lexicon --max-expand=10.0 \
60 | $lang/phones/align_lexicon.int $model ark:- ark:- \| \
61 | lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \
62 | utils/int2sym.pl -f 5 $lang/words.txt \
63 | '>' $dir/JOB.ctm
64 | # Merge and clean,
65 | for ((n=1; n<=nj; n++)); do cat $dir/${n}.ctm; done > $dir/ctm
66 | rm $dir/*.ctm
67 | cat $dir/ctm | utils/sym2int.pl -f 5 $lang/words.txt >$dir/ctm_int
68 | fi
69 |
70 | # Compute lattice-depth,
71 | latdepth=$dir/lattice_frame_depth.ark
72 | if [ $stage -le 1 ]; then
73 | [ -e $latdepth ] || steps/conf/lattice_depth_per_frame.sh --cmd "$cmd" $latdir $dir
74 | fi
75 |
76 | # Create the forwarding data for logistic regression,
77 | if [ $stage -le 2 ]; then
78 | steps/conf/prepare_calibration_data.py --conf-feats $dir/forward_feats.ark \
79 | --lattice-depth $latdepth $dir/ctm_int $word_feats $word_categories
80 | fi
81 |
82 | # Apply calibration model to dev,
83 | if [ $stage -le 3 ]; then
84 | logistic-regression-eval --apply-log=false $calibration \
85 | ark:$dir/forward_feats.ark ark,t:- | \
86 | awk '{ key=$1; p_corr=$4; sub(/,.*/,"",key); gsub(/\^/," ",key); print key,p_corr }' | \
87 | utils/int2sym.pl -f 5 $lang/words.txt \
88 | >$dir/ctm_calibrated
89 | fi
90 |
91 | exit 0
92 |
--------------------------------------------------------------------------------
/steps/conf/convert_ctm_to_tra.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely)
4 | # Apache 2.0
5 |
6 | import sys, operator
7 |
8 | # This script loads a 'ctm' file and converts it into the 'tra' format:
9 | # "utt-key word1 word2 word3 ... wordN"
10 | # The 'utt-key' is the 1st column in the CTM.
11 |
12 | # Typically the CTM contains:
13 | # - utterance-relative timing (i.e. prepared without 'utils/convert_ctm.pl')
14 | # - confidences
15 |
16 | if len(sys.argv) != 3:
17 |     print 'Usage: %s ctm-in tra-out' % __file__
18 |     sys.exit(1)
19 | dummy, ctm_in, tra_out = sys.argv
20 |
21 | if ctm_in == '-': ctm_in = '/dev/stdin'
22 | if tra_out == '-': tra_out = '/dev/stdout'
23 |
24 | # Load the 'ctm' into dictionary,
25 | tra = dict()
26 | with open(ctm_in) as f:
27 |     for l in f:
28 |         utt, ch, beg, dur, wrd, conf = l.split()
29 |         if not utt in tra: tra[utt] = []
30 |         tra[utt].append((float(beg),wrd))
31 |
32 | # Store in the 'tra' format,
33 | with open(tra_out,'w') as f:
34 |     for utt,tuples in tra.iteritems():
35 |         tuples.sort(key = operator.itemgetter(0)) # Sort by 'beg' time,
36 |         f.write('%s %s\n' % (utt,' '.join([t[1] for t in tuples])))
37 |
38 |
--------------------------------------------------------------------------------
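
Note: a toy round-trip of the CTM-to-tra conversion (assumes a Python 2 interpreter, since the script uses print statements):

  printf '%s\n' 'utt1 1 0.00 0.30 YES 0.98' 'utt1 1 0.35 0.28 NO 0.91' > toy.ctm
  python steps/conf/convert_ctm_to_tra.py toy.ctm -
  # expected output: utt1 YES NO
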
/steps/conf/lattice_depth_per_frame.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2015 Brno University of Technology (Author: Karel Vesely)
3 | # Licensed under the Apache License, Version 2.0 (the "License")
4 |
5 | # Extract lattice-depth for each frame.
6 |
7 | # Begin configuration
8 | cmd=run.pl
9 | # End configuration
10 |
11 | echo "$0 $@" # Print the command line for logging
12 |
13 | [ -f path.sh ] && . ./path.sh # source the path.
14 | . parse_options.sh || exit 1;
15 |
16 | if [ $# != 2 ]; then
17 | echo "usage: $0 [opts] <decode-dir> <output-dir>"
18 | echo "main options (for others, see top of script file)"
19 | echo " --config # config containing options"
20 | echo " --cmd"
21 | exit 1;
22 | fi
23 |
24 | set -euo pipefail
25 |
26 | latdir=$1
27 | dir=$2
28 |
29 | [ ! -f $latdir/lat.1.gz ] && echo "Missing $latdir/lat.1.gz" && exit 1
30 | nj=$(cat $latdir/num_jobs)
31 |
32 | # Get the per-frame lattice depths,
33 | $cmd JOB=1:$nj $dir/log/lattice_depth_per_frame.JOB.log \
34 | lattice-depth-per-frame "ark:gunzip -c $latdir/lat.JOB.gz |" ark,t:$dir/lattice_frame_depth.JOB.ark
35 | # Merge,
36 | for ((n=1; n<=nj; n++)); do cat $dir/lattice_frame_depth.${n}.ark; done >$dir/lattice_frame_depth.ark
37 | rm $dir/lattice_frame_depth.*.ark
38 |
39 | # Done!
40 |
--------------------------------------------------------------------------------
/steps/conf/parse_arpa_unigrams.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely)
4 | # Apache 2.0
5 |
6 | import sys, gzip, re
7 |
8 | # Parse options,
9 | if len(sys.argv) != 4:
10 |     print "Usage: %s <words.txt> <arpa-gz> <unigrams-out>" % __file__
11 |     sys.exit(0)
12 | words_txt, arpa_gz, unigrams_out = sys.argv[1:]
13 |
14 | if arpa_gz == '-': arpa_gz = '/dev/stdin'
15 | if unigrams_out == '-': unigrams_out = '/dev/stdout'
16 |
17 | # Load the words.txt,
18 | words = [ l.split() for l in open(words_txt) ]
19 |
20 | # Load the unigram probabilities in log10 from ARPA,
21 | wrd_log10 = dict()
22 | with gzip.open(arpa_gz,'r') as f:
23 |     read = False
24 |     for l in f:
25 |         if l.strip() == '\\1-grams:': read = True
26 |         if l.strip() == '\\2-grams:': break
27 |         if read and len(l.split())>=2:
28 |             log10_p_unigram, wrd = re.split('[\t ]+',l.strip(),2)[:2]
29 |             wrd_log10[wrd] = float(log10_p_unigram)
30 |
31 | # Create list, 'wrd id log_p_unigram',
32 | words_unigram = [[wrd, id, (wrd_log10[wrd] if wrd in wrd_log10 else -99)] for wrd,id in words ]
33 |
34 | print >>sys.stderr, words_unigram[0]
35 | # Store,
36 | with open(unigrams_out,'w') as f:
37 |     f.writelines(['%s %s %g\n' % (w,i,p) for (w,i,p) in words_unigram])
38 |
39 |
--------------------------------------------------------------------------------
/steps/conf/prepare_word_categories.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely)
4 | # Apache 2.0
5 |
6 | import sys
7 |
8 | from optparse import OptionParser
9 | desc = """
10 | Prepare mapping of words into categories. Each word with minimal frequency
11 | has its own category, the rest is merged into a single class.
12 | """
13 | usage = "%prog [opts] words.txt ctm category_mapping"
14 | parser = OptionParser(usage=usage, description=desc)
15 | parser.add_option("--min-count", help="Minimum word-count to have a single word category. [default %default]", type='int', default=20)
16 | (o, args) = parser.parse_args()
17 |
18 | if len(args) != 3:
19 |     parser.print_help()
20 |     sys.exit(1)
21 | words_file, text_file, category_mapping_file = args
22 |
23 | if text_file == '-': text_file = '/dev/stdin'
24 | if category_mapping_file == '-': category_mapping_file = '/dev/stdout'
25 |
26 | # Read the words from the 'tra' file,
27 | with open(text_file) as f:
28 |     text_words = [ l.split()[1:] for l in f ]
29 |
30 | # Flatten the array of arrays of words,
31 | import itertools
32 | text_words = list(itertools.chain.from_iterable(text_words))
33 |
34 | # Count the words (regardless if correct or incorrect),
35 | word_counts = dict()
36 | for w in text_words:
37 |     if w not in word_counts: word_counts[w] = 0
38 |     word_counts[w] += 1
39 |
40 | # Read the words.txt,
41 | with open(words_file) as f:
42 |     word_id = [ l.split() for l in f ]
43 |
44 | # Append the categories,
45 | n=1
46 | word_id_cat=[]
47 | for word, idx in word_id:
48 |     cat = 0
49 |     if word in word_counts:
50 |         if word_counts[word] > o.min_count:
51 |             cat = n; n += 1
52 |     word_id_cat.append([word, idx, str(cat)])
53 |
54 | # Store the mapping,
55 | with open(category_mapping_file,'w') as f:
56 |     f.writelines([' '.join(record)+'\n' for record in word_id_cat])
57 |
--------------------------------------------------------------------------------
/steps/data/data_dir_manipulation_lib.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | def RunKaldiCommand(command, wait = True):
4 |     """ Runs commands frequently seen in Kaldi scripts. These are usually a
5 |     sequence of commands connected by pipes, so we use shell=True """
6 |     #logger.info("Running the command\n{0}".format(command))
7 |     p = subprocess.Popen(command, shell = True,
8 |                          stdout = subprocess.PIPE,
9 |                          stderr = subprocess.PIPE)
10 |
11 |     if wait:
12 |         [stdout, stderr] = p.communicate()
13 |         if p.returncode != 0:
14 |             raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr)
15 |         return stdout, stderr
16 |     else:
17 |         return p
18 |
19 |
--------------------------------------------------------------------------------
/steps/decode_combine.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
4 |
5 | # Combine two decoding directories by composing the lattices (we
6 | # apply a weight to each of the original weights, by default 0.5 each).
7 | # Note, this is not the only combination method, or the most normal combination
8 | # method. See also egs/wsj/s5/local/score_combine.sh.
9 |
10 | # Begin configuration section.
11 | weight1=0.5 # Weight on 1st set of lattices.
12 | cmd=run.pl
13 | skip_scoring=false
14 | # End configuration section.
15 |
16 | echo "$0 $@" # Print the command line for logging
17 |
18 | [ -f ./path.sh ] && . ./path.sh; # source the path.
19 | . parse_options.sh || exit 1;
20 |
21 | if [ $# -ne 5 ]; then
22 | echo "Usage: steps/decode_combine.sh [options] <data> <lang-dir|graph-dir> <decode-dir1> <decode-dir2> <decode-dir-out>"
23 | echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode"
24 | echo "main options (for others, see top of script file)"
25 | echo " --config # config containing options"
26 | echo " --cmd # Command to run in parallel with"
27 | echo " --weight1 # Weight on 1st set of lattices (default 0.5)"
28 | exit 1;
29 | fi
30 |
31 | data=$1
32 | lang_or_graphdir=$2
33 | srcdir1=$3
34 | srcdir2=$4
35 | dir=$5
36 |
37 | for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do
38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
39 | done
40 |
41 | nj1=`cat $srcdir1/num_jobs` || exit 1;
42 | nj2=`cat $srcdir2/num_jobs` || exit 1;
43 | [ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1;
44 | nj=$nj1
45 |
46 | mkdir -p $dir/log
47 | echo $nj > $dir/num_jobs
48 |
49 | # The lattice-interp command does the score interpolation (with composition),
50 | # and the lattice-copy-backoff replaces the result with the 1st lattice, in
51 | # cases where the composed result was empty.
52 | $cmd JOB=1:$nj $dir/log/interp.JOB.log \
53 | lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \
54 | "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \
55 | lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \
56 | "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
57 |
58 | cp $srcdir1/final.mdl $dir/final.mdl
59 |
60 | if ! $skip_scoring ; then
61 | [ ! -x local/score.sh ] && \
62 | echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
63 | local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir ||
64 | { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; }
65 | fi
66 |
67 | exit 0;
68 |
--------------------------------------------------------------------------------
/steps/decode_nnet.sh:
--------------------------------------------------------------------------------
1 | nnet/decode.sh
--------------------------------------------------------------------------------
/steps/decode_si.sh:
--------------------------------------------------------------------------------
1 | decode.sh
--------------------------------------------------------------------------------
/steps/diagnostic/analyze_alignments.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2016. Apache 2.0.
4 |
5 | # This script performs some analysis of alignments on disk, currently in terms
6 | # of phone lengths, including lengths of leading and trailing silences
7 |
8 |
9 | # begin configuration section.
10 | cmd=run.pl
11 | #end configuration section.
12 |
13 | echo "$0 $@" # Print the command line for logging
14 |
15 | [ -f ./path.sh ] && . ./path.sh
16 | . parse_options.sh || exit 1;
17 |
18 | if [ $# -ne 2 ]; then
19 | echo "Usage: $0 [options] <lang-dir> <ali-dir>"
20 | echo " Options:"
21 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
22 | echo "e.g.:"
23 | echo "$0 data/lang exp/tri4b"
24 | echo "This script writes some diagnostics to <ali-dir>/log/alignments.log"
25 | exit 1;
26 | fi
27 |
28 | lang=$1
29 | dir=$2
30 |
31 | model=$dir/final.mdl
32 |
33 | for f in $lang/words.txt $model $dir/ali.1.gz $dir/num_jobs; do
34 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
35 | done
36 |
37 | num_jobs=$(cat $dir/num_jobs) || exit 1
38 |
39 | mkdir -p $dir/log
40 |
41 | rm $dir/phone_stats.*.gz 2>/dev/null || true
42 |
43 | $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \
44 | set -o pipefail '&&' ali-to-phones --write-lengths=true "$model" \
45 | "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \
46 | sed -E 's/^[^ ]+ //' \| \
47 | awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
48 | sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1
49 |
50 | if ! $cmd $dir/log/analyze_alignments.log \
51 | gunzip -c "$dir/phone_stats.*.gz" \| \
52 | steps/diagnostic/analyze_phone_length_stats.py $lang; then
53 | echo "$0: analyze_phone_length_stats.py failed, but ignoring the error (it's just for diagnostics)"
54 | fi
55 |
56 | grep WARNING $dir/log/analyze_alignments.log
57 | echo "$0: see stats in $dir/log/analyze_alignments.log"
58 |
59 | rm $dir/phone_stats.*.gz
60 |
61 | exit 0
62 |
--------------------------------------------------------------------------------
/steps/diagnostic/analyze_lats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2016. Apache 2.0.
4 |
5 | # This script does the same type of diagnostics as analyze_alignments.sh, except
6 | # it starts from lattices (so it has to convert the lattices to alignments
7 | # first).
8 |
9 | # begin configuration section.
10 | iter=final
11 | cmd=run.pl
12 | acwt=0.1
13 | #end configuration section.
14 |
15 | echo "$0 $@" # Print the command line for logging
16 |
17 | [ -f ./path.sh ] && . ./path.sh
18 | . parse_options.sh || exit 1;
19 |
20 | if [ $# -ne 2 ]; then
21 | echo "Usage: $0 [options] <lang-dir> (<decode-dir>|<lattice-dir>)"
22 | echo " Options:"
23 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
24 | echo " --acwt # Acoustic scale for getting best-path (default: 0.1)"
25 | echo "e.g.:"
26 | echo "$0 data/lang exp/tri4b/decode_dev"
27 | echo "This script writes some diagnostics to <decode-dir>/log/alignments.log"
28 | exit 1;
29 | fi
30 |
31 | lang=$1
32 | dir=$2
33 |
34 | model=$dir/../${iter}.mdl
35 |
36 | for f in $lang/words.txt $model $dir/lat.1.gz $dir/num_jobs; do
37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
38 | done
39 |
40 | num_jobs=$(cat $dir/num_jobs) || exit 1
41 |
42 | mkdir -p $dir/log
43 |
44 | rm $dir/phone_stats.*.gz 2>/dev/null || true
45 |
46 | # this writes two archives of depth_tmp and ali_tmp of (depth per frame, alignment per frame).
47 | $cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
48 | lattice-depth-per-frame "ark:gunzip -c $dir/lat.JOB.gz|" "ark,t:|gzip -c > $dir/depth_tmp.JOB.gz" ark:- \| \
49 | lattice-best-path --acoustic-scale=$acwt ark:- ark:/dev/null "ark,t:|gzip -c >$dir/ali_tmp.JOB.gz" || exit 1
50 |
51 | $cmd JOB=1:$num_jobs $dir/log/get_lattice_stats.JOB.log \
52 | ali-to-phones --write-lengths=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
53 | sed -E 's/^[^ ]+ //' \| \
54 | awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
55 | sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1
56 |
57 |
58 | $cmd $dir/log/analyze_alignments.log \
59 | gunzip -c "$dir/phone_stats.*.gz" \| \
60 | steps/diagnostic/analyze_phone_length_stats.py $lang || exit 1
61 |
62 | grep WARNING $dir/log/analyze_alignments.log
63 | echo "$0: see stats in $dir/log/analyze_alignments.log"
64 |
65 |
66 | # note: below, some things that would be interpreted by the shell have to be
67 | # escaped since they need to be passed to $cmd.
68 | # the 'paste' command will paste together the phone-indexes and the depths
69 | # so that one line will be like utt-id1 phone1 phone2 phone3 .. utt-id1 depth1 depth2 depth3 ...
70 | # the awk command computes counts of pairs (phone, lattice-depth) and outputs lines
71 | # containing 3 integers representing:
72 | # phone lattice_depth, count[phone,lattice_depth]
73 | $cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
74 | ali-to-phones --per-frame=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
75 | paste /dev/stdin '<(' gunzip -c $dir/depth_tmp.JOB.gz ')' \| \
76 | awk '{ half=NF/2; for (n=2; n<=half; n++) { m=n+half; count[$n " " $m]++;}} END{for(k in count) print k, count[k]; }' \| \
77 | gzip -c '>' $dir/depth_stats_tmp.JOB.gz
78 |
79 |
80 | $cmd $dir/log/analyze_lattice_depth_stats.log \
81 | gunzip -c "$dir/depth_stats_tmp.*.gz" \| \
82 | steps/diagnostic/analyze_lattice_depth_stats.py $lang || exit 1
83 |
84 | grep Overall $dir/log/analyze_lattice_depth_stats.log
85 | echo "$0: see stats in $dir/log/analyze_lattice_depth_stats.log"
86 |
87 |
88 | rm $dir/phone_stats.*.gz
89 | rm $dir/depth_tmp.*.gz
90 | rm $dir/depth_stats_tmp.*.gz
91 | rm $dir/ali_tmp.*.gz
92 |
93 | exit 0
94 |
--------------------------------------------------------------------------------
/steps/get_ctm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2012. Apache 2.0.
3 |
4 | # This script produces CTM files from a decoding directory that has lattices
5 | # present.
6 |
7 |
8 | # begin configuration section.
9 | cmd=run.pl
10 | stage=0
11 | frame_shift=0.01
12 | min_lmwt=5
13 | max_lmwt=20
14 | use_segments=true # if we have a segments file, use it to convert
15 | # the segments to be relative to the original files.
16 | print_silence=false
17 | #end configuration section.
18 |
19 | echo "$0 $@" # Print the command line for logging
20 |
21 | [ -f ./path.sh ] && . ./path.sh
22 | . parse_options.sh || exit 1;
23 |
24 | if [ $# -ne 3 ]; then
25 | echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
26 | echo " Options:"
27 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
28 | echo " --stage (0|1|2) # start scoring script from part-way through."
29 | echo " --use-segments (true|false) # use segments and reco2file_and_channel files "
30 | echo " # to produce a ctm relative to the original audio"
31 | echo " # files, with channel information (typically needed"
32 | echo " # for NIST scoring)."
33 | echo " --frame-shift (default=0.01) # specify this if your lattices have a frame-shift"
34 | echo " # not equal to 0.01 seconds"
35 | echo "e.g.:"
36 | echo "$0 data/train data/lang exp/tri4a/decode/"
37 | echo "See also: steps/get_train_ctm.sh"
38 | exit 1;
39 | fi
40 |
41 | data=$1
42 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
43 | dir=$3
44 |
45 | model=$dir/../final.mdl # assume model one level up from decoding dir.
46 |
47 |
48 | for f in $lang/words.txt $model $dir/lat.1.gz; do
49 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
50 | done
51 |
52 | name=`basename $data`; # e.g. eval2000
53 |
54 | mkdir -p $dir/scoring/log
55 |
56 | if [ $stage -le 0 ]; then
57 | if [ -f $data/segments ] && $use_segments; then
58 | f=$data/reco2file_and_channel
59 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
60 | filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel"
61 | else
62 | filter_cmd=cat
63 | fi
64 |
65 | nj=$(cat $dir/num_jobs)
66 | lats=$(for n in $(seq $nj); do echo -n "$dir/lat.$n.gz "; done)
67 | if [ -f $lang/phones/word_boundary.int ]; then
68 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
69 | set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
70 | lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
71 | lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
72 | nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
73 | utils/int2sym.pl -f 5 $lang/words.txt \| \
74 | $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
75 | elif [ -f $lang/phones/align_lexicon.int ]; then
76 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
77 | set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
78 | lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
79 | lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
80 | nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
81 | utils/int2sym.pl -f 5 $lang/words.txt \| \
82 | $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
83 | else
84 | echo "$0: neither $lang/phones/word_boundary.int nor $lang/phones/align_lexicon.int exists: cannot align."
85 | exit 1;
86 | fi
87 | fi
88 |
89 |
90 |
--------------------------------------------------------------------------------
/steps/lmrescore_const_arpa.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Guoguo Chen
4 | # Apache 2.0
5 |
6 | # This script rescores lattices with the ConstArpaLm format language model.
7 |
8 | # Begin configuration section.
9 | cmd=run.pl
10 | skip_scoring=false
11 | stage=1
12 | scoring_opts=
13 | # End configuration section.
14 |
15 | echo "$0 $@" # Print the command line for logging
16 |
17 | . ./utils/parse_options.sh
18 |
19 | if [ $# != 5 ]; then
20 | echo "Does language model rescoring of lattices (remove old LM, add new LM)"
21 | echo "Usage: $0 [options] \\"
22 | echo " <old-lang-dir> <new-lang-dir> <data-dir> <input-decode-dir> <output-decode-dir>"
23 | echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
24 | exit 1;
25 | fi
26 |
27 | [ -f path.sh ] && . ./path.sh;
28 |
29 | oldlang=$1
30 | newlang=$2
31 | data=$3
32 | indir=$4
33 | outdir=$5
34 |
35 | oldlm=$oldlang/G.fst
36 | newlm=$newlang/G.carpa
37 | ! cmp $oldlang/words.txt $newlang/words.txt &&\
38 | echo "$0: Warning: vocabularies may be incompatible."
39 | [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
40 | [ ! -f $newlm ] && echo "$0: Missing file $newlm" && exit 1;
41 | ! ls $indir/lat.*.gz >/dev/null &&\
42 | echo "$0: No lattices input directory $indir" && exit 1;
43 |
44 | if ! cmp -s $oldlang/words.txt $newlang/words.txt; then
45 | echo "$0: $oldlang/words.txt and $newlang/words.txt differ: make sure you know what you are doing.";
46 | fi
47 |
48 | oldlmcommand="fstproject --project_output=true $oldlm |"
49 |
50 | mkdir -p $outdir/log
51 | nj=`cat $indir/num_jobs` || exit 1;
52 | cp $indir/num_jobs $outdir
53 |
54 | if [ $stage -le 1 ]; then
55 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
56 | lattice-lmrescore --lm-scale=-1.0 \
57 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \
58 | lattice-lmrescore-const-arpa --lm-scale=1.0 \
59 | ark:- "$newlm" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
60 | fi
61 |
62 | if ! $skip_scoring && [ $stage -le 2 ]; then
63 | err_msg="Not scoring because local/score.sh does not exist or not executable."
64 | [ ! -x local/score.sh ] && echo $err_msg && exit 1;
65 | local/score.sh --cmd "$cmd" $scoring_opts $data $newlang $outdir
66 | else
67 | echo "Not scoring because requested so..."
68 | fi
69 |
70 | exit 0;
71 |
--------------------------------------------------------------------------------
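
Note: the G.carpa consumed by this script is produced by utils/build_const_arpa_lm.sh (listed under utils/ above); a sketch with illustrative paths:

  utils/build_const_arpa_lm.sh lm/bigger.arpa.gz data/lang data/lang_const_arpa
  steps/lmrescore_const_arpa.sh --cmd run.pl data/lang_test_tg data/lang_const_arpa \
    data/test_yesno exp/mono/decode_test_yesno exp/mono/decode_test_yesno_carpa
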
/steps/lmrescore_rnnlm_lat.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015 Guoguo Chen
4 | # Apache 2.0
5 |
6 | # This script rescores lattices with RNNLM. See also rnnlmrescore.sh which is
7 | # an older script using n-best lists.
8 |
9 | # Begin configuration section.
10 | cmd=run.pl
11 | skip_scoring=false
12 | max_ngram_order=4
13 | N=10
14 | inv_acwt=12
15 | weight=1.0 # Interpolation weight for RNNLM.
16 | # End configuration section.
17 |
18 | echo "$0 $@" # Print the command line for logging
19 |
20 | . ./utils/parse_options.sh
21 |
22 | if [ $# != 5 ]; then
23 | echo "Does language model rescoring of lattices (remove old LM, add new LM)"
24 | echo "with RNNLM."
25 | echo ""
26 | echo "Usage: $0 [options] \\"
27 | echo " <old-lang-dir> <rnnlm-dir> <data-dir> <input-decode-dir> <output-decode-dir>"
28 | echo " e.g.: $0 ./rnnlm data/lang_tg data/test \\"
29 | echo " exp/tri3/test_tg exp/tri3/test_rnnlm"
30 | echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
31 | exit 1;
32 | fi
33 |
34 | [ -f path.sh ] && . ./path.sh;
35 |
36 | oldlang=$1
37 | rnnlm_dir=$2
38 | data=$3
39 | indir=$4
40 | outdir=$5
41 |
42 | oldlm=$oldlang/G.fst
43 | if [ -f $oldlang/G.carpa ]; then
44 | oldlm=$oldlang/G.carpa
45 | elif [ ! -f $oldlm ]; then
46 | echo "$0: expecting either $oldlang/G.fst or $oldlang/G.carpa to exist" &&\
47 | exit 1;
48 | fi
49 |
50 | [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
51 | [ ! -f $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
52 | [ ! -f $rnnlm_dir/unk.probs ] &&\
53 | echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
54 | [ ! -f $oldlang/words.txt ] &&\
55 | echo "$0: Missing file $oldlang/words.txt" && exit 1;
56 | ! ls $indir/lat.*.gz >/dev/null &&\
57 | echo "$0: No lattices input directory $indir" && exit 1;
58 | awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) {
59 | print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \
60 | || exit 1;
61 |
62 | oldlm_command="fstproject --project_output=true $oldlm |"
63 |
64 | acwt=`perl -e "print (1.0/$inv_acwt);"`
65 |
66 | mkdir -p $outdir/log
67 | nj=`cat $indir/num_jobs` || exit 1;
68 | cp $indir/num_jobs $outdir
69 |
70 | oldlm_weight=`perl -e "print -1.0 * $weight;"`
71 | if [ "$oldlm" == "$oldlang/G.fst" ]; then
72 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
73 | lattice-lmrescore --lm-scale=$oldlm_weight \
74 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \
75 | lattice-lmrescore-rnnlm --lm-scale=$weight \
76 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \
77 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
78 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
79 | else
80 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
81 | lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \
82 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \
83 | lattice-lmrescore-rnnlm --lm-scale=$weight \
84 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \
85 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
86 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
87 | fi
88 |
89 | if ! $skip_scoring ; then
90 | err_msg="Not scoring because local/score.sh does not exist or is not executable."
91 | [ ! -x local/score.sh ] && echo $err_msg && exit 1;
92 | local/score.sh --cmd "$cmd" $data $oldlang $outdir
93 | else
94 | echo "Not scoring because requested so..."
95 | fi
96 |
97 | exit 0;
98 |
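99 | # A usage sketch (hypothetical paths): the <rnnlm-dir> must contain the
100 | # 'rnnlm' and 'unk.probs' files checked above; --weight 0.5 interpolates the
101 | # RNNLM and the old LM equally:
102 | #
103 | # steps/lmrescore_rnnlm_lat.sh --weight 0.5 data/lang_test_tg exp/rnnlm \
104 | # data/test exp/tri3/decode_test_tg exp/tri3/decode_test_tg_rnnlm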
--------------------------------------------------------------------------------
/steps/nnet2/adjust_priors.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
4 | # Copyright (c) 2015, Johns Hopkins University (Yenda Trmal )
5 | # License: Apache 2.0
6 |
7 | # Begin configuration section.
8 | cmd=run.pl
9 | iter=final
10 | # End configuration section
11 |
12 |
13 | echo "$0 $@" # Print the command line for logging
14 |
15 | if [ -f path.sh ]; then . ./path.sh; fi
16 | . parse_options.sh || exit 1;
17 |
18 | if [ $# != 2 ]; then
19 | echo "Usage: $0 [opts] "
20 | echo " e.g.: $0 exp/tri4_mpe_degs exp/tri4_mpe"
21 | echo ""
22 | echo "Performs priors adjustment either on the final iteration"
23 | echo "or iteration of choice of the training. The adjusted model"
24 | echo "filename will be suffixed by \"adj\", i.e. for the final"
25 | echo "iteration final.mdl will become final.adj.mdl"
26 | echo ""
27 | echo "Main options (for others, see top of script file)"
28 | echo " --config # config file containing options"
29 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
30 | echo " --iter # which iteration to be adjusted"
31 | exit 1;
32 | fi
33 |
34 | degs_dir=$1
35 | dir=$2
36 |
37 | src_model=$dir/${iter}.mdl
38 |
39 | if [ ! -f $src_model ]; then
40 | echo "$0: Expecting $src_model to exist."
41 | exit 1
42 | fi
43 |
44 | if [ ! -f $degs_dir/priors_egs.1.ark ]; then
45 | echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist."
46 | exit 1
47 | fi
48 |
49 | num_archives_priors=`cat $degs_dir/info/num_archives_priors` || {
50 | echo "Could not find $degs_dir/info/num_archives_priors.";
51 | exit 1;
52 | }
53 |
54 | $cmd JOB=1:$num_archives_priors $dir/log/get_post.${iter}.JOB.log \
55 | nnet-compute-from-egs "nnet-to-raw-nnet $src_model -|" \
56 | ark:$degs_dir/priors_egs.JOB.ark ark:- \| \
57 | matrix-sum-rows ark:- ark:- \| \
58 | vector-sum ark:- $dir/post.${iter}.JOB.vec || {
59 | echo "Error in getting posteriors for adjusting priors."
60 | echo "See $dir/log/get_post.${iter}.*.log";
61 | exit 1;
62 | }
63 |
64 |
65 | $cmd $dir/log/sum_post.${iter}.log \
66 | vector-sum $dir/post.${iter}.*.vec $dir/post.${iter}.vec || {
67 | echo "Error in summing posteriors. See $dir/log/sum_post.${iter}.log";
68 | exit 1;
69 | }
70 |
71 | rm -f $dir/post.${iter}.*.vec
72 |
73 | echo "Re-adjusting priors based on computed posteriors for iter $iter"
74 | $cmd $dir/log/adjust_priors.${iter}.log \
75 | nnet-adjust-priors $src_model $dir/post.${iter}.vec $dir/${iter}.adj.mdl || {
76 | echo "Error in adjusting priors. See $dir/log/adjust_priors.${iter}.log";
77 | exit 1;
78 | }
79 |
80 | echo "Done adjusting priors (on $src_model)"
81 |
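82 | # A usage sketch (hypothetical paths): average the network's posteriors over
83 | # the priors egs in exp/tri4_nnet_degs and write exp/tri4_nnet/final.adj.mdl:
84 | #
85 | # steps/nnet2/adjust_priors.sh --iter final exp/tri4_nnet_degs exp/tri4_nnet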
--------------------------------------------------------------------------------
/steps/nnet2/convert_nnet1_to_nnet2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey).
4 | # Apache 2.0.
5 |
6 | # This script converts nnet1 into nnet2 models.
7 | # Note, it doesn't support all possible types of nnet1 models.
8 |
9 | # Begin configuration section
10 | cleanup=true
11 | cmd=run.pl
12 | # End configuration section.
13 |
14 | echo "$0 $@" # Print the command line for logging
15 |
16 | [ -f ./path.sh ] && . ./path.sh; # source the path.
17 | . parse_options.sh || exit 1;
18 |
19 |
20 | if [ $# -ne 2 ]; then
21 | echo "Usage: $0 [options] "
22 | echo "e.g.: $0 exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2"
23 | exit 1;
24 | fi
25 |
26 | src=$1
27 | dir=$2
28 |
29 | mkdir -p $dir/log || exit 1;
30 |
31 | for f in $src/final.mdl $src/final.feature_transform $src/ali_train_pdf.counts; do
32 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
33 | done
34 |
35 | cp $src/phones.txt $dir/phones.txt || exit 1;
36 | $cmd $dir/log/convert_feature_transform.log \
37 | nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1;
38 |
39 |
40 | if [ -f $src/final.nnet ]; then
41 | echo "$0: $src/final.nnet exists, using it as input."
42 | $cmd $dir/log/convert_model.log \
43 | nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1;
44 | elif [ -f $src/final.dbn ]; then
45 | echo "$0: $src/final.dbn exists, using it as input."
46 | num_leaves=$(am-info $src/final.mdl | grep -w pdfs | awk '{print $NF}') || exit 1;
47 | dbn_output_dim=$(nnet-info $src/final.dbn | grep component | tail -n 1 | sed s:,::g | awk '{print $NF}') || exit 1;
48 | [ -z "$dbn_output_dim" ] && exit 1;
49 |
50 | cat > $dir/final_layer.conf < "
22 | echo "where will typically be a normal neural net from another corpus,"
23 | echo "and will usually be a single-layer neural net trained on top of it by"
24 | echo "dumping the activations (e.g. using steps/online/nnet2/dump_nnet_activations.sh, I"
25 | echo "think no such script exists for non-online), and then training using"
26 | echo "steps/nnet2/retrain_fast.sh."
27 | echo "e.g.: $0 ../../swbd/s5b/exp/nnet2_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online_combined"
28 | fi
29 |
30 |
31 | src1=$1
32 | src2=$2
33 | dir=$3
34 |
35 | for f in $src1/final.mdl $src2/tree $src2/final.mdl; do
36 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
37 | done
38 |
39 |
40 | mkdir -p $dir/log
41 |
42 | info=$dir/nnet_info
43 | nnet-am-info $src1/final.mdl >$info
44 | nc=$(grep num-components $info | awk '{print $2}');
45 | if grep SumGroupComponent $info >/dev/null; then
46 | nc_truncate=$[$nc-3] # we did mix-up: remove AffineComponent,
47 | # SumGroupComponent, SoftmaxComponent
48 | else
49 | # we didn't mix-up:
50 | nc_truncate=$[$nc-2] # remove AffineComponent, SoftmaxComponent
51 | fi
52 |
53 | $cmd $dir/log/get_raw_nnet.log \
54 | nnet-to-raw-nnet --truncate=$nc_truncate $src1/final.mdl $dir/first_nnet.raw || exit 1;
55 |
56 | $cmd $dir/log/append_nnet.log \
57 | nnet-insert --randomize-next-component=false --insert-at=0 \
58 | $src2/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1;
59 |
60 | $cleanup && rm $dir/first_nnet.raw
61 |
62 | # Copy the tree etc.,
63 |
64 | cp $src2/tree $dir || exit 1;
65 |
66 | # Copy feature-related things from src1 where we built the initial model.
67 | # Note: if you've done anything like mess with the feature-extraction configs,
68 | # or changed the feature type, you have to keep track of that yourself.
69 | for f in final.mat cmvn_opts splice_opts; do
70 | if [ -f $src1/$f ]; then
71 | cp $src1/$f $dir || exit 1;
72 | fi
73 | done
74 |
75 | echo "$0: created appended model in $dir"
76 |
--------------------------------------------------------------------------------
/steps/nnet2/get_num_frames.sh:
--------------------------------------------------------------------------------
1 | ../../utils/data/get_num_frames.sh
--------------------------------------------------------------------------------
/steps/nnet2/relabel_egs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Vimal Manohar. Apache 2.0.
4 | # This script, which will generally be called during neural-net training,
5 | # relabels existing examples with better labels obtained by realigning the data
6 | # with the current nnet model.
7 |
8 | # Begin configuration section
9 | cmd=run.pl
10 | stage=0
11 | extra_egs= # Names of additional egs files that need to be relabelled
12 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs,
13 | # valid_diagnostic.egs
14 | iter=final
15 | echo "$0 $@" # Print the command line for logging
16 |
17 | if [ -f path.sh ]; then . ./path.sh; fi
18 | . parse_options.sh || exit 1;
19 |
20 | if [ $# != 3 ]; then
21 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] "
22 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5"
23 | echo ""
24 | echo "Main options (for others, see top of script file)"
25 | echo " --config # config file containing options"
26 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs."
27 |
28 | exit 1;
29 | fi
30 |
31 | alidir=$1
32 | egs_in_dir=$2
33 | dir=$3
34 |
35 | model=$alidir/$iter.mdl
36 |
37 | # Check some files.
38 |
39 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.0.ark $egs_in_dir/combine.egs \
40 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \
41 | $egs_in_dir/num_jobs_nnet $egs_in_dir/iters_per_epoch $egs_in_dir/samples_per_iter; do
42 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
43 | done
44 |
45 | num_jobs_nnet=`cat $egs_in_dir/num_jobs_nnet`
46 | iters_per_epoch=`cat $egs_in_dir/iters_per_epoch`
47 | samples_per_iter_real=`cat $egs_in_dir/samples_per_iter`
48 | num_jobs_align=`cat $alidir/num_jobs`
49 |
50 | mkdir -p $dir/log
51 |
52 | echo $num_jobs_nnet > $dir/num_jobs_nnet
53 | echo $iters_per_epoch > $dir/iters_per_epoch
54 | echo $samples_per_iter_real > $dir/samples_per_iter
55 |
56 | alignments=$(for n in $(seq $num_jobs_align); do echo -n "$alidir/ali.$n.gz "; done)
57 |
58 | if [ $stage -le 0 ]; then
59 | egs_in=
60 | egs_out=
61 | for x in `seq 1 $num_jobs_nnet`; do
62 | for y in `seq 0 $[$iters_per_epoch-1]`; do
63 | utils/create_data_link.pl $dir/egs.$x.$y.ark
64 | if [ $x -eq 1 ]; then
65 | egs_in="$egs_in ark:$egs_in_dir/egs.JOB.$y.ark "
66 | egs_out="$egs_out ark:$dir/egs.JOB.$y.ark "
67 | fi
68 | done
69 | done
70 |
71 | $cmd JOB=1:$num_jobs_nnet $dir/log/relabel_egs.JOB.log \
72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \
73 | $egs_in $egs_out || exit 1
74 | fi
75 |
76 | if [ $stage -le 1 ]; then
77 | egs_in=
78 | egs_out=
79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do
80 | utils/create_data_link.pl $dir/$x
81 | egs_in="$egs_in ark:$egs_in_dir/$x"
82 | egs_out="$egs_out ark:$dir/$x"
83 | done
84 |
85 | $cmd $dir/log/relabel_egs_extra.log \
86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \
87 | $egs_in $egs_out || exit 1
88 | fi
89 |
90 | echo "$0: Finished relabeling training examples"
91 |
--------------------------------------------------------------------------------
/steps/nnet2/relabel_egs2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Vimal Manohar.
4 | # 2014 Johns Hopkins University (author: Daniel Povey)
5 | # Apache 2.0.
6 | #
7 | # This script, which will generally be called during neural-net training,
8 | # relabels existing examples with better labels obtained by realigning the data
9 | # with the current nnet model.
10 | # This script is like relabel_egs.sh, but adapted to work with the newer
11 | # egs format that is written by get_egs2.sh.
12 |
13 | # Begin configuration section
14 | cmd=run.pl
15 | stage=0
16 | extra_egs= # Names of additional egs files that need to be relabelled
17 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs,
18 | # valid_diagnostic.egs
19 | iter=final
20 | parallel_opts=
21 | echo "$0 $@" # Print the command line for logging
22 |
23 | if [ -f path.sh ]; then . ./path.sh; fi
24 | . parse_options.sh || exit 1;
25 |
26 | if [ $# != 3 ]; then
27 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] "
28 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5"
29 | echo ""
30 | echo "Main options (for others, see top of script file)"
31 | echo " --config # config file containing options"
32 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs."
33 |
34 | exit 1;
35 | fi
36 |
37 | alidir=$1
38 | egs_in_dir=$2
39 | dir=$3
40 |
41 | model=$alidir/$iter.mdl
42 |
43 | # Check some files.
44 |
45 | [ -f $egs_in_dir/iters_per_epoch ] && \
46 | echo "$0: this script does not work with the old egs directory format" && exit 1;
47 |
48 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.ark $egs_in_dir/combine.egs \
49 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \
50 | $egs_in_dir/info/num_archives; do
51 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
52 | done
53 |
54 | num_archives=$(cat $egs_in_dir/info/num_archives) || exit 1;
55 | num_jobs_align=$(cat $alidir/num_jobs) || exit 1;
56 |
57 | mkdir -p $dir/log
58 |
59 | mkdir -p $dir/info
60 | cp -r $egs_in_dir/info/* $dir/info
61 |
62 | alignments=$(for n in $(seq $num_jobs_align); do echo $alidir/ali.$n.gz; done)
63 |
64 | if [ $stage -le 0 ]; then
65 | for x in $(seq $num_archives); do
66 | # if $dir/storage exists, make the soft links that we'll
67 | # use to distribute the data across machines
68 | utils/create_data_link.pl $dir/egs.$x.ark
69 | done
70 |
71 | $cmd $parallel_opts JOB=1:$num_archives $dir/log/relabel_egs.JOB.log \
72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \
73 | ark:$egs_in_dir/egs.JOB.ark ark:$dir/egs.JOB.ark || exit 1
74 | fi
75 |
76 | if [ $stage -le 1 ]; then
77 | egs_in=
78 | egs_out=
79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do
80 | utils/create_data_link.pl $dir/$x
81 | egs_in="$egs_in ark:$egs_in_dir/$x"
82 | egs_out="$egs_out ark:$dir/$x"
83 | done
84 |
85 | $cmd $dir/log/relabel_egs_extra.log \
86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \
87 | $egs_in $egs_out || exit 1
88 | fi
89 |
90 | echo "$0: Finished relabeling training examples"
91 |
--------------------------------------------------------------------------------
/steps/nnet2/remove_egs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey).
4 | # Apache 2.0.
5 |
6 | # This script removes the examples in an egs/ directory, e.g.
7 | # steps/nnet2/remove_egs.sh exp/nnet4b/egs/
8 | # We give it its own script because we need to be careful about
9 | # things that are soft links to something in storage/ (i.e. remove the
10 | # data that's linked to as well as the soft link), and we want to not
11 | # delete the examples if someone has done "touch $dir/egs/.nodelete".
12 |
13 |
14 | if [ $# != 1 ]; then
15 | echo "Usage: $0 "
16 | echo "e.g.: $0 data/nnet4b/egs/"
17 | echo "e.g.: $0 data/nnet4b_mpe/degs/"
18 | echo "This script is usually equivalent to 'rm /egs.* /degs.*' but it follows"
19 | echo "soft links to /storage/; and it avoids deleting anything in the directory if"
20 | echo "someone did 'touch /.nodelete"
21 | exit 1;
22 | fi
23 |
24 | egs=$1
25 |
26 | if [ ! -d $egs ]; then
27 | echo "$0: expected directory $egs to exist"
28 | exit 1;
29 | fi
30 |
31 | if [ -f $egs/.nodelete ]; then
32 | echo "$0: not deleting egs in $egs since $egs/.nodelete exists"
33 | exit 0;
34 | fi
35 |
36 |
37 |
38 | for f in $egs/egs.*.ark $egs/degs.*.ark $egs/cegs.*.ark; do
39 | if [ -L $f ]; then
40 | rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails.
41 | fi
42 | rm $f 2>/dev/null
43 | done
44 |
45 |
46 | echo "$0: Finished deleting examples in $egs"
47 |
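48 | # A usage sketch (hypothetical paths): protect one egs dir, then clean another:
49 | #
50 | # touch exp/nnet5c/egs/.nodelete # this dir will now be left alone
51 | # steps/nnet2/remove_egs.sh exp/nnet5c_mpe/degs # removes degs.*.ark and any links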
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | if (@ARGV != 2) {
14 | print STDERR "Usage: utils/gen_topo.pl \n";
15 | print STDERR "e.g.: utils/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n";
16 | exit (1);
17 | }
18 |
19 | ($nonsil_phones, $sil_phones) = @ARGV;
20 |
21 | $nonsil_phones =~ s/:/ /g;
22 | $sil_phones =~ s/:/ /g;
23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
25 |
26 | print "\n";
27 | print "\n";
28 | print "\n";
29 | print "$nonsil_phones $sil_phones\n";
30 | print "\n";
31 | # The next two lines may look like a bug, but they are as intended. State 0 has
32 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
33 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
34 | # following state 0.
35 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
36 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
37 | # code, so they are never used.
38 | print " 0 0 1 0.5 2 0.5 \n";
39 | print " 1 1 1 0.5 2 0.5 \n";
40 | print " 2 \n";
41 | print "\n";
42 | print "\n";
43 |
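44 | # A usage sketch (hypothetical phone ids and path): write the chain topology
45 | # for nonsilence phones 4-10 and silence phones 1-3 into a lang directory:
46 | #
47 | # steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3 > data/lang_chain/topo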
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | from __future__ import print_function
14 | import argparse
15 |
16 |
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py "
18 | " "
19 | "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 |
26 | args = parser.parse_args()
27 |
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 |
32 | print("")
33 | print("")
34 | print("")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("")
37 | # The next two lines may look like a bug, but they are as intended. State 0 has
38 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
39 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
40 | # following state 0.
41 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
42 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
43 | # code, so they are never used.
44 | print(" 0 0 1 0.5 2 0.5 ")
45 | print(" 1 1 1 0.5 2 0.5 ")
46 | print(" 2 ")
47 | print("")
48 | print("")
49 |
50 |
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | from __future__ import print_function
14 | import argparse
15 |
16 |
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo2.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones>, "
19 | "e.g.: steps/nnet3/chain/gen_topo2.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 |
26 | args = parser.parse_args()
27 |
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 |
32 | print("")
33 | print("")
34 | print("")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("")
37 |
38 | # the pdf-classes are as follows:
39 | # pdf-class 0 is in a 1-frame sequence, the initial and final state.
40 | # pdf-class 1 is in a sequence with >=3 frames, the 'middle' states. (important that
41 | # it be numbered 1, which is the default list of pdf-classes used in 'cluster-phones').
42 | # pdf-class 2 is the initial-state in a sequence with >= 2 frames.
43 | # pdf-class 3 is the final-state in a sequence with >= 2 frames.
44 | # state 0 is nonemitting in this topology.
45 |
46 | print(" 0 1 0.5 2 0.5 ") # initial nonemitting state.
47 | print(" 1 0 5 1.0 ") # 1-frame sequence.
48 | print(" 2 2 3 0.5 4 0.5 ") # 2 or more frames
49 | print(" 3 1 3 0.5 4 0.5 ") # 3 or more frames
50 | print(" 4 3 5 1.0 ") # 2 or more frames.
51 | print(" 5 ") # final nonemitting state
52 |
53 | print("")
54 | print("")
55 |
56 |
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | from __future__ import print_function
14 | import argparse
15 |
16 |
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo3.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones>, "
19 | "e.g.: steps/nnet3/chain/gen_topo3.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 |
26 | args = parser.parse_args()
27 |
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 |
32 | print("")
33 | print("")
34 | print("")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("")
37 | print(" 0 0 0 0.5 1 0.5 ")
38 | print(" 1 ")
39 | print("")
40 | print("")
41 |
42 |
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | from __future__ import print_function
14 | import argparse
15 |
16 |
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo4.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones>, "
19 | "e.g.: steps/nnet3/chain/gen_topo4.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 |
26 | args = parser.parse_args()
27 |
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 |
32 | print("")
33 | print("")
34 | print("")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("")
37 | # state 0 is obligatory (occurs once)
38 | print(" 0 0 1 0.3333 2 0.3333 3 0.3333 ")
39 | # state 1 is used only when >2 frames
40 | print(" 1 1 1 0.5 2 0.5 ")
41 | # state 2 is used only when >=2 frames (and occurs once)
42 | print(" 2 2 3 1.0 ")
43 | print(" 3 ") # final nonemitting state
44 | print("")
45 | print("")
46 |
47 |
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 |
13 | from __future__ import print_function
14 | import argparse
15 |
16 |
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo5.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones>, "
19 | "e.g.: steps/nnet3/chain/gen_topo5.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 |
26 | args = parser.parse_args()
27 |
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 |
32 | print("")
33 | print("")
34 | print("")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("")
37 | # state 0 is nonemitting
38 | print("<State> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
39 | # state 1 is for when we traverse the phone in a single frame
40 | print("<State> 1 <PdfClass> 0 <Transition> 4 1.0 </State>")
41 | # state 2 is the first state when we traverse the phone in more than one frame.
42 | print("<State> 2 <PdfClass> 2 <Transition> 3 1.0 </State>")
43 | # state 3 is for the self-loop. Use pdf-class 1 here so that the default
44 | # phone-class clustering (which uses only pdf-class 1 by default) gets only
45 | # stats from longer phones.
46 | print("<State> 3 <PdfClass> 1 <Transition> 3 0.5 <Transition> 4 0.5 </State>")
47 | print("<State> 4 </State>")
48 | print("</TopologyEntry>")
49 | print("</Topology>")
50 |
51 |
--------------------------------------------------------------------------------
/steps/nnet3/get_successful_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import print_function
4 | import re
5 | import os
6 | import argparse
7 | import sys
8 | import warnings
9 | import copy
10 | import glob
11 |
12 |
13 | if __name__ == "__main__":
14 | # we add compulsory arguments as named arguments for readability
15 | parser = argparse.ArgumentParser(description="Create a list of models suitable for averaging "
16 | "based on their train objf values.",
17 | epilog="See steps/nnet3/lstm/train.sh for example.")
18 |
19 | parser.add_argument("--difference-threshold", type=float,
20 | help="The threshold for discarding models, "
21 | "when objf of the model differs more than this value from the best model "
22 | "it is discarded.",
23 | default=1.0)
24 |
25 | parser.add_argument("num_models", type=int,
26 | help="Number of models.")
27 |
28 | parser.add_argument("logfile_pattern", type=str,
29 | help="Pattern for identifying the log-file names. "
30 | "It specifies the entire log file name, except for the job number, "
31 | "which is replaced with '%'. e.g. exp/nneet3/tdnn_sp/log/train.4.%.log")
32 |
33 |
34 | args = parser.parse_args()
35 |
36 | assert(args.num_models > 0)
37 |
38 | parse_regex = re.compile("LOG .* Overall average objective function for 'output' is ([0-9e.\-+]+) over ([0-9e.\-+]+) frames")
39 | loss = []
40 | for i in range(args.num_models):
41 | model_num = i + 1
42 | logfile = re.sub('%', str(model_num), args.logfile_pattern)
43 | lines = open(logfile, 'r').readlines()
44 | this_loss = -100000
45 | for line_num in range(1, len(lines) + 1):
46 | # we search from the end, as this usually needs
47 | # fewer regex searches. Python regex is slow!
48 | mat_obj = parse_regex.search(lines[-1*line_num])
49 | if mat_obj is not None:
50 | this_loss = float(mat_obj.groups()[0])
51 | break;
52 | loss.append(this_loss);
53 | max_index = loss.index(max(loss))
54 | accepted_models = []
55 | for i in range(args.num_models):
56 | if (loss[max_index] - loss[i]) <= args.difference_threshold:
57 | accepted_models.append(i+1)
58 |
59 | model_list = " ".join(map(lambda x: str(x), accepted_models))
60 | print(model_list)
61 |
62 | if len(accepted_models) != args.num_models:
63 | print("WARNING: Only {0}/{1} of the models have been accepted for averaging, based on log files {2}.".format(len(accepted_models), args.num_models, args.logfile_pattern), file=sys.stderr)
64 | print(" Using models {0}".format(model_list), file=sys.stderr)
65 |
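66 | # A usage sketch (hypothetical log names): with 8 training jobs logging to
67 | # exp/nnet3/tdnn/log/train.4.{1..8}.log, print the job numbers whose objective
68 | # function is within --difference-threshold of the best one:
69 | #
70 | # steps/nnet3/get_successful_models.py 8 'exp/nnet3/tdnn/log/train.4.%.log'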
--------------------------------------------------------------------------------
/steps/nnet3/nnet3_to_dot.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # script showing use of nnet3_to_dot.py
4 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti).
5 |
6 | # Begin configuration section.
7 | component_attributes="name,type"
8 | node_prefixes=""
9 | info_bin=nnet3-am-info
10 | echo "$0 $@" # Print the command line for logging
11 |
12 | [ -f ./path.sh ] && . ./path.sh; # source the path.
13 | . parse_options.sh || exit 1;
14 |
15 | if [ $# != 3 ]; then
16 | echo "Usage: $0 [opts] "
17 | echo " e.g.: $0 exp/sdm1/nnet3/lstm_sp/0.mdl lstm.dot lstm.png"
18 | echo ""
19 | echo "Main options (for others, see top of script file)"
20 | echo " --component-attributes # attributes to be printed in nnet3 components"
21 | echo " --node-prefixes # list of prefixes. Nnet3 components/component-nodes with the same prefix"
22 | echo " # will be clustered together in the dot-graph"
23 |
24 |
25 | exit 1;
26 | fi
27 |
28 | model=$1
29 | dot_file=$2
30 | output_file=$3
31 |
32 | attr=${node_prefixes:+ --node-prefixes "$node_prefixes"}
33 | $info_bin $model | \
34 | steps/nnet3/dot/nnet3_to_dot.py \
35 | --component-attributes "$component_attributes" \
36 | $attr $dot_file
37 |
38 | command -v dot >/dev/null 2>&1 || { echo >&2 "This script requires dot, but it is not installed; install graphviz, or compile $dot_file with dot elsewhere."; exit 1; }
39 | dot -Tpdf $dot_file -o $output_file
40 |
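41 | # A usage sketch (hypothetical model path): render an nnet3 model as a PDF,
42 | # keeping the intermediate dot file for inspection:
43 | #
44 | # steps/nnet3/nnet3_to_dot.sh --component-attributes name,type \
45 | # exp/nnet3/lstm/final.mdl lstm.dot lstm.pdf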
--------------------------------------------------------------------------------
/steps/online/nnet2/align.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2012 Brno University of Technology (Author: Karel Vesely)
3 | # 2013-2014 Johns Hopkins University (Author: Daniel Povey)
4 | # Apache 2.0
5 |
6 | # Computes training alignments using DNN. This takes as input a directory
7 | # prepared as for online-nnet2 decoding (e.g. by
8 | # steps/online/nnet2/prepare_online_decoding.sh), and it computes the features
9 | # directly from the wav.scp instead of relying on features dumped on disk;
10 | # this avoids the hassle of having to dump suitably matched features.
11 |
12 |
13 | # Begin configuration section.
14 | nj=4
15 | cmd=run.pl
16 | # Begin configuration.
17 | scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
18 | beam=10
19 | retry_beam=40
20 | iter=final
21 | use_gpu=no
22 |
23 | echo "$0 $@" # Print the command line for logging
24 |
25 | [ -f path.sh ] && . ./path.sh # source the path.
26 | . parse_options.sh || exit 1;
27 |
28 | if [ $# != 4 ]; then
29 | echo "Usage: $0 "
30 | echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali"
31 | echo "main options (for others, see top of script file)"
32 | echo " --config # config containing options"
33 | echo " --nj # number of parallel jobs"
34 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
35 | exit 1;
36 | fi
37 |
38 | data=$1
39 | lang=$2
40 | srcdir=$3
41 | dir=$4
42 |
43 | oov=`cat $lang/oov.int` || exit 1;
44 | mkdir -p $dir/log
45 | echo $nj > $dir/num_jobs
46 | sdata=$data/split$nj
47 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
48 |
49 |
50 | for f in $srcdir/tree $srcdir/${iter}.mdl $data/wav.scp $lang/L.fst \
51 | $srcdir/conf/online_nnet2_decoding.conf; do
52 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
53 | done
54 |
55 | utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1;
56 | cp $srcdir/phones.txt $dir || exit 1;
57 | cp $srcdir/{tree,${iter}.mdl} $dir || exit 1;
58 |
59 | grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
60 |
61 |
62 | if [ -f $data/segments ]; then
63 | # note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
64 | # previous utterances within a speaker, we do the filtering after extracting the features.
65 | echo "$0 [info]: segments file exists: using that."
66 | feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
67 | else
68 | echo "$0 [info]: no segments file exists, using wav.scp."
69 | feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
70 | fi
71 |
72 | echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"
73 |
74 | tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
75 |
76 | $cmd JOB=1:$nj $dir/log/align.JOB.log \
77 | compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \
78 | nnet-align-compiled $scale_opts --use-gpu=$use_gpu --beam=$beam --retry-beam=$retry_beam \
79 | $srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
80 |
81 | echo "$0: done aligning data."
82 |
83 |
--------------------------------------------------------------------------------
/steps/online/nnet2/copy_data_dir.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey)
4 | # Apache 2.0
5 |
6 | # Warning, this script is deprecated, please use utils/data/modify_speaker_info.sh
7 |
8 | # This script is like utils/copy_data_dir.sh in that it copies a data-dir,
9 | # but it supports the --utts-per-spk-max option. If nonzero, it modifies
10 | # the utt2spk and spk2utt files by splitting each speaker into multiple
11 | # versions, so that each speaker has no more than --utts-per-spk-max
12 | # utterances.
13 |
14 | # begin configuration section
15 | utts_per_spk_max=-1
16 | # end configuration section
17 |
18 | . utils/parse_options.sh
19 |
20 | if [ $# != 2 ]; then
21 | echo "Usage: "
22 | echo " $0 [options] "
23 | echo "e.g.:"
24 | echo " $0 --utts-per-spk-max 2 data/train data/train-max2"
25 | echo "Options"
26 | echo " --utts-per-spk-max # number of utterances per speaker maximum,"
27 | echo " # default -1 (meaning no maximum). E.g. 2."
28 | exit 1;
29 | fi
30 |
31 |
32 | echo "$0: this script is deprecated, please use utils/data/modify_speaker_info.sh."
33 |
34 | export LC_ALL=C
35 |
36 | srcdir=$1
37 | destdir=$2
38 |
39 | if [ ! -f $srcdir/utt2spk ]; then
40 | echo "$0: no such file $srcdir/utt2spk"
41 | exit 1;
42 | fi
43 |
44 | set -e;
45 | set -o pipefail
46 |
47 | mkdir -p $destdir
48 |
49 |
50 | if [ "$utts_per_spk_max" != -1 ]; then
51 | # create spk2utt file with reduced number of utterances per speaker.
52 | awk -v max=$utts_per_spk_max '{ n=2; count=0;
53 | while(n<=NF) {
54 | int_max=int(max)+ (rand() < (max-int(max))?1:0);
55 | nmax=n+int_max; count++; printf("%s-%06x", $1, count);
56 | for (;n<nmax && n<=NF; n++) printf(" %s", $n); print ""; } }' \
57 | <$srcdir/spk2utt >$destdir/spk2utt
58 | utils/spk2utt_to_utt2spk.pl <$destdir/spk2utt >$destdir/utt2spk
59 |
60 | if [ -f $srcdir/cmvn.scp ]; then
61 | # below, the first apply_map command outputs a cmvn.scp indexed by utt;
62 | # the second one outputs a cmvn.scp indexed by new speaker-id.
63 | utils/apply_map.pl -f 2 $srcdir/cmvn.scp <$srcdir/utt2spk | \
64 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq > $destdir/cmvn.scp
65 | echo "$0: mapping cmvn.scp, but you may want to recompute it if it's needed,"
66 | echo " as it would probably change."
67 | fi
68 | if [ -f $srcdir/spk2gender ]; then
69 | utils/apply_map.pl -f 2 $srcdir/spk2gender <$srcdir/utt2spk | \
70 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq >$destdir/spk2gender
71 | fi
72 | else
73 | cp $srcdir/spk2utt $srcdir/utt2spk $destdir/
74 | [ -f $srcdir/spk2gender ] && cp $srcdir/spk2gender $destdir/
75 | [ -f $srcdir/cmvn.scp ] && cp $srcdir/cmvn.scp $destdir/
76 | fi
77 |
78 |
79 | for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do
80 | [ -f $srcdir/$f ] && cp $srcdir/$f $destdir/
81 | done
82 |
83 | echo "$0: copied data from $srcdir to $destdir, with --utts-per-spk-max $utts_per_spk_max"
84 | opts=
85 | [ ! -f $srcdir/feats.scp ] && opts="--no-feats"
86 | [ ! -f $srcdir/text ] && opts="$opts --no-text"
87 | [ ! -f $srcdir/wav.scp ] && opts="$opts --no-wav"
88 |
89 | utils/validate_data_dir.sh $opts $destdir
90 |
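91 | # An illustration of the speaker-splitting (made-up ids): with
92 | # --utts-per-spk-max 2, a spk2utt line such as
93 | # spk1 utt1 utt2 utt3 utt4 utt5
94 | # becomes three pseudo-speakers of at most two utterances each:
95 | # spk1-000001 utt1 utt2
96 | # spk1-000002 utt3 utt4
97 | # spk1-000003 utt5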
--------------------------------------------------------------------------------
/steps/online/nnet2/prepare_online_decoding_transfer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
4 | # Apache 2.0
5 |
6 | # This is as prepare_online_decoding.sh, but for transfer learning: the case where
7 | # you have an existing online-decoding directory with all the feature-extraction
8 | # setup that you don't want to change, but a new model you want to decode with.
9 |
10 | # Begin configuration.
11 | stage=0 # This allows restarting partway through, when something went wrong.
12 | cmd=run.pl
13 | iter=final
14 | # End configuration.
15 |
16 | echo "$0 $@" # Print the command line for logging
17 |
18 | [ -f path.sh ] && . ./path.sh;
19 | . parse_options.sh || exit 1;
20 |
21 | if [ $# -ne 4 ]; then
22 | echo "Usage: $0 [options] "
23 | echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
24 | echo "main options (for others, see top of script file)"
25 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
26 | echo " --config # config containing options"
27 | echo " --stage # stage to do partial re-run from."
28 | exit 1;
29 | fi
30 |
31 | online_src=$1
32 | lang=$2
33 | nnet_src=$3
34 | dir=$4
35 |
36 | for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $lang/words.txt; do
37 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
38 | done
39 |
40 |
41 | dir_as_given=$dir
42 | dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
43 | # configuration files we write will contain absolute
44 | # pathnames.
45 | mkdir -p $dir/conf $dir/log
46 |
47 | utils/lang/check_phones_compatible.sh $lang/phones.txt $nnet_src/phones.txt || exit 1;
48 | cp $nnet_src/phones.txt $dir || exit 1;
49 |
50 | cp $nnet_src/tree $dir/ || exit 1;
51 |
52 | cp $nnet_src/$iter.mdl $dir/ || exit 1;
53 |
54 |
55 | # There are a bunch of files that we will need to copy from $online_src, because
56 | # we're aiming to have one self-contained directory that has everything in it.
57 | mkdir -p $dir/ivector_extractor
58 | cp -r $online_src/ivector_extractor/* $dir/ivector_extractor
59 |
60 | [ ! -d $online_src/conf ] && \
61 | echo "Expected directory $online_src/conf to exist" && exit 1;
62 |
63 | for x in $online_src/conf/*conf; do
64 | # Replace directory name starting $online_src with those starting with $dir.
65 | # We actually replace any directory names ending in /ivector_extractor/ or /conf/
66 | # with $dir/ivector_extractor/ or $dir/conf/
67 | cat $x | perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" > $dir/conf/$(basename $x)
68 | done
69 |
70 |
71 | # modify the silence-phones in the config; these are only used for the
72 | # endpointing code.
73 | cp $dir/conf/online_nnet2_decoding.conf{,.tmp}
74 | silphones=$(cat $lang/phones/silence.csl) || exit 1;
75 | cat $dir/conf/online_nnet2_decoding.conf.tmp | \
76 | sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf
77 | rm $dir/conf/online_nnet2_decoding.conf.tmp
78 |
79 | echo "$0: formatted neural net for online decoding in $dir_as_given"
80 |
--------------------------------------------------------------------------------
/steps/paste_feats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Brno University of Technology (Author: Karel Vesely)
4 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
5 | # Apache 2.0
6 | # This script appends the features in two or more data directories.
7 |
8 | # To be run from .. (one directory up from here)
9 | # see ../run.sh for example
10 |
11 | # Begin configuration section.
12 | cmd=run.pl
13 | nj=4
14 | length_tolerance=10 # length tolerance in frames (trim to shortest)
15 | compress=true
16 | # End configuration section.
17 |
18 | echo "$0 $@" # Print the command line for logging
19 |
20 | if [ -f path.sh ]; then . ./path.sh; fi
21 | . parse_options.sh || exit 1;
22 |
23 | if [ $# -lt 5 ]; then
24 | echo "usage: $0 [options] [] ";
25 | echo "e.g.: $0 data/train_mfcc data/train_bottleneck data/train_combined exp/append_mfcc_plp mfcc"
26 | echo "options: "
27 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
28 | exit 1;
29 | fi
30 |
31 | data_src_arr=(${@:1:$(($#-3))}) #array of source data-dirs
32 | data=${@: -3: 1}
33 | logdir=${@: -2: 1}
34 | ark_dir=${@: -1: 1} #last arg.
35 |
36 | data_src_first=${data_src_arr[0]} # get 1st src dir
37 |
38 | # make $ark_dir an absolute pathname.
39 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}`
40 |
41 | for data_src in ${data_src_arr[@]}; do
42 | utils/split_data.sh $data_src $nj || exit 1;
43 | done
44 |
45 | mkdir -p $ark_dir $logdir
46 |
47 | mkdir -p $data
48 | cp $data_src_first/* $data/ 2>/dev/null # so we get the other files, such as utt2spk.
49 | rm $data/cmvn.scp 2>/dev/null
50 | rm $data/feats.scp 2>/dev/null
51 |
52 | # use "name" as part of name of the archive.
53 | name=`basename $data`
54 |
55 | # get list of source scp's for pasting
56 | data_src_args=
57 | for data_src in ${data_src_arr[@]}; do
58 | data_src_args="$data_src_args scp:$data_src/split$nj/JOB/feats.scp"
59 | done
60 |
61 | for n in $(seq $nj); do
62 | # the next command does nothing unless $ark_dir/storage/ exists, see
63 | # utils/create_data_link.pl for more info.
64 | utils/create_data_link.pl $ark_dir/pasted_$name.$n.ark
65 | done
66 |
67 | $cmd JOB=1:$nj $logdir/append.JOB.log \
68 | paste-feats --length-tolerance=$length_tolerance $data_src_args ark:- \| \
69 | copy-feats --compress=$compress ark:- \
70 | ark,scp:$ark_dir/pasted_$name.JOB.ark,$ark_dir/pasted_$name.JOB.scp || exit 1;
71 |
72 | # concatenate the .scp files together.
73 | for ((n=1; n<=nj; n++)); do
74 | cat $ark_dir/pasted_$name.$n.scp
75 | done > $data/feats.scp || exit 1;
76 |
77 |
78 | nf=`cat $data/feats.scp | wc -l`
79 | nu=`cat $data/utt2spk | wc -l`
80 | if [ $nf -ne $nu ]; then
81 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
82 | echo "consider using utils/fix_data_dir.sh $data"
83 | fi
84 |
85 | echo "Succeeded pasting features for $name into $data"
86 |
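87 | # A usage sketch (hypothetical dirs): paste MFCC and bottleneck features
88 | # frame-by-frame into a new data dir (two or more source dirs are allowed):
89 | #
90 | # steps/paste_feats.sh --nj 8 data/train_mfcc data/train_bn \
91 | # data/train_mfcc_bn exp/paste_mfcc_bn mfcc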
--------------------------------------------------------------------------------
/steps/score_kaldi_compare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016 Nicolas Serrano
3 | # Apache 2.0
4 |
5 | [ -f ./path.sh ] && . ./path.sh
6 |
7 | # begin configuration section.
8 | cmd=run.pl
9 | replications=10000
10 | #end configuration section.
11 |
12 | echo "$0 $@" # Print the command line for logging
13 | [ -f ./path.sh ] && . ./path.sh
14 | . parse_options.sh || exit 1;
15 |
16 | if [ $# -ne 3 ]; then
17 | echo "Usage: local/score_compare.sh [--cmd (run.pl|queue.pl...)] "
18 | echo " Options:"
19 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
20 | echo " --replications # number of bootstrap evaluation to compute confidence."
21 | exit 1;
22 | fi
23 |
24 | dir1=$1
25 | dir2=$2
26 | dir_compare=$3
27 |
28 | mkdir -p $dir_compare/log
29 |
30 | for d in $dir1 $dir2; do
31 | for f in test_filt.txt best_wer; do
32 | [ ! -f $d/$f ] && echo "score_compare.sh: no such file $d/$f" && exit 1;
33 | done
34 | done
35 |
36 |
37 | best_wer_file1=$(awk '{print $NF}' $dir1/best_wer)
38 | best_transcript_file1=$(echo $best_wer_file1 | sed -e 's=.*/wer_==' | \
39 | awk -v FS='_' -v dir=$dir1 '{print dir"/penalty_"$2"/"$1".txt"}')
40 |
41 | best_wer_file2=$(awk '{print $NF}' $dir2/best_wer)
42 | best_transcript_file2=$(echo $best_wer_file2 | sed -e 's=.*/wer_==' | \
43 | awk -v FS='_' -v dir=$dir2 '{print dir"/penalty_"$2"/"$1".txt"}')
44 |
45 | $cmd $dir_compare/log/score_compare.log \
46 | compute-wer-bootci --replications=$replications \
47 | ark:$dir1/test_filt.txt ark:$best_transcript_file1 ark:$best_transcript_file2 \
48 | '>' $dir_compare/wer_bootci_comparison || exit 1;
49 |
50 | exit 0;
51 |
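52 | # A usage sketch (hypothetical dirs): each scoring dir must already contain
53 | # test_filt.txt and best_wer (as written by the scoring scripts); the bootstrap
54 | # comparison ends up in the third dir as wer_bootci_comparison:
55 | #
56 | # steps/score_kaldi_compare.sh --replications 10000 \
57 | # exp/tri3/decode/scoring_kaldi exp/nnet3/decode/scoring_kaldi \
58 | # exp/compare_tri3_vs_nnet3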
--------------------------------------------------------------------------------
/steps/search_index.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 | # Apache 2.0
5 |
6 | # Begin configuration section.
7 | cmd=run.pl
8 | nbest=-1
9 | strict=true
10 | indices_dir=
11 | # End configuration section.
12 |
13 | echo "$0 $@" # Print the command line for logging
14 |
15 | [ -f ./path.sh ] && . ./path.sh; # source the path.
16 | . parse_options.sh || exit 1;
17 |
18 | if [ $# != 2 ]; then
19 | echo "Usage: steps/search_index.sh [options] "
20 | echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/"
21 | echo ""
22 | echo "main options (for others, see top of script file)"
23 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
24 | echo " --nbest # return n best results. (-1 means all)"
25 | echo " --indices-dir # where the indices should be stored, by default it will be in "
26 | exit 1;
27 | fi
28 |
29 |
30 | kwsdatadir=$1;
31 | kwsdir=$2;
32 |
33 | if [ -z $indices_dir ] ; then
34 | indices_dir=$kwsdir
35 | fi
36 |
37 | mkdir -p $kwsdir/log;
38 | nj=`cat $indices_dir/num_jobs` || exit 1;
39 | keywords=$kwsdatadir/keywords.fsts;
40 |
41 | for f in $indices_dir/index.1.gz $keywords; do
42 | [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1;
43 | done
44 |
45 | $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \
46 | kws-search --strict=$strict --negative-tolerance=-1 \
47 | "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \
48 | "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1;
49 |
50 | exit 0;
51 |
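52 | # A usage sketch (hypothetical dirs): search the index built by
53 | # steps/make_index.sh for the keyword FSTs, keeping the 50 best hits each:
54 | #
55 | # steps/search_index.sh --nbest 50 data/kws exp/tri5/decode/kws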
--------------------------------------------------------------------------------
/steps/select_feats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
4 | # Apache 2.0
5 | # This script selects some specified dimensions of the features in the
6 | # input data directory.
7 |
8 | # To be run from .. (one directory up from here)
9 | # see ../run.sh for example
10 |
11 | # Begin configuration section.
12 | cmd=run.pl
13 | nj=4
14 | compress=true
15 | # End configuration section.
16 |
17 | echo "$0 $@" # Print the command line for logging
18 |
19 | if [ -f path.sh ]; then . ./path.sh; fi
20 | . parse_options.sh || exit 1;
21 |
22 | if [ $# -ne 5 ]; then
23 | echo "usage: $0 [options] ";
24 | echo "e.g.: $0 0-12 data/train_mfcc_pitch data/train_mfcconly exp/select_pitch_train mfcc"
25 | echo "options: "
26 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
27 | exit 1;
28 | fi
29 |
30 | selector="$1"
31 | data_in=$2
32 | data=$3
33 | logdir=$4
34 | ark_dir=$5
35 |
36 | # make $ark_dir an absolute pathname.
37 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}`
38 |
39 |
40 | utils/split_data.sh $data_in $nj || exit 1;
41 |
42 | mkdir -p $ark_dir $logdir
43 | mkdir -p $data
44 |
45 | cp $data_in/* $data/ 2>/dev/null # so we get the other files, such as utt2spk.
46 | rm $data/cmvn.scp 2>/dev/null
47 | rm $data/feats.scp 2>/dev/null
48 |
49 | # use "name" as part of name of the archive.
50 | name=`basename $data`
51 |
52 | for j in $(seq $nj); do
53 | # the next command does nothing unless $ark_dir/storage/ exists, see
54 | # utils/create_data_link.pl for more info.
55 | utils/create_data_link.pl $ark_dir/selected_$name.$j.ark
56 | done
57 |
58 | $cmd JOB=1:$nj $logdir/append.JOB.log \
59 | select-feats "$selector" scp:$data_in/split$nj/JOB/feats.scp ark:- \| \
60 | copy-feats --compress=$compress ark:- \
61 | ark,scp:$ark_dir/selected_$name.JOB.ark,$ark_dir/selected_$name.JOB.scp || exit 1;
62 |
63 | # concatenate the .scp files together.
64 | for ((n=1; n<=nj; n++)); do
65 | cat $ark_dir/selected_$name.$n.scp
66 | done > $data/feats.scp || exit 1;
67 |
68 |
69 | nf=`cat $data/feats.scp | wc -l`
70 | nu=`cat $data/utt2spk | wc -l`
71 | if [ $nf -ne $nu ]; then
72 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
73 | exit 1;
74 | fi
75 |
76 | echo "Succeeded selecting features for $name into $data"
77 |
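78 | # A usage sketch (hypothetical dirs): keep only dimensions 0-12 (the MFCCs)
79 | # of an MFCC+pitch feature set, dropping the appended pitch dimensions:
80 | #
81 | # steps/select_feats.sh 0-12 data/train_mfcc_pitch data/train_nopitch \
82 | # exp/select_nopitch mfcc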
--------------------------------------------------------------------------------
/steps/shift_feats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016 Vimal Manohar
4 | # Apache 2.0
5 |
6 | # This script shifts the feats in the input data directory and creates a
7 | # new directory <srcdir>_fs<num-frames-shift> with the shifted feats.
8 | # If the shift is negative, the initial frames get truncated.
9 | # If the shift is positive, the first frame is repeated.
10 | # Usually applicable for sequence training
11 |
12 | # To be run from .. (one directory up from here)
13 | # see ../run.sh for example
14 |
15 | # Begin configuration section.
16 | cmd=run.pl
17 | nj=4
18 | compress=true
19 | # End configuration section.
20 |
21 | echo "$0 $@" # Print the command line for logging
22 |
23 | if [ -f path.sh ]; then . ./path.sh; fi
24 | . parse_options.sh || exit 1;
25 |
26 | if [ $# -ne 4 ]; then
27 | echo "usage: $0 [options] ";
28 | echo "e.g.: $0 -1 data/train exp/shift-1_train mfcc"
29 | echo "options: "
30 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
31 | exit 1;
32 | fi
33 |
34 | num_frames_shift=$1
35 | data_in=$2
36 | logdir=$3
37 | featdir=$4
38 |
39 | utt_prefix="fs$num_frames_shift-"
40 | spk_prefix="fs$num_frames_shift-"
41 |
42 | # make $featdir an absolute pathname.
43 | featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}`
44 |
45 | utils/split_data.sh $data_in $nj || exit 1;
46 |
47 | data=${data_in}_fs$num_frames_shift
48 |
49 | mkdir -p $featdir $logdir
50 | mkdir -p $data
51 |
52 | utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \
53 | $data_in $data
54 |
55 | rm $data/feats.scp 2>/dev/null
56 |
57 | # use "name" as part of name of the archive.
58 | name=`basename $data`
59 |
60 | for j in $(seq $nj); do
61 | # the next command does nothing unless $featdir/storage/ exists, see
62 | # utils/create_data_link.pl for more info.
63 | utils/create_data_link.pl $featdir/raw_feats_$name.$j.ark
64 | done
65 |
66 | $cmd JOB=1:$nj $logdir/shift.JOB.log \
67 | shift-feats --shift=$num_frames_shift \
68 | scp:$data_in/split$nj/JOB/feats.scp ark:- \| \
69 | copy-feats --compress=$compress ark:- \
70 | ark,scp:$featdir/raw_feats_$name.JOB.ark,$featdir/raw_feats_$name.JOB.scp || exit 1;
71 |
72 | # concatenate the .scp files together.
73 | for ((n=1; n<=nj; n++)); do
74 | cat $featdir/raw_feats_$name.$n.scp
75 | done | awk -v nfs=$num_frames_shift '{print "fs"nfs"-"$0}'>$data/feats.scp || exit 1;
76 |
77 | nf=`cat $data/feats.scp | wc -l`
78 | nu=`cat $data/utt2spk | wc -l`
79 | if [ $nf -ne $nu ]; then
80 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
81 | exit 1;
82 | fi
83 |
84 | echo "Succeeded shifting features for $name into $data"
85 |
86 |
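87 | # A usage sketch (hypothetical dirs): shift the features left by one frame
88 | # (truncating the first frame); the output dir data/train_fs-1 is created
89 | # next to the input, with utterance/speaker ids prefixed "fs-1-":
90 | #
91 | # steps/shift_feats.sh --nj 8 -1 data/train exp/shift_train mfcc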
--------------------------------------------------------------------------------
/steps/tandem/decode_si.sh:
--------------------------------------------------------------------------------
1 | decode.sh
--------------------------------------------------------------------------------
/steps/train_nnet.sh:
--------------------------------------------------------------------------------
1 | nnet/train.sh
--------------------------------------------------------------------------------
/steps/word_align_lattices.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2012
4 | # Apache 2.0.
5 |
6 | # Begin configuration section.
7 | silence_label=0
8 | cmd=run.pl
9 | # End configuration section.
10 |
11 | echo "$0 $@" # Print the command line for logging
12 |
13 | for x in `seq 2`; do
14 | [ "$1" == "--silence-label" ] && silence_label=$2 && shift 2;
15 | [ "$1" == "--cmd" ] && cmd="$2" && shift 2;
16 | done
17 |
18 | if [ $# != 3 ]; then
19 | echo "Word-align lattices (make the arcs sync up with words)"
20 | echo ""
21 | echo "Usage: $0 [options] "
22 | echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label ]"
23 | exit 1;
24 | fi
25 |
26 | . ./path.sh || exit 1;
27 |
28 | lang=$1
29 | indir=$2
30 | outdir=$3
31 |
32 | mdl=`dirname $indir`/final.mdl
33 | wbfile=$lang/phones/word_boundary.int
34 |
35 | for f in $mdl $wbfile $indir/num_jobs; do
36 | [ ! -f $f ] && echo "word_align_lattices.sh: no such file $f" && exit 1;
37 | done
38 |
39 | mkdir -p $outdir/log
40 |
41 |
42 | cp $indir/num_jobs $outdir;
43 | nj=`cat $indir/num_jobs`
44 |
45 | $cmd JOB=1:$nj $outdir/log/align.JOB.log \
46 | lattice-align-words --silence-label=$silence_label --test=true \
47 | $wbfile $mdl "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1;
48 |
49 |
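A hypothetical invocation, assuming lattices under exp/tri3/decode and the model at exp/tri3/final.mdl (one level up from the lattice dir, as the script expects):

    steps/word_align_lattices.sh --cmd run.pl data/lang exp/tri3/decode exp/tri3/decode_word_aligned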
--------------------------------------------------------------------------------
/utils/add_disambig.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | # Adds some specified number of disambig symbols to a symbol table.
19 | # Adds these as #1, #2, etc.
20 | # If the --include-zero option is specified, includes an extra one
21 | # #0.
22 |
23 | $include_zero = 0;
24 | if($ARGV[0] eq "--include-zero") {
25 | $include_zero = 1;
26 | shift @ARGV;
27 | }
28 |
29 | if(@ARGV != 2) {
30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt ";
31 | }
32 |
33 |
34 | $input = $ARGV[0];
35 | $nsyms = $ARGV[1];
36 |
37 | open(F, "<$input") || die "Opening file $input";
38 |
39 | while(<F>) {
40 | @A = split(" ", $_);
41 | @A == 2 || die "Bad line $_";
42 | $lastsym = $A[1];
43 | print;
44 | }
45 |
46 | if(!defined($lastsym)){
47 | die "Empty symbol file?";
48 | }
49 |
50 | if($include_zero) {
51 | $lastsym++;
52 | print "#0 $lastsym\n";
53 | }
54 |
55 | for($n = 1; $n <= $nsyms; $n++) {
56 | $y = $n + $lastsym;
57 | print "#$n $y\n";
58 | }
59 |
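A quick sketch of its effect (symbol ids hypothetical): with --include-zero, #0 is appended first, then #1..#N:

    printf 'a 1\nb 2\n' > symtab.txt
    utils/add_disambig.pl --include-zero symtab.txt 2 > symtab_out.txt
    # symtab_out.txt gains the lines: '#0 3', '#1 4', '#2 5'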
--------------------------------------------------------------------------------
/utils/analyze_segments.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel)
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Analyze a segments file and print important stats on it.
18 |
19 | $dur = $total = 0;
20 | $maxDur = 0;
21 | $minDur = 9999999999;
22 | $n = 0;
23 | while(<>){
24 | chomp;
25 | @t = split(/\s+/);
26 | $dur = $t[3] - $t[2];
27 | $total += $dur;
28 | if ($dur > $maxDur) {
29 | $maxSegId = $t[0];
30 | $maxDur = $dur;
31 | }
32 | if ($dur < $minDur) {
33 | $minSegId = $t[0];
34 | $minDur = $dur;
35 | }
36 | $n++;
37 | }
38 | $avg=$total/$n;
39 | $hrs = $total/3600;
40 | print "Total $hrs hours of data\n";
41 | print "Average segment length $avg seconds\n";
42 | print "Segment $maxSegId has length of $maxDur seconds\n";
43 | print "Segment $minSegId has length of $minDur seconds\n";
44 |
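Typical use is to pipe a segments file straight through it (path hypothetical):

    utils/analyze_segments.pl < data/train/segments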
--------------------------------------------------------------------------------
/utils/apply_map.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | use warnings; #sed replacement for -w perl parameter
3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
4 | # Apache 2.0.
5 |
6 | # This program is a bit like ./sym2int.pl in that it applies a map
7 | # to things in a file, but it's a bit more general in that it doesn't
8 | # assume the things being mapped to are single tokens, they could
9 | # be sequences of tokens. See the usage message.
10 |
11 |
12 | if (@ARGV > 0 && $ARGV[0] eq "-f") {
13 | shift @ARGV;
14 | $field_spec = shift @ARGV;
15 | if ($field_spec =~ m/^\d+$/) {
16 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
17 | }
18 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
19 | if ($1 ne "") {
20 | $field_begin = $1 - 1; # Change to zero-based indexing.
21 | }
22 | if ($2 ne "") {
23 | $field_end = $2 - 1; # Change to zero-based indexing.
24 | }
25 | }
26 | if (!defined $field_begin && !defined $field_end) {
27 | die "Bad argument to -f option: $field_spec";
28 | }
29 | }
30 |
31 | # Mapping is obligatory
32 | $permissive = 0;
33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
34 | shift @ARGV;
35 | # Mapping is optional (missing key is printed to output)
36 | $permissive = 1;
37 | }
38 |
39 | if(@ARGV != 1) {
40 | print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
41 | print STDERR "Usage: apply_map.pl [options] map output\n" .
42 | "options: [-f ]\n" .
43 | "Applies the map 'map' to all input text, where each line of the map\n" .
44 | "is interpreted as a map from the first field to the list of the other fields\n" .
45 | "Note: can look like 4-5, or 4-, or 5-, or 1, it means the field\n" .
46 | "range in the input to apply the map to.\n" .
47 | "e.g.: echo A B | apply_map.pl a.txt\n" .
48 | "where a.txt is:\n" .
49 | "A a1 a2\n" .
50 | "B b\n" .
51 | "will produce:\n" .
52 | "a1 a2 b\n";
53 | exit(1);
54 | }
55 |
56 | ($map) = @ARGV;
57 | open(M, "<$map") || die "Error opening map file $map: $!";
58 |
59 | while (<M>) {
60 | @A = split(" ", $_);
61 | @A >= 1 || die "apply_map.pl: empty line.";
62 | $i = shift @A;
63 | $o = join(" ", @A);
64 | $map{$i} = $o;
65 | }
66 |
67 | while(<>) {
68 | @A = split(" ", $_);
69 | for ($x = 0; $x < @A; $x++) {
70 | if ( (!defined $field_begin || $x >= $field_begin)
71 | && (!defined $field_end || $x <= $field_end)) {
72 | $a = $A[$x];
73 | if (!defined $map{$a}) {
74 | if (!$permissive) {
75 | die "apply_map.pl: undefined key $a\n";
76 | } else {
77 | print STDERR "apply_map.pl: warning! missing key $a\n";
78 | }
79 | } else {
80 | $A[$x] = $map{$a};
81 | }
82 | }
83 | }
84 | print join(" ", @A) . "\n";
85 | }
86 |
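The example from the usage message, runnable as a quick shell check:

    printf 'A a1 a2\nB b\n' > a.txt
    echo A B | utils/apply_map.pl a.txt   # prints: a1 a2 b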
--------------------------------------------------------------------------------
/utils/best_wer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2010-2011 Microsoft Corporation
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # To be run from one directory above this script.
19 |
20 | perl -e 'while(<>){
21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g;
22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite.
25 | if (defined $bestline){ print $bestline; } ' | \
26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \
27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \
28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \
29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||'
30 |
31 |
32 |
33 |
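Typical use (decode directory hypothetical): pipe all scored WER files through it and keep the best line:

    grep WER exp/tri1/decode/wer_* | utils/best_wer.sh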
--------------------------------------------------------------------------------
/utils/build_const_arpa_lm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2014 Guoguo Chen
4 | # Apache 2.0
5 |
6 | # This script reads in an Arpa format language model, and converts it into the
7 | # ConstArpaLm format language model.
8 |
9 | # begin configuration section
10 | # end configuration section
11 |
12 | [ -f path.sh ] && . ./path.sh;
13 |
14 | . utils/parse_options.sh
15 |
16 | if [ $# != 3 ]; then
17 | echo "Usage: "
18 | echo " $0 [options] "
19 | echo "e.g.:"
20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed"
21 | echo "Options"
22 | exit 1;
23 | fi
24 |
25 | export LC_ALL=C
26 |
27 | arpa_lm=$1
28 | old_lang=$2
29 | new_lang=$3
30 |
31 | mkdir -p $new_lang
32 |
34 | cp -r $old_lang/* $new_lang
35 |
36 | unk=`cat $new_lang/oov.int`
37 | bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'`
38 | eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'`
39 | if [[ -z $bos || -z $eos ]]; then
40 | echo "$0: <s> and </s> symbols are not in $new_lang/words.txt"
41 | exit 1
42 | fi
43 |
44 |
45 | arpa-to-const-arpa --bos-symbol=$bos \
46 | --eos-symbol=$eos --unk-symbol=$unk \
47 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1;
48 |
49 | exit 0;
50 |
--------------------------------------------------------------------------------
/utils/convert_ctm.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
4 |
5 | # This takes as standard input a ctm file that's "relative to the utterance",
6 | # i.e. times are measured relative to the beginning of the segments, and it
7 | # uses a "segments" file (format:
8 | # utterance-id recording-id start-time end-time
9 | # ) and a "reco2file_and_channel" file (format:
10 | # recording-id basename-of-file
11 |
12 | $skip_unknown=undef;
13 | if ( $ARGV[0] eq "--skip-unknown" ) {
14 | $skip_unknown=1;
15 | shift @ARGV;
16 | }
17 |
18 | if (@ARGV < 2 || @ARGV > 3) {
19 | print STDERR "Usage: convert_ctm.pl [--skip-unknown] <segments-file> <reco2file_and_channel-file> [<utterance-ctm>] > real-ctm\n";
20 | exit(1);
21 | }
22 |
23 | $segments = shift @ARGV;
24 | $reco2file_and_channel = shift @ARGV;
25 |
26 | open(S, "<$segments") || die "opening segments file $segments";
27 | while(<S>) {
28 | @A = split(" ", $_);
29 | @A == 4 || die "Bad line in segments file: $_";
30 | ($utt, $recording_id, $begin_time, $end_time) = @A;
31 | $utt2reco{$utt} = $recording_id;
32 | $begin{$utt} = $begin_time;
33 | $end{$utt} = $end_time;
34 | }
35 | close(S);
36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel";
37 | while(<R>) {
38 | @A = split(" ", $_);
39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_";
40 | ($recording_id, $file, $channel) = @A;
41 | $reco2file{$recording_id} = $file;
42 | $reco2channel{$recording_id} = $channel;
43 | }
44 |
45 |
46 | # Now process the ctm file, which is either the standard input or the third
47 | # command-line argument.
48 | $num_done = 0;
49 | while(<>) {
50 | @A= split(" ", $_);
51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_";
52 | # lines look like:
53 | # 1 [ confidence ]
54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A;
55 | $reco = $utt2reco{$utt};
56 | if (!defined $reco) {
57 | next if defined $skip_unknown;
58 | die "Utterance-id $utt not defined in segments file $segments";
59 | }
60 | $file = $reco2file{$reco};
61 | $channel = $reco2channel{$reco};
62 | if (!defined $file || !defined $channel) {
63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel";
64 | }
65 | $b = $begin{$utt};
66 | $e = $end{$utt};
67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording.
68 | $wbegin_r = sprintf("%.2f", $wbegin_r);
69 | $wlen = sprintf("%.2f", $wlen);
70 | if (defined $conf) {
71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n";
72 | } else {
73 | $line = "$file $channel $wbegin_r $wlen $w\n";
74 | }
75 | if ($wbegin_r + $wlen > $e + 0.01) {
76 | print STDERR "Warning: word appears to be past end of recording; line is $line";
77 | }
78 | print $line; # goes to stdout.
79 | $num_done++;
80 | }
81 |
82 | if ($num_done == 0) { exit 1; } else { exit 0; }
83 |
84 | __END__
85 |
86 | # Test example [also test it without the 0.5's]
87 | echo utt reco 10.0 20.0 > segments
88 | echo reco file A > reco2file_and_channel
89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in
90 | echo file A 18.00 1.00 word 0.5 > ctm_out
91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error
92 | rm segments reco2file_and_channel ctm_in ctm_out
93 |
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------
/utils/convert_slf_parallel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0.
3 |
4 | # This script converts lattices to HTK format compatible with other toolkits.
5 | # We can choose to put words on nodes or arcs, as both are valid in the SLF format.
6 |
7 | # begin configuration section.
8 | cmd=run.pl
9 | dirname=lats-in-htk-slf
10 | parallel_opts="-tc 50" # We should limit disk stress
11 | word_to_node=false # Words in arcs or nodes? [default:arcs]
12 | #end configuration section.
13 |
14 | echo "$0 $@"
15 |
16 | [ -f ./path.sh ] && . ./path.sh
17 | . parse_options.sh || exit 1;
18 |
19 | if [ $# -ne 3 ]; then
20 | echo "Usage: $0 [options] "
21 | echo " Options:"
22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
23 | echo " --word-to-link (true|false) # put word symbols on links or nodes."
24 | echo " --parallel-opts STR # parallelization options (def.: '-tc 50')."
25 | echo "e.g.:"
26 | echo "$0 data/dev data/lang exp/tri4a/decode_dev"
27 | exit 1;
28 | fi
29 |
30 | data=$1
31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
32 | dir=$3
33 |
34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir.
35 |
36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do
37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
38 | done
39 |
40 | [ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname
41 |
42 | echo "$0: Converting lattices into '$dir/$dirname'"
43 |
44 | # Words on arcs or nodes? [default:arcs]
45 | word_to_node_arg=
46 | $word_to_node && word_to_node_arg="--word-to-node"
47 |
48 | nj=$(cat $dir/num_jobs)
49 |
50 | # convert the lattices (individually, gzipped)
51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \
52 | mkdir -p $dir/$dirname/JOB/ '&&' \
53 | lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true \
54 | $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \
55 | utils/int2sym.pl -f 3 $lang/words.txt \| \
56 | utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1
57 |
58 | # make list of lattices
59 | find -L $PWD/$dir/$dirname -name '*.lat.gz' > $dir/$dirname/lat_htk.scp || exit 1
60 |
61 | # check number of lattices:
62 | nseg=$(cat $data/segments | wc -l)
63 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l)
64 | echo "segments $nseg, saved-lattices $nlat_out"
65 | #
66 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \
67 | && exit 1
68 |
69 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp"
70 | exit 0
71 |
72 |
--------------------------------------------------------------------------------
/utils/create_split_dir.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2013 Guoguo Chen
4 | # Apache 2.0.
5 | #
6 | # This script creates storage directories on different file systems, and creates
7 | # symbolic links to those directories. For example, a command
8 | #
9 | # utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
10 | #
11 | # will mkdir -p all of those directories, and will create links
12 | #
13 | # egs/storage/1 -> /export/gpu-03/egs/storage
14 | # egs/storage/2 -> /export/gpu-04/egs/storage
15 | # ...
16 | #
17 | use strict;
18 | use warnings;
19 | use File::Spec;
20 | use Getopt::Long;
21 |
22 | my $Usage = <<EOU;
23 | This script creates storage directories on different file systems,
24 | and creates symbolic links to those directories.
25 |
26 | Usage: utils/create_split_dir.pl <actual-storage-dirs> <pseudo-storage-dir>
27 |
28 | e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage
29 |
30 | Allowed options:
31 | --suffix <suffix> : Common suffix to <actual-storage-dirs> (string, default = "")
32 |
33 | See also create_data_link.pl, which is intended to work with the resulting
34 | directory structure, and remove_data_links.sh
35 | EOU
36 |
37 | my $suffix="";
38 | GetOptions('suffix=s' => \$suffix);
39 |
40 | if (@ARGV < 2) {
41 | die $Usage;
42 | }
43 |
44 | my $ans = 1;
45 |
46 | my $dir = pop(@ARGV);
47 | system("mkdir -p $dir 2>/dev/null");
48 |
49 | my @all_actual_storage = ();
50 | foreach my $file (@ARGV) {
51 | push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix);
52 | }
53 |
54 | my $index = 1;
55 | foreach my $actual_storage (@all_actual_storage) {
56 | my $pseudo_storage = "$dir/$index";
57 |
58 | # If the symbolic link already exists, skip it (do not overwrite).
59 | if (-l $pseudo_storage) {
60 | print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n";
61 | $index++;
62 | next;
63 | }
64 |
65 | # Create the destination directory and make the link.
66 | system("mkdir -p $actual_storage 2>/dev/null");
67 | if ($? != 0) {
68 | print STDERR "$0: error creating directory $actual_storage\n";
69 | exit(1);
70 | }
71 | { # create a README file for easier deletion.
72 | open(R, ">$actual_storage/README.txt");
73 | my $storage_dir = File::Spec->rel2abs($dir);
74 | print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n";
75 | print R "# The full list of directories where this data resides is:\n";
76 | foreach my $d (@all_actual_storage) {
77 | print R "$d\n";
78 | }
79 | close(R);
80 | }
81 | my $ret = symlink($actual_storage, $pseudo_storage);
82 |
83 | # Process the returned values
84 | $ans = $ans && $ret;
85 | if (! $ret) {
86 | print STDERR "Error linking $actual_storage to $pseudo_storage\n";
87 | }
88 |
89 | $index++;
90 | }
91 |
92 | exit($ans == 1 ? 0 : 1);
93 |
--------------------------------------------------------------------------------
/utils/data/combine_data.sh:
--------------------------------------------------------------------------------
1 | ../combine_data.sh
--------------------------------------------------------------------------------
/utils/data/copy_data_dir.sh:
--------------------------------------------------------------------------------
1 | ../copy_data_dir.sh
--------------------------------------------------------------------------------
/utils/data/fix_data_dir.sh:
--------------------------------------------------------------------------------
1 | ../fix_data_dir.sh
--------------------------------------------------------------------------------
/utils/data/get_frame_shift.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey)
4 | # Apache 2.0
5 |
6 | # This script takes as input a data directory, such as data/train/, preferably
7 | # with utt2dur file already existing (or the utt2dur file will be created if
8 | # not), and it attempts to work out the approximate frame shift by comparing the
9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out.
10 | # if the shift is very close to, but above, 0.01 (the normal frame shift) it
11 | # rounds it down.
12 |
13 | . utils/parse_options.sh
14 | . ./path.sh
15 |
16 | if [ $# != 1 ]; then
17 | echo "Usage: $0 "
18 | echo "e.g.:"
19 | echo " $0 data/train"
20 | echo "This script prints the frame-shift (e.g. 0.01) to the standard out."
21 | echo "If does not contain utt2dur, this script may call utils/data/get_utt2dur.sh,"
22 | echo "which will require write permission to "
23 | exit 1
24 | fi
25 |
26 | export LC_ALL=C
27 |
28 | dir=$1
29 |
30 |
31 | if [ ! -s $dir/utt2dur ]; then
32 | if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then
33 | echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2
34 | echo 0.01
35 | exit 0
36 | fi
37 | echo "$0: $dir/utt2dur does not exist: creating it" 1>&2
38 | utils/data/get_utt2dur.sh $dir 1>&2
39 | fi
40 |
41 | if [ ! -f $dir/feats.scp ]; then
42 | echo "$0: $dir/feats.scp does not exist" 1>&2
43 | exit 1
44 | fi
45 |
46 | temp=$(mktemp /tmp/tmp.XXXX)
47 |
48 | feat-to-len scp:$dir/feats.scp ark,t:- | head -n 10 > $temp
49 |
50 | if [ ! -s $temp ]; then
51 | echo "$0: error running feat-to-len" 1>&2
52 | exit 1
53 | fi
54 |
55 | head -n 10 $dir/utt2dur | paste - $temp | \
56 | awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1;
57 |
58 | rm $temp
59 |
60 | exit 0
61 |
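Because the value goes to stdout, callers usually capture it (directory hypothetical):

    frame_shift=$(utils/data/get_frame_shift.sh data/train) || exit 1
    echo "frame shift is $frame_shift seconds"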
--------------------------------------------------------------------------------
/utils/data/get_num_frames.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script works out the approximate number of frames in a training directory.
4 | # This is sometimes needed by higher-level scripts
5 |
6 |
7 | if [ -f path.sh ]; then . ./path.sh; fi
8 | . parse_options.sh || exit 1;
9 |
10 | if [ $# -ne 1 ]; then
11 | (
12 | echo "Usage: $0 "
13 | echo "Prints the number of frames of data in the data-dir"
14 | ) 1>&2
15 | exit 1;
16 | fi
16 |
17 | data=$1
18 |
19 | if [ ! -f $data/utt2dur ]; then
20 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1
21 | fi
22 |
23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1
24 |
25 | awk -v s=$frame_shift '{n += $2} END{print int(n / s)}' <$data/utt2dur
26 |
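Same pattern as get_frame_shift.sh (directory hypothetical): higher-level scripts capture the printed count:

    num_frames=$(utils/data/get_num_frames.sh data/train) || exit 1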
--------------------------------------------------------------------------------
/utils/data/get_segments_for_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script operates on a data directory, such as in data/train/,
4 | # and writes new segments to stdout. The file 'segments' maps from
5 | # utterance to time offsets into a recording, with the format:
6 | # <utterance-id> <recording-id> <segment-start> <segment-end>
7 | # This script assumes utterance and recording ids are the same (i.e., that
8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur',
9 | # created if necessary by get_utt2dur.sh.
10 |
11 | . ./path.sh
12 |
13 | if [ $# != 1 ]; then
14 | echo "Usage: $0 [options] "
15 | echo "e.g.:"
16 | echo " $0 data/train > data/train/segments"
17 | exit 1
18 | fi
19 |
20 | data=$1
21 |
22 | if [ ! -f $data/utt2dur ]; then
23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1;
24 | fi
25 |
26 | # <utt-id> <utt-id> 0 <duration>
27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur
28 |
29 | exit 0
30 |
--------------------------------------------------------------------------------
/utils/data/perturb_data_dir_speed.sh:
--------------------------------------------------------------------------------
1 | ../perturb_data_dir_speed.sh
--------------------------------------------------------------------------------
/utils/data/perturb_data_dir_speed_3way.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Apache 2.0
6 |
7 | # This script does the standard 3-way speed perturbing of
8 | # a data directory (it operates on the wav.scp).
9 |
10 | . utils/parse_options.sh
11 |
12 | if [ $# != 2 ]; then
13 | echo "Usage: perturb_data_dir_speed_3way.sh "
14 | echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1."
15 | echo "e.g.:"
16 | echo " $0 data/train data/train_sp"
17 | echo "Note: if /feats.scp already exists, this will refuse to run."
18 | exit 1
19 | fi
20 |
21 | srcdir=$1
22 | destdir=$2
23 |
24 | if [ ! -f $srcdir/wav.scp ]; then
25 | echo "$0: expected $srcdir/wav.scp to exist"
26 | exit 1
27 | fi
28 |
29 | if [ -f $destdir/feats.scp ]; then
30 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
31 | exit 1
32 | fi
33 |
34 | echo "$0: making sure the utt2dur file is present in ${srcdir}, because "
35 | echo "... obtaining it after speed-perturbing would be very slow, and"
36 | echo "... you might need it."
37 | utils/data/get_utt2dur.sh ${srcdir}
38 |
39 | utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1
40 | utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1
41 | utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
42 |
43 | rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
44 |
45 | echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
46 | utils/validate_data_dir.sh --no-feats $destdir
47 |
48 |
--------------------------------------------------------------------------------
/utils/data/perturb_data_dir_volume.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey)
4 | # Apache 2.0
5 |
6 | # This script operates on a data directory, such as in data/train/, and modifies
7 | # the wav.scp to perturb the volume (typically useful for training data when
8 | # using systems that don't have cepstral mean normalization).
9 |
10 | . utils/parse_options.sh
11 |
12 | if [ $# != 1 ]; then
13 | echo "Usage: $0 "
14 | echo "e.g.:"
15 | echo " $0 data/train"
16 | exit 1
17 | fi
18 |
19 | export LC_ALL=C
20 |
21 | data=$1
22 |
23 | if [ ! -f $data/wav.scp ]; then
24 | echo "$0: Expected $data/wav.scp to exist"
25 | exit 1
26 | fi
27 |
28 | if grep -q "sox --vol" $data/wav.scp; then
29 | echo "$0: It looks like the data was already volume perturbed. Not doing anything."
30 | exit 0
31 | fi
32 |
33 | cat $data/wav.scp | python -c "
34 | import sys, os, subprocess, re, random
35 | random.seed(0)
36 | scale_low = 1.0/8
37 | scale_high = 2.0
38 | for line in sys.stdin.readlines():
39 | if len(line.strip()) == 0:
40 | continue
41 | # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename'
42 | if line.strip()[-1] == '|':
43 | print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
44 | elif re.search(':[0-9]+$', line.strip()) is not None:
45 | parts = line.split()
46 | print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high))
47 | else:
48 | parts = line.split()
49 | print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high))
50 | " > $data/wav.scp_scaled || exit 1;
51 |
52 | len1=$(cat $data/wav.scp | wc -l)
53 | len2=$(cat $data/wav.scp_scaled | wc -l)
54 | if [ "$len1" != "$len2" ]; then
55 | echo "$0: error detected: number of lines changed $len1 vs $len2";
56 | exit 1
57 | fi
58 |
59 | mv $data/wav.scp_scaled $data/wav.scp
60 |
61 | if [ -f $data/feats.scp ]; then
62 | echo "$0: $data/feats.scp exists; moving it to $data/.backup/ as it wouldn't be valid any more."
63 | mkdir -p $data/.backup/
64 | mv $data/feats.scp $data/.backup/
65 | fi
66 |
67 | echo "$0: added volume perturbation to the data in $data"
68 | exit 0
69 |
70 |
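A sketch of the rewrite on one hypothetical wav.scp entry: a plain-filename line such as

    utt1 /path/utt1.wav

becomes a piped sox command with a random gain drawn from [0.125, 2.0], e.g.:

    utt1 sox --vol 1.472 -t wav /path/utt1.wav -t wav - |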
--------------------------------------------------------------------------------
/utils/data/remove_dup_utts.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Remove excess utterances once they appear more than a specified
4 | # number of times with the same transcription, in a data set.
5 | # E.g. useful for removing excess "uh-huh" from training.
6 |
7 | if [ $# != 3 ]; then
8 | echo "Usage: remove_dup_utts.sh max-count "
9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup"
10 | echo "This script is used to filter out utterances that have from over-represented"
11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of"
12 | echo "any given word-sequence to a specified value. It's often used to get"
13 | echo "subsets for early stages of training."
14 | exit 1;
15 | fi
16 |
17 | maxcount=$1
18 | srcdir=$2
19 | destdir=$3
20 | mkdir -p $destdir
21 |
22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1;
23 |
24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1;
25 |
26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1;
27 |
28 | cp $srcdir/* $destdir
29 | cat $srcdir/text | \
30 | perl -e '
31 | $maxcount = shift @ARGV;
32 | @all = ();
33 | $p1 = 103349; $p2 = 71147; $k = 0;
34 | sub random { # our own random number generator: predictable.
35 | $k = ($k + $p1) % $p2;
36 | return ($k / $p2);
37 | }
38 | while(<>) {
39 | push @all, $_;
40 | @A = split(" ", $_);
41 | shift @A;
42 | $text = join(" ", @A);
43 | $count{$text} ++;
44 | }
45 | foreach $line (@all) {
46 | @A = split(" ", $line);
47 | shift @A;
48 | $text = join(" ", @A);
49 | $n = $count{$text};
50 | if ($n < $maxcount || random() < ($maxcount / $n)) {
51 | print $line;
52 | }
53 | }' $maxcount >$destdir/text
54 |
55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`"
56 |
57 | echo "Using fix_data_dir.sh to reconcile the other files."
58 | utils/fix_data_dir.sh $destdir
59 | rm -r $destdir/.backup
60 |
61 | exit 0
62 |
--------------------------------------------------------------------------------
/utils/data/split_data.sh:
--------------------------------------------------------------------------------
1 | ../split_data.sh
--------------------------------------------------------------------------------
/utils/data/subset_data_dir.sh:
--------------------------------------------------------------------------------
1 | ../subset_data_dir.sh
--------------------------------------------------------------------------------
/utils/data/validate_data_dir.sh:
--------------------------------------------------------------------------------
1 | ../validate_data_dir.sh
--------------------------------------------------------------------------------
/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 | # 2015 Guoguo Chen
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # This script replaces <eps> with #0 on the input side only, of the G.fst
19 | # acceptor.
20 |
21 | while(<>){
22 | if (/\s+#0\s+/) {
23 | print STDERR "$0: ERROR: LM has word #0, " .
24 | "which is reserved as disambiguation symbol\n";
25 | exit 1;
26 | }
27 | s:^(\d+\s+\d+\s+)<eps>(\s+):$1#0$2:;
28 | print;
29 | }
30 |
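A one-line check of the substitution on a text-format arc (labels hypothetical):

    echo '0 1 <eps> the 4.3' | utils/eps2disambig.pl   # prints: 0 1 #0 the 4.3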
--------------------------------------------------------------------------------
/utils/filt.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Apache 2.0
4 |
5 | import sys
6 |
7 | vocab=set()
8 | with open(sys.argv[1]) as vocabfile:
9 | for line in vocabfile:
10 | vocab.add(line.strip())
11 |
12 | with open(sys.argv[2]) as textfile:
13 | for line in textfile:
14 | print " ".join(map(lambda word: word if word in vocab else '<UNK>', line.strip().split()))
15 |
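Intended use (paths hypothetical): the first argument is the vocabulary, the second the text to filter; out-of-vocabulary words come out as <UNK>:

    python utils/filt.py data/local/dict/words.txt data/train/text > text.filt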
--------------------------------------------------------------------------------
/utils/filter_scp.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2012 Microsoft Corporation
3 | # Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
19 | # This script takes a list of utterance-ids or any file whose first field
20 | # of each line is an utterance-id, and filters an scp
21 | # file (or any file whose "n-th" field is an utterance id), printing
22 | # out only those lines whose "n-th" field is in id_list. The index of
23 | # the "n-th" field is 1, by default, but can be changed by using
24 | # the -f switch
25 |
26 | $exclude = 0;
27 | $field = 1;
28 | $shifted = 0;
29 |
30 | do {
31 | $shifted=0;
32 | if ($ARGV[0] eq "--exclude") {
33 | $exclude = 1;
34 | shift @ARGV;
35 | $shifted=1;
36 | }
37 | if ($ARGV[0] eq "-f") {
38 | $field = $ARGV[1];
39 | shift @ARGV; shift @ARGV;
40 | $shifted=1
41 | }
42 | } while ($shifted);
43 |
44 | if(@ARGV < 1 || @ARGV > 2) {
45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" .
46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" .
47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" .
48 | "only the lines that were *not* in id_list.\n" .
49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" .
50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" .
51 | "-f option, add 1 to the argument.\n" .
52 | "See also: utils/filter_scp.pl .\n";
53 | }
54 |
55 |
56 | $idlist = shift @ARGV;
57 | open(F, "<$idlist") || die "Could not open id-list file $idlist";
58 | while(<F>) {
59 | @A = split;
60 | @A>=1 || die "Invalid id-list file line $_";
61 | $seen{$A[0]} = 1;
62 | }
63 |
64 | if ($field == 1) { # Treat this as special case, since it is common.
65 | while(<>) {
66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field.";
67 | # $1 is what we filter on.
68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) {
69 | print $_;
70 | }
71 | }
72 | } else {
73 | while(<>) {
74 | @A = split;
75 | @A > 0 || die "Invalid scp file line $_";
76 | @A >= $field || die "Invalid scp file line $_";
77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) {
78 | print $_;
79 | }
80 | }
81 | }
82 |
83 | # tests:
84 | # the following should print "foo 1"
85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo)
86 | # the following should print "bar 2".
87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2)
88 |
--------------------------------------------------------------------------------
/utils/find_arpa_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | if (@ARGV < 1 || @ARGV > 2) {
19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
20 | # This program finds words in the arpa file that are not symbols
21 | # in the OpenFst-format symbol table words.txt. It prints them
22 | # on the standard output, one per line.
23 | }
24 |
25 | $symtab = shift @ARGV;
26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
27 | while(<S>){
28 | @A = split(" ", $_);
29 | @A == 2 || die "Bad line in symbol table file: $_";
30 | $seen{$A[0]} = 1;
31 | }
32 |
33 | $found_data=0;
34 | $curgram=0;
35 | while(<>) { # Find the \data\ marker.
36 | if(m:^\\data\\\s*$:) { $found_data=1; last; }
37 | }
38 |
39 | if ($found_data==0) {
40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n";
41 | exit(1);
42 | }
43 |
44 | while(<>) {
45 | if(m/^\\(\d+)\-grams:\s*$/) {
46 | $curgram = $1;
47 | if($curgram > 1) {
48 | last; # This is an optimization as we can get the vocab from the 1-grams
49 | }
50 | } elsif($curgram > 0) {
51 | @A = split(" ", $_);
52 | if(@A > 1) {
53 | shift @A;
54 | for($n=0;$n<$curgram;$n++) {
55 | $word = $A[$n];
56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
57 | $in_arpa{$word} = 1;
58 | }
59 | } else {
60 | if(@A > 0 && $A[0] !~ m:\\end\\:) {
61 | print STDERR "Unusual line $_ (line $.) in arpa file\n";
62 | }
63 | }
64 | }
65 | }
66 |
67 | foreach $w (keys %in_arpa) {
68 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
69 | print "$w\n";
70 | }
71 | }
72 |
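Typical invocation (paths hypothetical), with the ARPA LM on stdin:

    gunzip -c lm.arpa.gz | utils/find_arpa_oovs.pl data/lang/words.txt > oovs.txt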
--------------------------------------------------------------------------------
/utils/fix_ctm.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | stmfile=$1
4 | ctmfile=$2
5 |
6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u`
7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u`
8 |
9 | segments_stm_count=`echo "$segments_stm" | wc -l `
10 | segments_ctm_count=`echo "$segments_ctm" | wc -l `
11 |
12 | #echo $segments_stm_count
13 | #echo $segments_ctm_count
14 |
15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then
16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g")
17 | (
18 | for elem in $pp ; do
19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE"
20 | done
21 | ) >> $ctmfile
22 | echo "FIXED CTM FILE"
23 | exit 0
24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then
25 | echo "Segment STM count: $segments_stm_count"
26 | echo "Segment CTM count: $segments_ctm_count"
27 | echo "FAILURE FIXING CTM FILE"
28 | exit 1
29 | else
30 | exit 0
31 | fi
32 |
33 |
--------------------------------------------------------------------------------
/utils/format_lm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -u
2 |
3 | # Copyright 2012 Arnab Ghoshal
4 | # Copyright 2010-2011 Microsoft Corporation
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
15 | # MERCHANTABLITY OR NON-INFRINGEMENT.
16 | # See the Apache 2 License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 | set -o errexit
20 |
21 | if [ $# -ne 4 ]; then
22 | printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0`
23 | echo " Convert ARPA-format language models to FSTs.";
24 | exit 1;
25 | fi
26 |
27 | lang_dir=$1
28 | lm=$2
29 | lexicon=$3
30 | out_dir=$4
31 | mkdir -p $out_dir
32 |
33 | [ -f ./path.sh ] && . ./path.sh
34 |
35 | echo "Converting '$lm' to FST"
36 |
37 | for f in phones.txt words.txt L.fst L_disambig.fst phones/ oov.int oov.txt; do
38 | cp -r $lang_dir/$f $out_dir
39 | done
40 |
41 | lm_base=$(basename $lm '.gz')
42 | gunzip -c $lm \
43 | | arpa2fst --disambig-symbol=#0 \
44 | --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst
45 | set +e
46 | fstisstochastic $out_dir/G.fst
47 | set -e
48 | # The output is like:
49 | # 9.14233e-05 -0.259833
50 | # we do expect the first of these 2 numbers to be close to zero (the second is
51 | # nonzero because the backoff weights make the states sum to >1).
52 |
53 | # Everything below is only for diagnostic.
54 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
55 | # this might cause determinization failure of CLG.
56 | # #0 is treated as an empty word.
57 | mkdir -p $out_dir/tmpdir.g
58 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }}
59 | END{print "0 0 #0 #0"; print "0";}' \
60 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt
61 |
62 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \
63 | $out_dir/tmpdir.g/select_empty.fst.txt \
64 | | fstarcsort --sort_type=olabel \
65 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst
66 |
67 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \
68 | && echo "Language model has cycles with empty words" && exit 1
69 |
70 | rm -r $out_dir/tmpdir.g
71 |
72 |
73 | echo "Succeeded in formatting LM: '$lm'"
74 |
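A hypothetical call matching the usage line above:

    utils/format_lm.sh data/lang data/local/lm/3gram.arpa.gz data/local/dict/lexicon.txt data/lang_test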
--------------------------------------------------------------------------------
/utils/gen_topo.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs.
7 |
8 | if (@ARGV != 4) {
9 | print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
10 | print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n";
11 | exit (1);
12 | }
13 |
14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV;
15 |
16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) ||
17 | die "Unexpected number of nonsilence-model states $num_nonsil_states\n";
18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) ||
19 | die "Unexpected number of silence-model states $num_sil_states\n";
20 |
21 | $nonsil_phones =~ s/:/ /g;
22 | $sil_phones =~ s/:/ /g;
23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
25 |
26 | print "<Topology>\n";
27 | print "<TopologyEntry>\n";
28 | print "<ForPhones>\n";
29 | print "$nonsil_phones\n";
30 | print "</ForPhones>\n";
31 | for ($state = 0; $state < $num_nonsil_states; $state++) {
32 | $statep1 = $state+1;
33 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n";
34 | }
35 | print "<State> $num_nonsil_states </State>\n"; # non-emitting final state.
36 | print "</TopologyEntry>\n";
37 | # Now silence phones. They have a different topology-- apart from the first and
38 | # last states, it's fully connected, as long as you have >= 3 states.
39 |
40 | if ($num_sil_states > 1) {
41 | $transp = 1.0 / ($num_sil_states-1);
42 | print "<TopologyEntry>\n";
43 | print "<ForPhones>\n";
44 | print "$sil_phones\n";
45 | print "</ForPhones>\n";
46 | print "<State> 0 <PdfClass> 0 ";
47 | for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last
48 | # emitting state.
49 | print "<Transition> $nextstate $transp ";
50 | }
51 | print "</State>\n";
52 | for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to
53 | # themselves and to the last emitting state.
54 | print "<State> $state <PdfClass> $state ";
55 | for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) {
56 | print "<Transition> $nextstate $transp ";
57 | }
58 | print "</State>\n";
59 | }
60 | # Final emitting state (non-skippable).
61 | $state = $num_sil_states-1;
62 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n";
63 | # Final nonemitting state:
64 | print "<State> $num_sil_states </State>\n";
65 | print "</TopologyEntry>\n";
66 | } else {
67 | print "<TopologyEntry>\n";
68 | print "<ForPhones>\n";
69 | print "$sil_phones\n";
70 | print "</ForPhones>\n";
71 | print "<State> 0 <PdfClass> 0 ";
72 | print "<Transition> 0 0.75 ";
73 | print "<Transition> 1 0.25 ";
74 | print "</State>\n";
75 | print "<State> $num_sil_states </State>\n"; # non-emitting final state.
76 | print "</TopologyEntry>\n";
77 | }
78 |
79 | print "</Topology>\n";
80 |
--------------------------------------------------------------------------------
/utils/int2sym.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 | # Apache 2.0.
4 |
5 | undef $field_begin;
6 | undef $field_end;
7 |
8 |
9 | if ($ARGV[0] eq "-f") {
10 | shift @ARGV;
11 | $field_spec = shift @ARGV;
12 | if ($field_spec =~ m/^\d+$/) {
13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
14 | }
15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
16 | if ($1 ne "") {
17 | $field_begin = $1 - 1; # Change to zero-based indexing.
18 | }
19 | if ($2 ne "") {
20 | $field_end = $2 - 1; # Change to zero-based indexing.
21 | }
22 | }
23 | if (!defined $field_begin && !defined $field_end) {
24 | die "Bad argument to -f option: $field_spec";
25 | }
26 | }
27 | $symtab = shift @ARGV;
28 | if(!defined $symtab) {
29 | print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
30 | "options: [-f (<field>|<field_start>-<field_end>)]\n" .
31 | "e.g.: -f 2, or -f 3-4\n";
32 | exit(1);
33 | }
34 |
35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
36 | while(<F>) {
37 | @A = split(" ", $_);
38 | @A == 2 || die "bad line in symbol table file: $_";
39 | $int2sym{$A[1]} = $A[0];
40 | }
41 |
42 | sub int2sym {
43 | my $a = shift @_;
44 | my $pos = shift @_;
45 | if($a !~ m:^\d+$:) { # not all digits..
46 | $pos1 = $pos+1; # make it one-based.
47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n";
48 | }
49 | $s = $int2sym{$a};
50 | if(!defined ($s)) {
51 | die "int2sym.pl: integer $a not in symbol table $symtab.";
52 | }
53 | return $s;
54 | }
55 |
56 | $error = 0;
57 | while (<>) {
58 | @A = split(" ", $_);
59 | for ($pos = 0; $pos <= $#A; $pos++) {
60 | $a = $A[$pos];
61 | if ( (!defined $field_begin || $pos >= $field_begin)
62 | && (!defined $field_end || $pos <= $field_end)) {
63 | $a = int2sym($a, $pos);
64 | }
65 | print $a . " ";
66 | }
67 | print "\n";
68 | }
69 |
70 |
71 |
72 |
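For example (paths hypothetical), mapping integer transcriptions back to words in fields 2 and onward:

    utils/int2sym.pl -f 2- data/lang/words.txt < text.int > text.txt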
--------------------------------------------------------------------------------
/utils/lang/add_lex_disambig.pl:
--------------------------------------------------------------------------------
1 | ../add_lex_disambig.pl
--------------------------------------------------------------------------------
/utils/lang/check_g_properties.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | use IPC::Open2;
4 |
5 | if (@ARGV != 1) {
6 | print "Usage: $0 [options] \n";
7 | print "e.g.: $0 data/lang\n";
8 | exit(1);
9 | }
10 |
11 | $lang = shift @ARGV;
12 |
13 | # This script checks that G.fst in the lang.fst directory is OK with respect
14 | # to certain expected properties, and returns nonzero exit status if a problem was
15 | # detected. It is called from validate_lang.pl.
16 | # This only checks the properties of G that relate to disambiguation symbols,
17 | # epsilons and the forbidden symbols <s> and </s>.
18 |
19 | if (! -e "$lang/G.fst") {
20 | print "$0: error: $lang/G.fst does not exist\n";
21 | exit(1);
22 | }
23 |
24 | open(W, "<$lang/words.txt") || die "opening $lang/words.txt";
25 | $hash_zero = -1;
26 | while (<W>) {
27 | @A = split(" ", $_);
28 | ($sym, $int) = @A;
29 | if ($sym eq "<s>" || $sym eq "</s>") { $is_forbidden{$int} = 1; }
30 | if ($sym eq "#0") { $hash_zero = $int; }
31 | }
32 |
33 | if (-e "$lang/phones/wdisambig_words.int") {
34 | open(F, "<$lang/phones/wdisambig_words.int") || die "opening $lang/phones/wdisambig_words.int";
35 | while (<F>) {
36 | chop;
37 | $is_disambig{$_} = 1;
38 | }
39 | } else {
40 | $is_disambig{$hash_zero} = 1;
41 | }
42 |
43 | $input_cmd = ". ./path.sh; fstprint $lang/G.fst|";
44 | open(G, $input_cmd) || die "running command $input_cmd";
45 |
46 | $info_cmd = ". ./path.sh; fstcompile | fstinfo ";
47 | open2(O, I, "$info_cmd") || die "running command $info_cmd";
48 |
49 | $has_epsilons = 0;
50 |
51 | while (<G>) {
52 | @A = split(" ", $_);
53 | if (@A >= 4) {
54 | if ($is_forbidden{$A[2]} || $is_forbidden{$A[3]}) {
55 | chop;
56 | print "$0: validating $lang: error: line $_ in G.fst contains forbidden symbol <s> or </s>\n";
57 | exit(1);
58 | } elsif ($is_disambig{$A[2]}) {
59 | print I $_;
60 | if ($A[3] != 0) {
61 | chop;
62 | print "$0: validating $lang: error: line $_ in G.fst has disambig on input but no epsilon on output\n";
63 | exit(1);
64 | }
65 | } elsif ($A[2] == 0) {
66 | print I $_;
67 | $has_epsilons = 1;
68 | } elsif ($A[2] != $A[3]) {
69 | chop;
70 | print "$0: validating $lang: error: line $_ in G.fst has inputs and outputs different but input is not disambig symbol.\n";
71 | exit(1);
72 | }
73 | }
74 | }
75 |
76 | close(I); # tell 'fstcompile | fstinfo' pipeline that its input is done.
77 | while (<O>) {
78 | if (m/cyclic\s+y/) {
79 | print "$0: validating $lang: error: G.fst has cycles containing only disambig symbols and epsilons. Would cause determinization failure\n";
80 | exit(1);
81 | }
82 | }
83 |
84 | if ($has_epsilons) {
85 | print "$0: warning: validating $lang: G.fst has epsilon-input arcs. We don't expect these in most setups.\n";
86 | }
87 |
88 | print "--> $0 successfully validated $lang/G.fst\n";
89 | exit(0);
90 |
--------------------------------------------------------------------------------
/utils/lang/check_phones_compatible.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016 Hang Lyu
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script exits with status zero if the phone symbols tables are the same
18 | # except for possible differences in disambiguation symbols (meaning that all
19 | # symbols except those beginning with a # are mapped to the same values).
20 | # Otherwise it prints a warning and exits with status 1.
21 | # For the sake of compatibility with other scripts that did not write the
22 | # phones.txt to model directories, this script exits silently with status 0
23 | # if one of the phone symbol tables does not exist.
27 |
28 | . utils/parse_options.sh || exit 1;
29 |
30 | if [ $# -ne 2 ]; then
31 | echo "Usage: utils/lang/check_phones_compatible.sh "
32 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt"
33 | exit 1;
34 | fi
35 |
36 | table_first=$1
37 | table_second=$2
38 |
39 | # check the files exist or not
40 | if [ ! -f $table_first ]; then
41 | if [ ! -f $table_second ]; then
42 | echo "$0: Error! Both of the two phones-symbol tables are absent."
43 | echo "Please check your command"
44 | exit 1;
45 | else
46 | #The phones-symbol-table1 is absent. The model directory may have been created by an older script.
47 | #For back compatibility, this script exits silently with status 0.
48 | exit 0;
49 | fi
50 | elif [ ! -f $table_second ]; then
51 | #The phones-symbol-table2 is absent. The model directory may have been created by an older script.
52 | #For back compatibility, this script exits silently with status 0.
53 | exit 0;
54 | fi
55 |
56 | #Check whether the two tables are the same (except for possible differences in disambiguation symbols).
57 | if ! cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then
58 | echo "$0: phone symbol tables $table_first and $table_second are not compatible."
59 | exit 1;
60 | fi
61 |
62 | exit 0;
63 |
--------------------------------------------------------------------------------
/utils/lang/prepare_lang.sh:
--------------------------------------------------------------------------------
1 | ../prepare_lang.sh
--------------------------------------------------------------------------------
/utils/lang/validate_lang.pl:
--------------------------------------------------------------------------------
1 | ../validate_lang.pl
--------------------------------------------------------------------------------
/utils/ln.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | use File::Spec;
3 |
4 | if ( @ARGV < 2 ) {
5 | print STDERR "usage: ln.pl input1 input2 dest-dir\n" .
6 | "This script does a soft link of input1, input2, etc." .
7 | "to dest-dir, using relative links where possible\n" .
8 | "Note: input-n and dest-dir may both be absolute pathnames,\n" .
9 | "or relative pathnames, relative to the current directlory.\n";
10 | exit(1);
11 | }
12 |
13 | $dir = pop @ARGV;
14 | if ( ! -d $dir ) {
15 | print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n";
16 | exit(1);
17 | }
18 |
19 | $ans = 1; # true.
20 |
21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path.
22 | defined $absdir || die "No such directory $dir";
23 | foreach $file (@ARGV) {
24 | $absfile = File::Spec->rel2abs($file); # Get $file as abs path.
25 | defined $absfile || die "No such file or directory: $file";
26 | @absdir_split = split("/", $absdir);
27 | @absfile_split = split("/", $absfile);
28 |
29 | $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this
30 | # as the destination in the link command.
31 | $num_removed = 0;
32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) {
33 | shift @absdir_split;
34 | shift @absfile_split;
35 | $num_removed++;
36 | }
37 | if (-l $newfile) { # newfile is already a link -> safe to delete it.
38 | unlink($newfile); # "unlink" just means delete.
39 | }
40 | if ($num_removed == 0) { # will use absolute pathnames.
41 | $oldfile = "/" . join("/", @absfile_split);
42 | $ret = symlink($oldfile, $newfile);
43 | } else {
44 | $num_dots = @absdir_split;
45 | $oldfile = join("/", @absfile_split);
46 | for ($n = 0; $n < $num_dots; $n++) {
47 | $oldfile = "../" . $oldfile;
48 | }
49 | $ret = symlink($oldfile, $newfile);
50 | }
51 | $ans = $ans && $ret;
52 | if (! $ret) {
53 | print STDERR "Error linking $oldfile to $newfile\n";
54 | }
55 | }
56 |
57 | exit ($ans == 1 ? 0 : 1);
58 |
59 |
--------------------------------------------------------------------------------
/utils/make_unigram_grammar.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script is used in discriminative training.
18 | # This script makes a simple unigram-loop version of G.fst
19 | # using a unigram grammar estimated from some training transcripts.
20 | # This is for MMI training.
21 | # We don't have any silences in G.fst; these are supplied by the
22 | # optional silences in the lexicon.
23 |
24 | # Note: the symbols in the transcripts become the input and output
25 | # symbols of G.txt; these can be numeric or not.
26 |
27 | if(@ARGV != 0) {
28 | die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt"
29 | }
30 |
31 | $totcount = 0;
32 | $nl = 0;
33 | while (<>) {
34 | @A = split(" ", $_);
35 | foreach $a (@A) {
36 | $count{$a}++;
37 | $totcount++;
38 | }
39 | $nl++;
40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of
41 | # $totcount, so the grammar is properly stochastic. This doesn't
42 | # become </s>, it just becomes the final-prob.
43 | }
44 |
45 | foreach $a (keys %count) {
46 | $prob = $count{$a} / $totcount;
47 | $cost = -log($prob); # Negated natural-log probs.
48 | print "0\t0\t$a\t$a\t$cost\n";
49 | }
50 | # Final cost on state 0, = -log(end-of-sentence prob).
51 | $final_prob = $nl / $totcount;
52 | $final_cost = -log($final_prob);
53 | print "0\t$final_cost\n";
54 |
55 |
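To make the arithmetic above concrete, here is a small worked example in Python (data invented for illustration): with transcripts "YES YES" and "NO", totcount is 3 tokens plus 2 end-of-sentence events, so P(YES) = 2/5 and the final-prob is 2/5.

import math

# Illustrative data: transcripts "YES YES" and "NO".
counts = {"YES": 2, "NO": 1}
nl = 2                                   # number of sentences
totcount = sum(counts.values()) + nl     # 3 tokens + 2 sentence ends = 5

for word, c in sorted(counts.items()):
    cost = -math.log(c / totcount)       # negated natural-log prob
    print("0\t0\t%s\t%s\t%.6f" % (word, word, cost))
print("0\t%.6f" % -math.log(nl / totcount))   # final cost of state 0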
--------------------------------------------------------------------------------
/utils/nnet/gen_dct_mat.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # ./gen_dct_mat.py
19 | # This script generates a matrix with the DCT transform; the matrix is sparse
20 | # and takes into account that the data layout is along the frequency axis,
21 | # while the DCT is applied along the temporal axis.
22 |
23 | from math import *
24 | import sys
25 |
26 |
27 | from optparse import OptionParser
28 |
29 | parser = OptionParser()
30 | parser.add_option('--fea-dim', dest='dim', help='feature dimension')
31 | parser.add_option('--splice', dest='splice', help='applied splice value')
32 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis')
33 | (options, args) = parser.parse_args()
34 |
35 | if(options.dim == None):
36 | parser.print_help()
37 | sys.exit(1)
38 |
39 | dim=int(options.dim)
40 | splice=int(options.splice)
41 | dct_basis=int(options.dct_basis)
42 |
43 | timeContext=2*splice+1
44 |
45 |
46 | #generate the DCT matrix
47 | M_PI = 3.1415926535897932384626433832795
48 | M_SQRT2 = 1.4142135623730950488016887
49 |
50 |
51 | #generate sparse DCT matrix
52 | print '['
53 | for k in range(dct_basis):
54 | for m in range(dim):
55 | for n in range(timeContext):
56 | if(n==0):
57 | print m*'0 ',
58 | else:
59 | print (dim-1)*'0 ',
60 | print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))),
61 | if(n==timeContext-1):
62 | print (dim-m-1)*'0 ',
63 | print
64 | print
65 |
66 | print ']'
67 |
68 |
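The zero-padding above just places each temporal-DCT coefficient at column n*dim + m of the output row; the coefficients themselves come from the standard DCT-II formula. A short Python check of that formula, with example parameter values (not taken from any particular config):

import math

splice, dct_basis = 4, 6          # example values
T = 2 * splice + 1                # timeContext in the script above

# dct[k][n] is the coefficient the script prints for basis k at time
# offset n; in the sparse matrix it lands at column n*dim + m.
dct = [[math.sqrt(2.0 / T) * math.cos(math.pi / T * k * (n + 0.5))
        for n in range(T)]
       for k in range(dct_basis)]
print(dct[1])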
--------------------------------------------------------------------------------
/utils/nnet/gen_hamm_mat.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # ./gen_hamm_mat.py
19 | # script generates diagonal matrix with hamming window values
20 |
21 | from math import *
22 | import sys
23 |
24 |
25 | from optparse import OptionParser
26 |
27 | parser = OptionParser()
28 | parser.add_option('--fea-dim', dest='dim', help='feature dimension')
29 | parser.add_option('--splice', dest='splice', help='applied splice value')
30 | (options, args) = parser.parse_args()
31 |
32 | if(options.dim == None):
33 | parser.print_help()
34 | sys.exit(1)
35 |
36 | dim=int(options.dim)
37 | splice=int(options.splice)
38 |
39 |
40 | # generate the diagonal matrix with Hamming-window values
41 | M_2PI = 6.283185307179586476925286766559005
42 |
43 | dim_mat=(2*splice+1)*dim
44 | timeContext=2*splice+1
45 | print '['
46 | for row in range(dim_mat):
47 | for col in range(dim_mat):
48 | if col!=row:
49 | print '0',
50 | else:
51 | i=int(row/dim)
52 | print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))),
53 | print
54 |
55 | print ']'
56 |
57 |
58 |
--------------------------------------------------------------------------------
/utils/nnet/gen_splice.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # ./gen_splice.py
19 | # generates <splice> Component
20 |
21 | from math import *
22 | import sys
23 |
24 |
25 | from optparse import OptionParser
26 |
27 | parser = OptionParser()
28 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension')
29 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame')
30 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' )
31 | (options, args) = parser.parse_args()
32 |
33 | if(options.dim_in == None):
34 | parser.print_help()
35 | sys.exit(1)
36 |
37 | dim_in=int(options.dim_in)
38 | splice=int(options.splice)
39 | splice_step=int(options.splice_step)
40 |
41 | dim_out=(2*splice+1)*dim_in
42 |
43 | print '<splice>', dim_out, dim_in
44 | print '[',
45 |
46 | splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step)
47 | for idx in range(len(splice_vec)):
48 | print splice_vec[idx],
49 |
50 | print ']'
51 |
52 |
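As the --splice-step help text says, --splice 3 with --splice-step 2 selects offsets -6 -4 -2 0 2 4 6. The offset computation is just a strided range, e.g. in Python:

splice, splice_step = 3, 2   # values from the help-text example
offsets = list(range(-splice * splice_step, splice * splice_step + 1, splice_step))
print(offsets)               # [-6, -4, -2, 0, 2, 4, 6]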
--------------------------------------------------------------------------------
/utils/prepare_online_nnet_dist_build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti)
4 | # Guoguo Chen
5 | # Apache 2.0
6 | # Script to prepare the distribution from the online-nnet build
7 |
8 | other_files= #other files to be included in the build
9 | other_dirs=
10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf"
11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts"
12 |
13 | echo "$0 $@" # Print the command line for logging
14 | [ -f path.sh ] && . ./path.sh;
15 | . parse_options.sh || exit 1;
16 |
17 | if [ $# -ne 3 ]; then
18 | echo "Usage: $0 "
19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz"
20 | exit 1;
21 | fi
22 |
23 | lang=$1
24 | modeldir=$2
25 | tgzfile=$3
26 |
27 | for f in $lang/phones.txt $other_files; do
28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
29 | done
30 |
31 | build_files=
32 | for d in $modeldir/conf $modeldir/ivector_extractor; do
33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1;
34 | done
35 |
36 | for f in $ivec_extractor_files; do
37 | f=$modeldir/ivector_extractor/$f
38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
39 | build_files="$build_files $f"
40 | done
41 |
42 | # Makes a copy of the original config files, as we will change the absolute path
43 | # to relative.
44 | rm -rf $modeldir/conf_abs_path
45 | mkdir -p $modeldir/conf_abs_path
46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path
47 |
48 | for f in $conf_files; do
49 | [ ! -f $modeldir/conf/$f ] && \
50 | echo "$0: no such file $modeldir/conf/$f" && exit 1;
51 | # Changes absolute path to relative path. The path entries in the config file
52 | # are generated by scripts and it is safe to assume that they have structure:
53 | # variable=path
54 | cat $modeldir/conf_abs_path/$f | perl -e '
55 | use File::Spec;
56 | while(<STDIN>) {
57 | chomp;
58 | @col = split("=", $_);
59 | if (@col == 2 && (-f $col[1])) {
60 | $col[1] = File::Spec->abs2rel($col[1]);
61 | print "$col[0]=$col[1]\n";
62 | } else {
63 | print "$_\n";
64 | }
65 | }
66 | ' > $modeldir/conf/$f
67 | build_files="$build_files $modeldir/conf/$f"
68 | done
69 |
70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \
71 | $modeldir/final.mdl $modeldir/tree >/dev/null
72 |
73 | # Changes back to absolute path.
74 | rm -rf $modeldir/conf
75 | mv $modeldir/conf_abs_path $modeldir/conf
76 |
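The inline Perl above rewrites each "variable=path" config entry to use a path relative to the current directory before the tarball is built. A Python sketch of the same rewrite (file name hypothetical):

import os

def to_relative(line):
    # Entries are assumed to have the form "variable=path".
    key, sep, path = line.rstrip("\n").partition("=")
    if sep and os.path.isfile(path):
        return "%s=%s\n" % (key, os.path.relpath(path))
    return line

# e.g.: rewritten = [to_relative(l) for l in open("conf/ivector_extractor.conf")]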
--------------------------------------------------------------------------------
/utils/reduce_data_dir.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # koried, 10/29/2012
4 |
5 | # Reduce a data set based on a list of turn-ids
6 |
7 | if [ $# != 3 ]; then
8 | echo "usage: $0 srcdir turnlist destdir"
9 | exit 1;
10 | fi
11 |
12 | srcdir=$1
13 | reclist=$2
14 | destdir=$3
15 |
16 | if [ ! -f $srcdir/utt2spk ]; then
17 | echo "$0: no such file $srcdir/utt2spk"
18 | exit 1;
19 | fi
20 |
21 | function do_filtering {
22 | # assumes the utt2spk and spk2utt files already exist.
23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
28 | if [ -f $srcdir/segments ]; then
29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
31 | # The next line overrides the wav.scp command above; when a segments file exists, wav.scp is indexed by recording-id, so filtering it by utt2spk would be incorrect.
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp
33 | [ -f $srcdir/reco2file_and_channel ] && \
34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
35 |
36 | # Filter the STM file for proper sclite scoring (this will also remove the comment lines)
37 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm
38 | rm $destdir/reco
39 | fi
40 | srcutts=`cat $srcdir/utt2spk | wc -l`
41 | destutts=`cat $destdir/utt2spk | wc -l`
42 | echo "Reduced #utt from $srcutts to $destutts"
43 | }
44 |
45 | mkdir -p $destdir
46 |
47 | # filter the utt2spk based on the set of recordings
48 | utils/filter_scp.pl $reclist < $srcdir/utt2spk > $destdir/utt2spk
49 |
50 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
51 | do_filtering;
52 |
53 |
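Every utils/filter_scp.pl call above does the same thing: keep the lines of an scp-style file whose first field appears in an id list. A self-contained Python equivalent (paths hypothetical):

def filter_scp(id_file, scp_file):
    # Keep scp lines whose first field (utt-id or spk-id) is in id_file.
    with open(id_file) as f:
        keep = {line.split()[0] for line in f if line.strip()}
    with open(scp_file) as f:
        return [line for line in f if line.split() and line.split()[0] in keep]

# e.g.: filter_scp("dest/utt2spk", "src/feats.scp")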
--------------------------------------------------------------------------------
/utils/reduce_data_dir_by_reclist.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # koried, 10/29/2012
4 |
5 | # Reduce a data set based on a list of recordings
6 |
7 | if [ $# != 3 ]; then
8 | echo "usage: $0 srcdir reclist destdir"
9 | exit 1;
10 | fi
11 |
12 | srcdir=$1
13 | reclist=$2
14 | destdir=$3
15 |
16 | if [ ! -f $srcdir/utt2spk ]; then
17 | echo "$0: no such file $srcdir/utt2spk"
18 | exit 1;
19 | fi
20 |
21 | function do_filtering {
22 | # assumes the utt2spk and spk2utt files already exist.
23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp
28 | if [ -f $srcdir/segments ]; then
29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments
30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings.
31 | # The next line overrides the wav.scp command above; when a segments file exists, wav.scp is indexed by recording-id, so filtering it by utt2spk would be incorrect.
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp
33 | [ -f $srcdir/reco2file_and_channel ] && \
34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
35 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm
36 | rm $destdir/reco
37 | fi
38 | srcutts=`cat $srcdir/utt2spk | wc -l`
39 | destutts=`cat $destdir/utt2spk | wc -l`
40 | echo "Reduced #utt from $srcutts to $destutts"
41 | }
42 |
43 | mkdir -p $destdir
44 |
45 | # filter the utt2spk based on the set of recordings
46 | rm -f $destdir/utt2spk
47 | for i in `cat $reclist`; do
48 | cat $srcdir/utt2spk | grep ^$i >> $destdir/utt2spk
49 | done
50 |
51 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt
52 | do_filtering;
53 |
54 |
--------------------------------------------------------------------------------
/utils/remove_data_links.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This program searches within a directory for soft links that
4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory,
5 | # and it removes both the soft links and the things they point to.
6 | # for instance, if you have a soft link
7 | # foo/egs/1.1.egs -> storage/2/1.1.egs
8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs.
9 |
10 | ret=0
11 |
12 | dry_run=false
13 |
14 | if [ "$1" == "--dry-run" ]; then
15 | dry_run=true
16 | shift
17 | fi
18 |
19 | if [ $# == 0 ]; then
20 | echo "Usage: $0 [--dry-run] "
21 | echo "e.g.: $0 exp/nnet4a/egs/"
22 | echo " Removes from any subdirectories of the command-line arguments, soft links that "
23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things"
24 | echo " that those soft links point to. Will typically be called on a directory prior"
25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other"
26 | echo " volumes also gets deleted."
27 | echo " With --dry-run, just prints what it would do."
28 | fi
29 |
30 | for dir in $*; do
31 | if [ ! -d $dir ]; then
32 | echo "$0: not a directory: $dir"
33 | ret=1
34 | else
35 | for subdir in $(find $dir -type d); do
36 | if [ -d $subdir/storage ]; then
37 | for x in $(ls $subdir); do
38 | f=$subdir/$x
39 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then
40 | target=$subdir/$(readlink $f)
41 | if $dry_run; then
42 | echo rm $f $target
43 | else
44 | rm $f $target
45 | fi
46 | fi
47 | done
48 | fi
49 | done
50 | fi
51 | done
52 |
53 | exit $ret
54 |
--------------------------------------------------------------------------------
/utils/remove_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script removes lines that contain these OOVs on either the
18 | # third or fourth fields of the line. It is intended to remove arcs
19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
20 |
21 | if ( @ARGV < 1 || @ARGV > 2) {
22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
23 | }
24 |
25 | $unklist = shift @ARGV;
26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
27 | while(<S>){
28 | @A = split(" ", $_);
29 | @A == 1 || die "Bad line in unknown-symbol list: $_";
30 | $unk{$A[0]} = 1;
31 | }
32 |
33 | $num_removed = 0;
34 | while(<>){
35 | @A = split(" ", $_);
36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
37 | $num_removed++;
38 | } else {
39 | print;
40 | }
41 | }
42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
43 |
44 |
--------------------------------------------------------------------------------
/utils/rnnlm_compute_scores.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Compute scores from RNNLM. This script takes a directory
4 | # $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ),
5 | # where it expects the files:
6 | # rnnlm wordlist.rnn unk.probs,
7 | # and also an input file location where it can get the sentences to score, and
8 | # an output file location to put the scores (negated logprobs) for each
9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and
10 | # output files will have a first field that corresponds to some kind of
11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the
12 | # N-best list.
13 | #
14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary,
15 | # that the RNN was trained on (note, it won't include <s> or </s>),
16 | # plus <unk> which is a kind of class where we put low-frequency
17 | # words; unk.probs gives the probs for words given this class, and it
18 | # has, on each line, "word prob".
19 |
20 | rnnlm_ver=rnnlm-0.3e
21 | ensure_normalized_probs=false # if true then we add the necessary options to
22 | # normalize the probabilities of RNNLM
23 | # e.g. when using faster-rnnlm in the nce mode
24 |
25 | . ./path.sh || exit 1;
26 | . utils/parse_options.sh
27 |
28 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm
29 |
30 | [ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1;
31 |
32 | if [ $# != 4 ]; then
33 | echo "Usage: rnnlm_compute_scores.sh "
34 | exit 1;
35 | fi
36 |
37 | dir=$1
38 | tempdir=$2
39 | text_in=$3
40 | scores_out=$4
41 |
42 | for x in rnnlm wordlist.rnn unk.probs; do
43 | if [ ! -f $dir/$x ]; then
44 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist."
45 | exit 1;
46 | fi
47 | done
48 |
49 | mkdir -p $tempdir
50 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text
51 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids.
52 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \
53 | -v logprobs=$tempdir/loglikes.oov \
54 | 'BEGIN{ while((getline<voc)>0) { invoc[$1]=1; } while ((getline<unk)>0){ unkprob[$1]=$2;} }
55 | { logprob=0;
56 | if (NF==0) { printf ""; logprob = log(1.0e-07);
57 | print "Warning: empty sequence." | "cat 1>&2"; }
58 | for (x=1;x<=NF;x++) { w=$x;
59 | if (invoc[w]) { printf("%s ",w); } else {
60 | printf(" ");
61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); }
62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}}
63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk
64 |
65 | # OK, now we compute the scores on the text with OOVs replaced
66 | # with <RNN_UNK>.
67 |
68 | if [ $rnnlm_ver == "faster-rnnlm" ]; then
69 | extra_options=
70 | if [ "$ensure_normalized_probs" = true ]; then
71 | extra_options="--nce-accurate-test 1"
72 | fi
73 | $rnnlm $extra_options -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \
74 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn
75 | else
76 | # add the utterance_id as required by Mikolov's rnnlm
77 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk
78 |
79 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \
80 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn
81 | fi
82 |
83 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \
84 | echo "rnnlm rescoring failed" && exit 1;
85 |
86 | paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores
87 |
88 | # scores out, with utterance-ids.
89 | paste $tempdir/ids $tempdir/scores > $scores_out
90 |
91 |
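The last two paste commands above implement score = -(RNN log-likelihood + OOV log-likelihood) per utterance. The same combination in Python, with made-up numbers:

import math

rnn_loglikes = [-12.3, -8.7]                 # column from loglikes.rnn
oov_loglikes = [math.log(1.0e-07), 0.0]      # column from loglikes.oov
scores = [-(r + o) for r, o in zip(rnn_loglikes, oov_loglikes)]
print(scores)                                # negated total logprobs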
--------------------------------------------------------------------------------
/utils/s2eps.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 |
20 | while(<>){
21 | @A = split(" ", $_);
22 | if ( @A >= 4 ) {
23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; }
24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; }
25 | }
26 | print join("\t", @A) . "\n";
27 | }
28 |
--------------------------------------------------------------------------------
/utils/scoring/wer_report.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal )
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | # This script reads the per-utt table generated, for example, during scoring
19 | # and outputs the WER in a format similar to what the compute-wer utility
20 | # or the utils/best_wer.pl produces
21 | # i.e. from table containing lines in this format
22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463
23 | # produces output like this
24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ]
25 | # NB: if the STDIN stream contains more than one of the SUM raw entries,
26 | # the best one will be found and printed
27 | #
28 | # If the script is called with parameters, it uses them to provide
29 | # a description of the output
30 | # i.e.
31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set
32 | # the following output will be produced
33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set
34 |
35 |
36 | while (<STDIN>) {
37 | if ( m:SUM\s+raw:) {
38 | @F = split;
39 | if ((!defined $wer) || ($wer > $F[8])) {
40 | $corr=$F[4];
41 | $sub=$F[5];
42 | $ins=$F[6];
43 | $del=$F[7];
44 | $wer=$F[8];
45 | $words=$F[3];
46 | }
47 | }
48 | }
49 |
50 | if (defined $wer) {
51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words);
52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]";
53 | print " " . join(" ", @ARGV) if @ARGV > 0;
54 | print "\n";
55 | }
56 |
--------------------------------------------------------------------------------
/utils/show_lattice.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | format=pdf # pdf svg
4 | mode=save # display save
5 | lm_scale=0.0
6 | acoustic_scale=0.0
7 | #end of config
8 |
9 | . utils/parse_options.sh
10 |
11 | if [ $# != 3 ]; then
12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] "
13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt"
14 | exit 1;
15 | fi
16 |
17 | . path.sh
18 |
19 | uttid=$1
20 | lat=$2
21 | words=$3
22 |
23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup
24 |
25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1;
26 | ! [ -s $tmpdir/$uttid.fst ] && \
27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1;
28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format}
29 |
30 | if [ "$(uname)" == "Darwin" ]; then
31 | doc_open=open
32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then
33 | doc_open=xdg-open
34 | elif [ $mode == "display" ] ; then
35 | echo "Can not automaticaly open file on your operating system"
36 | mode=save
37 | fi
38 |
39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format}
40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save
41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} .
42 |
43 | exit 0
44 |
--------------------------------------------------------------------------------
/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
19 | if ($ARGV[0] eq "--srand") {
20 | $n = $ARGV[1];
21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\"";
22 | srand($ARGV[1]);
23 | shift;
24 | shift;
25 | } else {
26 | srand(0); # Gives inconsistent behavior if we don't seed.
27 | }
28 |
29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we
30 | # don't understand.
31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n";
32 | print "randomizes the order of lines of input.\n";
33 | exit(1);
34 | }
35 |
36 | @lines = ();
37 | while (<>) {
38 | push @lines, [ (rand(), $_)] ;
39 | }
40 |
41 | @lines = sort { $a->[0] cmp $b->[0] } @lines;
42 | foreach $l (@lines) {
43 | print $l->[1];
44 | }
45 |
--------------------------------------------------------------------------------
/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | while(<>){
19 | @A = split(" ", $_);
20 | @A > 1 || die "Invalid line in spk2utt file: $_";
21 | $s = shift @A;
22 | foreach $u ( @A ) {
23 | print "$u $s\n";
24 | }
25 | }
26 |
27 |
28 |
--------------------------------------------------------------------------------
/utils/subset_scp.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | use warnings; #sed replacement for -w perl parameter
3 | # Copyright 2010-2011 Microsoft Corporation
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # This program selects a subset of N elements in the scp.
19 |
20 | # By default, it selects them evenly from throughout the scp, in order to avoid
21 | # selecting too many from the same speaker. It prints them on the standard
22 | # output.
23 | # With the option --first, it just selects the N first utterances.
24 | # With the option --last, it just selects the N last utterances.
25 |
26 | # Last modified by JHU & HKUST @2013
27 |
28 |
29 | $quiet = 0;
30 | $first = 0;
31 | $last = 0;
32 |
33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") {
34 | shift;
35 | $quiet = 1;
36 | }
37 | if (@ARGV > 0 && $ARGV[0] eq "--first") {
38 | shift;
39 | $first = 1;
40 | }
41 | if (@ARGV > 0 && $ARGV[0] eq "--last") {
42 | shift;
43 | $last = 1;
44 | }
45 |
46 | if(@ARGV < 2 ) {
47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" .
48 | " --quiet causes it to not die if N < num lines in scp.\n" .
49 | " --first and --last make it equivalent to head or tail.\n" .
50 | "See also: filter_scp.pl\n";
51 | }
52 |
53 | $N = shift @ARGV;
54 | if($N == 0) {
55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\"";
56 | }
57 | $inscp = shift @ARGV;
58 | open(I, "<$inscp") || die "Opening input scp file $inscp";
59 |
60 | @F = ();
61 | while(<I>) {
62 | push @F, $_;
63 | }
64 | $numlines = @F;
65 | if($N > $numlines) {
66 | if ($quiet) {
67 | $N = $numlines;
68 | } else {
69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines";
70 | }
71 | }
72 |
73 | sub select_n {
74 | my ($start,$end,$num_needed) = @_;
75 | my $diff = $end - $start;
76 | if ($num_needed > $diff) {
77 | die "select_n: code error";
78 | }
79 | if ($diff == 1 ) {
80 | if ($num_needed > 0) {
81 | print $F[$start];
82 | }
83 | } else {
84 | my $halfdiff = int($diff/2);
85 | my $halfneeded = int($num_needed/2);
86 | select_n($start, $start+$halfdiff, $halfneeded);
87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded);
88 | }
89 | }
90 |
91 | if ( ! $first && ! $last) {
92 | if ($N > 0) {
93 | select_n(0, $numlines, $N);
94 | }
95 | } else {
96 | if ($first) { # --first option: same as head.
97 | for ($n = 0; $n < $N; $n++) {
98 | print $F[$n];
99 | }
100 | } else { # --last option: same as tail.
101 | for ($n = @F - $N; $n < @F; $n++) {
102 | print $F[$n];
103 | }
104 | }
105 | }
106 |
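select_n above spreads the N picks evenly by halving the index range and splitting the quota between the halves. The same recursion in Python, on toy data:

def select_n(items, start, end, needed, out):
    # Recursively halve the range, taking half the quota from each side.
    diff = end - start
    assert needed <= diff, "select_n: code error"
    if diff == 1:
        if needed > 0:
            out.append(items[start])
    else:
        half_diff, half_needed = diff // 2, needed // 2
        select_n(items, start, start + half_diff, half_needed, out)
        select_n(items, start + half_diff, end, needed - half_needed, out)

picked = []
select_n(list(range(10)), 0, 10, 4, picked)
print(picked)   # [1, 4, 6, 9]: spread across the whole range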
--------------------------------------------------------------------------------
/utils/summarize_logs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
4 |
5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1;
6 |
7 | sub split_hundreds { # split list of filenames into groups of 100.
8 | my $names = shift @_;
9 | my @A = split(" ", $names);
10 | my @ans = ();
11 | while (@A > 0) {
12 | my $group = "";
13 | for ($x = 0; $x < 100 && @A>0; $x++) {
14 | $fname = pop @A;
15 | $group .= "$fname ";
16 | }
17 | push @ans, $group;
18 | }
19 | return @ans;
20 | }
21 |
22 | sub parse_accounting_entry {
23 | $entry= shift @_;
24 |
25 | @elems = split " ", $entry;
26 |
27 | $time=undef;
28 | $threads=undef;
29 | foreach $elem (@elems) {
30 | if ( $elem=~ m/time=(\d+)/ ) {
31 | $elem =~ s/time=(\d+)/$1/;
32 | $time = $elem;
33 | } elsif ( $elem=~ m/threads=(\d+)/ ) {
34 | $elem =~ s/threads=(\d+)/$1/g;
35 | $threads = $elem;
36 | } else {
37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n";
38 | }
39 | }
40 |
41 | if (defined($time) and defined($threads) ) {
42 | return ($time, $threads);
43 | } else {
44 | die "The accounting entry \"$entry\" did not contain all necessary attributes";
45 | }
46 | }
47 |
48 | foreach $dir (@ARGV) {
49 |
50 | #$dir = $ARGV[0];
51 | print "$dir\n";
52 |
53 | ! -d $dir && print STDERR "summarize_logs.pl: no such directory $dir\n" ;
54 |
55 | $dir =~ s:/$::; # Remove trailing slash.
56 |
57 |
58 | # Group the files into categories where all have the same base-name.
59 | foreach $f (glob ("$dir/*.log")) {
60 | $f_category = $f;
61 | # do next expression twice; s///g doesn't work as they overlap.
62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:;
63 | #$f_category =~ s:\.\d+\.:.*.:;
64 | $fmap{$f_category} .= " $f";
65 | }
66 | }
67 |
68 | foreach $c (sort (keys %fmap) ) {
69 | $n = 0;
70 | foreach $fgroup (split_hundreds($fmap{$c})) {
71 | $n += `grep -w WARNING $fgroup | wc -l`;
72 | }
73 | if ($n != 0) {
74 | print "$n warnings in $c\n"
75 | }
76 | }
77 | foreach $c (sort (keys %fmap)) {
78 | $n = 0;
79 | foreach $fgroup (split_hundreds($fmap{$c})) {
80 | $n += `grep -w ERROR $fgroup | wc -l`;
81 | }
82 | if ($n != 0) {
83 | print "$n errors in $c\n"
84 | }
85 | }
86 |
87 | $supertotal_cpu_time=0.0;
88 | $supertotal_clock_time=0.0;
89 | $supertotal_threads=0.0;
90 |
91 | foreach $c (sort (keys %fmap)) {
92 | $n = 0;
93 |
94 | $total_cpu_time=0.0;
95 | $total_clock_time=0.0;
96 | $total_threads=0.0;
97 | foreach $fgroup (split_hundreds($fmap{$c})) {
98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`;
99 |
100 | #print $lines ."\n";
101 |
102 | @entries = split "\n", $lines;
103 |
104 | foreach $line (@entries) {
105 | ($time, $threads) = parse_accounting_entry($line);
106 |
107 | $total_cpu_time += $time * $threads;
108 | $total_threads += $threads;
109 | if ( $time > $total_clock_time ) {
110 | $total_clock_time = $time;
111 | }
112 | }
113 | }
114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n";
115 |
116 | $supertotal_cpu_time += $total_cpu_time;
117 | $supertotal_clock_time += $total_clock_time;
118 | $supertotal_threads += $total_threads;
119 | }
120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n";
121 |
122 |
--------------------------------------------------------------------------------
/utils/summarize_warnings.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
4 |
5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1;
6 |
7 | $dir = $ARGV[0];
8 |
9 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1;
10 |
11 | $dir =~ s:/$::; # Remove trailing slash.
12 |
13 |
14 | # Group the files into categories where all have the same base-name.
15 | foreach $f (glob ("$dir/*.log")) {
16 | $f_category = $f;
17 | # do next expression twice; s///g doesn't work as they overlap.
18 | $f_category =~ s:\.\d+\.:.*.:;
19 | $f_category =~ s:\.\d+\.:.*.:;
20 | $fmap{$f_category} .= " $f";
21 | }
22 |
23 | sub split_hundreds { # split list of filenames into groups of 100.
24 | my $names = shift @_;
25 | my @A = split(" ", $names);
26 | my @ans = ();
27 | while (@A > 0) {
28 | my $group = "";
29 | for ($x = 0; $x < 100 && @A>0; $x++) {
30 | $fname = pop @A;
31 | $group .= "$fname ";
32 | }
33 | push @ans, $group;
34 | }
35 | return @ans;
36 | }
37 |
38 | foreach $c (keys %fmap) {
39 | $n = 0;
40 | foreach $fgroup (split_hundreds($fmap{$c})) {
41 | $n += `grep -w WARNING $fgroup | wc -l`;
42 | }
43 | if ($n != 0) {
44 | print "$n warnings in $c\n"
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/utils/sym2int.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | $ignore_oov = 0;
19 |
20 | for($x = 0; $x < 2; $x++) {
21 | if ($ARGV[0] eq "--map-oov") {
22 | shift @ARGV;
23 | $map_oov = shift @ARGV;
24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") {
25 | # disallow '-f', the empty string and anything ending in words.txt as the
26 | # OOV symbol because these are likely command-line errors.
27 | die "the --map-oov option requires an argument";
28 | }
29 | }
30 | if ($ARGV[0] eq "-f") {
31 | shift @ARGV;
32 | $field_spec = shift @ARGV;
33 | if ($field_spec =~ m/^\d+$/) {
34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
35 | }
36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
37 | if ($1 ne "") {
38 | $field_begin = $1 - 1; # Change to zero-based indexing.
39 | }
40 | if ($2 ne "") {
41 | $field_end = $2 - 1; # Change to zero-based indexing.
42 | }
43 | }
44 | if (!defined $field_begin && !defined $field_end) {
45 | die "Bad argument to -f option: $field_spec";
46 | }
47 | }
48 | }
49 |
50 | $symtab = shift @ARGV;
51 | if (!defined $symtab) {
52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" .
53 | "options: [--map-oov ] [-f ]\n" .
54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n";
55 | }
56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
57 | while(<F>) {
58 | @A = split(" ", $_);
59 | @A == 2 || die "bad line in symbol table file: $_";
60 | $sym2int{$A[0]} = $A[1] + 0;
61 | }
62 |
63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up
64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; }
65 | $map_oov = $sym2int{$map_oov};
66 | }
67 |
68 | $num_warning = 0;
69 | $max_warning = 20;
70 |
71 | while (<>) {
72 | @A = split(" ", $_);
73 | @B = ();
74 | for ($n = 0; $n < @A; $n++) {
75 | $a = $A[$n];
76 | if ( (!defined $field_begin || $n >= $field_begin)
77 | && (!defined $field_end || $n <= $field_end)) {
78 | $i = $sym2int{$a};
79 | if (!defined ($i)) {
80 | if (defined $map_oov) {
81 | if ($num_warning++ < $max_warning) {
82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n";
83 | if ($num_warning == $max_warning) {
84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n";
85 | }
86 | }
87 | $i = $map_oov;
88 | } else {
89 | $pos = $n+1;
90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n";
91 | }
92 | }
93 | $a = $i;
94 | }
95 | push @B, $a;
96 | }
97 | print join(" ", @B);
98 | print "\n";
99 | }
100 | if ($num_warning > 0) {
101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n";
102 | }
103 |
104 | exit(0);
105 |
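Stripped of the field-range handling and warning limits, the core of sym2int.pl is a table lookup with an optional OOV fallback. A Python sketch with invented symbols:

sym2int = {"<eps>": 0, "YES": 1, "NO": 2, "<unk>": 3}   # toy table
map_oov = sym2int["<unk>"]                              # --map-oov <unk>

def to_ints(words):
    # OOVs map to map_oov; without it the Perl script dies instead.
    return [sym2int.get(w, map_oov) for w in words]

print(to_ints(["YES", "MAYBE", "NO"]))   # [1, 3, 2]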
--------------------------------------------------------------------------------
/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 |
21 | if ( @ARGV > 1 ) {
22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
23 | }
24 |
25 | while(<>){
26 | @A = split(" ", $_);
27 | @A == 2 || die "Invalid line in utt2spk file: $_";
28 | ($u,$s) = @A;
29 | if(!$seen_spk{$s}) {
30 | $seen_spk{$s} = 1;
31 | push @spklist, $s;
32 | }
33 | push (@{$spk_hash{$s}}, "$u");
34 | }
35 | foreach $s (@spklist) {
36 | $l = join(' ',@{$spk_hash{$s}});
37 | print "$s $l\n";
38 | }
39 |
--------------------------------------------------------------------------------
/waves_yesno/0_0_0_0_1_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_0_1_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_0_1_0_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_0_1_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_0_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_1_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_1_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_0_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_1_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_1_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_1_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_0_1_1_1_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_0_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_0_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_0_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_1_0_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_0_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_1_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_0_1_1_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_0_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_1_0_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_1_0_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_1_0_1_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_1_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_1_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/0_1_1_1_1_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_0_0_0_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_0_0_0_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_0_0_0_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_0_0_1_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_1_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_0_1_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_1_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_1_0_1_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_0_1_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_1_1_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_1_1_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_0_1_1_1_1_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_1_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_0_0_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_0_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_0_1_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_1_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_1_0_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_1_1_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_1_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_0_1_1_1_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_1_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_0_0_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_0_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_0_1_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/README:
--------------------------------------------------------------------------------
1 | This dataset can be found at http://openslr.org/resources/1/waves_yesno.tar.gz
2 |
3 | This dataset was created for the Kaldi project (see kaldi.sf.net),
4 | by a contributor who prefers to remain anonymous. The main point of the dataset is
5 | to provide a way to test out the Kaldi scripts for free.
6 |
7 | The archive "waves_yesno.tar.gz" contains 60 .wav files, sampled at 8 kHz. All were recorded
8 | by the same male speaker, in English (although the individual is not a native speaker).
9 | In each file, the individual says 8 words; each word is either "yes" or "no", so each
10 | file is a random sequence of 8 yes-es or noes. There is no separate transcription provided; the
11 | sequence is encoded in the filename, with 1 for yes and 0 for no, for instance:
12 |
13 | # tar -xvzf waves_yesno.tar.gz
14 | waves_yesno/1_0_1_1_1_0_1_0.wav
15 | waves_yesno/0_1_1_0_0_1_1_0.wav
16 | ...
17 |
--------------------------------------------------------------------------------
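As the README notes, there is no separate transcription: each filename encodes the eight-word utterance, with 1 for "yes" and 0 for "no". This is exactly the property the tutorial's data preparation relies on. The following is a minimal, hypothetical Python sketch of that decoding step (the function name filename_to_transcript is illustrative and not part of the repo; the repo's actual logic lives in data_prep.py):

    # Recover the transcript encoded in a yesno filename,
    # e.g. '1_0_1_1_1_0_1_0.wav' -> 'YES NO YES YES YES NO YES NO'.
    import os

    def filename_to_transcript(wav_path):
        # Strip the directory and the .wav extension, leaving '1_0_1_1_1_0_1_0'.
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        # Map each underscore-separated digit to its word.
        words = {"1": "YES", "0": "NO"}
        return " ".join(words[digit] for digit in stem.split("_"))

    if __name__ == "__main__":
        print(filename_to_transcript("waves_yesno/1_0_1_1_1_0_1_0.wav"))
        # prints: YES NO YES YES YES NO YES NO

Deriving transcripts from filenames this way is what lets the 60 .wav files above serve as a complete, self-labeling corpus for testing the Kaldi scripts.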