├── .gitignore
├── LICENSE
├── README.md
├── conf
│   ├── mfcc.conf
│   └── topo_orig.proto
├── data_prep.py
├── lm
│   ├── prepare_lm.sh
│   └── yesno-unigram.arpabo
├── local
│   └── score.sh
├── path.sh
├── steps
│   ├── align_basis_fmllr.sh
│   ├── align_fmllr.sh
│   ├── align_fmllr_lats.sh
│   ├── align_lvtln.sh
│   ├── align_raw_fmllr.sh
│   ├── align_sgmm.sh
│   ├── align_sgmm2.sh
│   ├── align_si.sh
│   ├── append_feats.sh
│   ├── cleanup
│   │   ├── clean_and_segment_data.sh
│   │   ├── combine_short_segments.py
│   │   ├── create_segments_from_ctm.pl
│   │   ├── debug_lexicon.sh
│   │   ├── decode_segmentation.sh
│   │   ├── find_bad_utts.sh
│   │   ├── find_bad_utts_nnet.sh
│   │   ├── get_ctm_edits.py
│   │   ├── get_non_scored_words.py
│   │   ├── internal
│   │   │   ├── get_ctm_edits.py
│   │   │   ├── get_non_scored_words.py
│   │   │   ├── make_one_biased_lm.py
│   │   │   ├── modify_ctm_edits.py
│   │   │   ├── segment_ctm_edits.py
│   │   │   └── taint_ctm_edits.py
│   │   ├── lattice_oracle_align.sh
│   │   ├── make_biased_lm_graphs.sh
│   │   ├── make_biased_lms.py
│   │   ├── make_one_biased_lm.py
│   │   ├── make_segmentation_data_dir.sh
│   │   ├── make_segmentation_graph.sh
│   │   ├── make_utterance_fsts.pl
│   │   ├── make_utterance_graph.sh
│   │   ├── modify_ctm_edits.py
│   │   ├── segment_ctm_edits.py
│   │   ├── split_long_utterance.sh
│   │   └── taint_ctm_edits.py
│   ├── combine_ali_dirs.sh
│   ├── compute_cmvn_stats.sh
│   ├── conf
│   │   ├── append_eval_to_ctm.py
│   │   ├── append_prf_to_ctm.py
│   │   ├── apply_calibration.sh
│   │   ├── convert_ctm_to_tra.py
│   │   ├── lattice_depth_per_frame.sh
│   │   ├── parse_arpa_unigrams.py
│   │   ├── prepare_calibration_data.py
│   │   ├── prepare_word_categories.py
│   │   └── train_calibration.sh
│   ├── data
│   │   ├── data_dir_manipulation_lib.py
│   │   └── reverberate_data_dir.py
│   ├── decode.sh
│   ├── decode_basis_fmllr.sh
│   ├── decode_biglm.sh
│   ├── decode_combine.sh
│   ├── decode_fmllr.sh
│   ├── decode_fmllr_extra.sh
│   ├── decode_fmmi.sh
│   ├── decode_fromlats.sh
│   ├── decode_lvtln.sh
│   ├── decode_nnet.sh
│   ├── decode_nolats.sh
│   ├── decode_raw_fmllr.sh
│   ├── decode_sgmm.sh
│   ├── decode_sgmm2.sh
│   ├── decode_sgmm2_fromlats.sh
│   ├── decode_sgmm2_rescore.sh
│   ├── decode_sgmm2_rescore_project.sh
│   ├── decode_sgmm_fromlats.sh
│   ├── decode_sgmm_rescore.sh
│   ├── decode_si.sh
│   ├── decode_with_map.sh
│   ├── diagnostic
│   │   ├── analyze_alignments.sh
│   │   ├── analyze_lats.sh
│   │   ├── analyze_lattice_depth_stats.py
│   │   └── analyze_phone_length_stats.py
│   ├── get_ctm.sh
│   ├── get_fmllr_basis.sh
│   ├── get_lexicon_probs.sh
│   ├── get_prons.sh
│   ├── get_train_ctm.sh
│   ├── info
│   │   ├── chain_dir_info.pl
│   │   ├── gmm_dir_info.pl
│   │   ├── nnet2_dir_info.pl
│   │   └── nnet3_dir_info.pl
│   ├── kl_hmm
│   │   ├── build_tree.sh
│   │   ├── decode_kl_hmm.sh
│   │   └── train_kl_hmm.sh
│   ├── lmrescore.sh
│   ├── lmrescore_const_arpa.sh
│   ├── lmrescore_rnnlm_lat.sh
│   ├── make_denlats.sh
│   ├── make_denlats_sgmm.sh
│   ├── make_denlats_sgmm2.sh
│   ├── make_fbank.sh
│   ├── make_fbank_pitch.sh
│   ├── make_index.sh
│   ├── make_mfcc.sh
│   ├── make_mfcc_pitch.sh
│   ├── make_mfcc_pitch_online.sh
│   ├── make_phone_graph.sh
│   ├── make_plp.sh
│   ├── make_plp_pitch.sh
│   ├── mixup.sh
│   ├── nnet
│   │   ├── align.sh
│   │   ├── decode.sh
│   │   ├── make_bn_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_fmllr_feats.sh
│   │   ├── make_fmmi_feats.sh
│   │   ├── make_priors.sh
│   │   ├── pretrain_dbn.sh
│   │   ├── train.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mpe.sh
│   │   └── train_scheduler.sh
│   ├── nnet2
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── convert_lda_to_raw.sh
│   │   ├── convert_nnet1_to_nnet2.sh
│   │   ├── create_appended_model.sh
│   │   ├── decode.sh
│   │   ├── dump_bottleneck_features.sh
│   │   ├── get_egs.sh
│   │   ├── get_egs2.sh
│   │   ├── get_egs_discriminative2.sh
│   │   ├── get_lda.sh
│   │   ├── get_lda_block.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_perturbed_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_multisplice_configs.py
│   │   ├── relabel_egs.sh
│   │   ├── relabel_egs2.sh
│   │   ├── remove_egs.sh
│   │   ├── retrain_fast.sh
│   │   ├── retrain_simple2.sh
│   │   ├── retrain_tanh.sh
│   │   ├── train_block.sh
│   │   ├── train_convnet_accel2.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_discriminative2.sh
│   │   ├── train_discriminative_multilang2.sh
│   │   ├── train_more.sh
│   │   ├── train_more2.sh
│   │   ├── train_multilang2.sh
│   │   ├── train_multisplice_accel2.sh
│   │   ├── train_multisplice_ensemble.sh
│   │   ├── train_pnorm.sh
│   │   ├── train_pnorm_accel2.sh
│   │   ├── train_pnorm_bottleneck_fast.sh
│   │   ├── train_pnorm_ensemble.sh
│   │   ├── train_pnorm_fast.sh
│   │   ├── train_pnorm_multisplice.sh
│   │   ├── train_pnorm_multisplice2.sh
│   │   ├── train_pnorm_simple.sh
│   │   ├── train_pnorm_simple2.sh
│   │   ├── train_tanh.sh
│   │   ├── train_tanh_bottleneck.sh
│   │   ├── train_tanh_fast.sh
│   │   └── update_nnet.sh
│   ├── nnet3
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── chain
│   │   │   ├── build_tree.sh
│   │   │   ├── gen_topo.pl
│   │   │   ├── gen_topo.py
│   │   │   ├── gen_topo2.py
│   │   │   ├── gen_topo3.py
│   │   │   ├── gen_topo4.py
│   │   │   ├── gen_topo5.py
│   │   │   ├── get_egs.sh
│   │   │   ├── nnet3_chain_lib.py
│   │   │   ├── train.py
│   │   │   └── train_tdnn.sh
│   │   ├── components.py
│   │   ├── decode.sh
│   │   ├── dot
│   │   │   ├── descriptor_parser.py
│   │   │   └── nnet3_to_dot.py
│   │   ├── get_egs.sh
│   │   ├── get_egs_discriminative.sh
│   │   ├── get_egs_targets.sh
│   │   ├── get_successful_models.py
│   │   ├── lstm
│   │   │   ├── make_configs.py
│   │   │   └── train.sh
│   │   ├── make_denlats.sh
│   │   ├── make_jesus_configs.py
│   │   ├── make_tdnn_configs.py
│   │   ├── nnet3_to_dot.sh
│   │   ├── nnet3_train_lib.py
│   │   ├── report
│   │   │   ├── generate_plots.py
│   │   │   └── nnet3_log_parse_lib.py
│   │   ├── tdnn
│   │   │   ├── make_configs.py
│   │   │   ├── train.sh
│   │   │   └── train_raw_nnet.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_dnn.py
│   │   ├── train_rnn.py
│   │   └── train_tdnn.sh
│   ├── online
│   │   ├── decode.sh
│   │   ├── nnet2
│   │   │   ├── align.sh
│   │   │   ├── copy_data_dir.sh
│   │   │   ├── decode.sh
│   │   │   ├── dump_nnet_activations.sh
│   │   │   ├── extract_ivectors.sh
│   │   │   ├── extract_ivectors_online.sh
│   │   │   ├── get_egs.sh
│   │   │   ├── get_egs2.sh
│   │   │   ├── get_egs_discriminative2.sh
│   │   │   ├── make_denlats.sh
│   │   │   ├── prepare_online_decoding.sh
│   │   │   ├── prepare_online_decoding_retrain.sh
│   │   │   ├── prepare_online_decoding_transfer.sh
│   │   │   ├── train_diag_ubm.sh
│   │   │   └── train_ivector_extractor.sh
│   │   ├── nnet3
│   │   │   ├── decode.sh
│   │   │   └── prepare_online_decoding.sh
│   │   └── prepare_online_decoding.sh
│   ├── oracle_wer.sh
│   ├── paste_feats.sh
│   ├── resegment_data.sh
│   ├── resegment_text.sh
│   ├── rnnlmrescore.sh
│   ├── score_kaldi.sh
│   ├── score_kaldi_compare.sh
│   ├── search_index.sh
│   ├── select_feats.sh
│   ├── shift_feats.sh
│   ├── tandem
│   │   ├── align_fmllr.sh
│   │   ├── align_sgmm.sh
│   │   ├── align_sgmm2.sh
│   │   ├── align_si.sh
│   │   ├── decode.sh
│   │   ├── decode_fmllr.sh
│   │   ├── decode_sgmm.sh
│   │   ├── decode_sgmm2.sh
│   │   ├── decode_si.sh
│   │   ├── make_denlats.sh
│   │   ├── make_denlats_sgmm.sh
│   │   ├── make_denlats_sgmm2.sh
│   │   ├── mk_aslf_lda_mllt.sh
│   │   ├── mk_aslf_sgmm2.sh
│   │   ├── train_deltas.sh
│   │   ├── train_lda_mllt.sh
│   │   ├── train_mllt.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mmi_sgmm.sh
│   │   ├── train_mmi_sgmm2.sh
│   │   ├── train_mono.sh
│   │   ├── train_sat.sh
│   │   ├── train_sgmm.sh
│   │   ├── train_sgmm2.sh
│   │   └── train_ubm.sh
│   ├── train_deltas.sh
│   ├── train_diag_ubm.sh
│   ├── train_lda_mllt.sh
│   ├── train_lvtln.sh
│   ├── train_map.sh
│   ├── train_mmi.sh
│   ├── train_mmi_fmmi.sh
│   ├── train_mmi_fmmi_indirect.sh
│   ├── train_mmi_sgmm.sh
│   ├── train_mmi_sgmm2.sh
│   ├── train_mono.sh
│   ├── train_mpe.sh
│   ├── train_nnet.sh
│   ├── train_quick.sh
│   ├── train_raw_sat.sh
│   ├── train_sat.sh
│   ├── train_sat_basis.sh
│   ├── train_segmenter.sh
│   ├── train_sgmm.sh
│   ├── train_sgmm2.sh
│   ├── train_sgmm2_group.sh
│   ├── train_smbr.sh
│   ├── train_ubm.sh
│   └── word_align_lattices.sh
├── utils
│   ├── add_disambig.pl
│   ├── add_lex_disambig.pl
│   ├── analyze_segments.pl
│   ├── apply_map.pl
│   ├── best_wer.sh
│   ├── build_const_arpa_lm.sh
│   ├── combine_data.sh
│   ├── convert_ctm.pl
│   ├── convert_slf.pl
│   ├── convert_slf_parallel.sh
│   ├── copy_data_dir.sh
│   ├── create_data_link.pl
│   ├── create_split_dir.pl
│   ├── data
│   │   ├── combine_data.sh
│   │   ├── combine_short_segments.sh
│   │   ├── copy_data_dir.sh
│   │   ├── extend_segment_times.py
│   │   ├── fix_data_dir.sh
│   │   ├── get_frame_shift.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_segments_for_data.sh
│   │   ├── get_utt2dur.sh
│   │   ├── internal
│   │   │   ├── choose_utts_to_combine.py
│   │   │   └── modify_speaker_info.py
│   │   ├── modify_speaker_info.sh
│   │   ├── normalize_data_range.pl
│   │   ├── perturb_data_dir_speed.sh
│   │   ├── perturb_data_dir_speed_3way.sh
│   │   ├── perturb_data_dir_volume.sh
│   │   ├── remove_dup_utts.sh
│   │   ├── split_data.sh
│   │   ├── subsegment_data_dir.sh
│   │   ├── subset_data_dir.sh
│   │   └── validate_data_dir.sh
│   ├── dict_dir_add_pronprobs.sh
│   ├── eps2disambig.pl
│   ├── filt.py
│   ├── filter_scp.pl
│   ├── filter_scps.pl
│   ├── find_arpa_oovs.pl
│   ├── fix_ctm.sh
│   ├── fix_data_dir.sh
│   ├── format_lm.sh
│   ├── format_lm_sri.sh
│   ├── gen_topo.pl
│   ├── int2sym.pl
│   ├── kwslist_post_process.pl
│   ├── lang
│   │   ├── add_lex_disambig.pl
│   │   ├── check_g_properties.pl
│   │   ├── check_phones_compatible.sh
│   │   ├── prepare_lang.sh
│   │   └── validate_lang.pl
│   ├── ln.pl
│   ├── make_lexicon_fst.pl
│   ├── make_lexicon_fst_silprob.pl
│   ├── make_phone_bigram_lang.sh
│   ├── make_unigram_grammar.pl
│   ├── map_arpa_lm.pl
│   ├── mkgraph.sh
│   ├── nnet-cpu
│   │   ├── make_nnet_config.pl
│   │   ├── make_nnet_config_block.pl
│   │   ├── make_nnet_config_preconditioned.pl
│   │   └── update_learning_rates.pl
│   ├── nnet
│   │   ├── gen_dct_mat.py
│   │   ├── gen_hamm_mat.py
│   │   ├── gen_splice.py
│   │   ├── make_blstm_proto.py
│   │   ├── make_cnn2d_proto.py
│   │   ├── make_cnn_proto.py
│   │   ├── make_lstm_proto.py
│   │   └── make_nnet_proto.py
│   ├── parse_options.sh
│   ├── pbs.pl
│   ├── perturb_data_dir_speed.sh
│   ├── pinyin_map.pl
│   ├── prepare_lang.sh
│   ├── prepare_online_nnet_dist_build.sh
│   ├── queue.pl
│   ├── reduce_data_dir.sh
│   ├── reduce_data_dir_by_reclist.sh
│   ├── remove_data_links.sh
│   ├── remove_oovs.pl
│   ├── reverse_arpa.py
│   ├── rnnlm_compute_scores.sh
│   ├── run.pl
│   ├── s2eps.pl
│   ├── scoring
│   │   ├── wer_ops_details.pl
│   │   ├── wer_per_spk_details.pl
│   │   ├── wer_per_utt_details.pl
│   │   └── wer_report.pl
│   ├── segmentation.pl
│   ├── show_lattice.sh
│   ├── shuffle_list.pl
│   ├── slurm.pl
│   ├── spk2utt_to_utt2spk.pl
│   ├── split_data.sh
│   ├── split_scp.pl
│   ├── ssh.pl
│   ├── subset_data_dir.sh
│   ├── subset_data_dir_tr_cv.sh
│   ├── subset_scp.pl
│   ├── summarize_logs.pl
│   ├── summarize_warnings.pl
│   ├── sym2int.pl
│   ├── utt2spk_to_spk2utt.pl
│   ├── validate_data_dir.sh
│   ├── validate_dict_dir.pl
│   ├── validate_lang.pl
│   └── write_kwslist.pl
└── waves_yesno
    ├── 0_0_0_0_1_1_1_1.wav
    ├── 0_0_0_1_0_0_0_1.wav
    ├── 0_0_0_1_0_1_1_0.wav
    ├── 0_0_1_0_0_0_1_0.wav
    ├── 0_0_1_0_0_1_1_0.wav
    ├── 0_0_1_0_0_1_1_1.wav
    ├── 0_0_1_0_1_0_0_0.wav
    ├── 0_0_1_0_1_0_0_1.wav
    ├── 0_0_1_0_1_0_1_1.wav
    ├── 0_0_1_1_0_0_0_1.wav
    ├── 0_0_1_1_0_1_0_0.wav
    ├── 0_0_1_1_0_1_1_0.wav
    ├── 0_0_1_1_0_1_1_1.wav
    ├── 0_0_1_1_1_0_0_0.wav
    ├── 0_0_1_1_1_0_0_1.wav
    ├── 0_0_1_1_1_1_0_0.wav
    ├── 0_0_1_1_1_1_1_0.wav
    ├── 0_1_0_0_0_1_0_0.wav
    ├── 0_1_0_0_0_1_1_0.wav
    ├── 0_1_0_0_1_0_1_0.wav
    ├── 0_1_0_0_1_0_1_1.wav
    ├── 0_1_0_1_0_0_0_0.wav
    ├── 0_1_0_1_1_0_1_0.wav
    ├── 0_1_0_1_1_1_0_0.wav
    ├── 0_1_1_0_0_1_1_0.wav
    ├── 0_1_1_0_0_1_1_1.wav
    ├── 0_1_1_1_0_0_0_0.wav
    ├── 0_1_1_1_0_0_1_0.wav
    ├── 0_1_1_1_0_1_0_1.wav
    ├── 0_1_1_1_1_0_1_0.wav
    ├── 0_1_1_1_1_1_1_1.wav
    ├── 1_0_0_0_0_0_0_0.wav
    ├── 1_0_0_0_0_0_0_1.wav
    ├── 1_0_0_0_0_0_1_1.wav
    ├── 1_0_0_0_1_0_0_1.wav
    ├── 1_0_0_1_0_1_1_1.wav
    ├── 1_0_1_0_1_0_0_1.wav
    ├── 1_0_1_1_0_1_1_1.wav
    ├── 1_0_1_1_1_0_1_0.wav
    ├── 1_0_1_1_1_1_0_1.wav
    ├── 1_1_0_0_0_0_0_1.wav
    ├── 1_1_0_0_0_1_1_1.wav
    ├── 1_1_0_0_1_0_1_0.wav
    ├── 1_1_0_0_1_0_1_1.wav
    ├── 1_1_0_0_1_1_1_0.wav
    ├── 1_1_0_1_0_1_0_0.wav
    ├── 1_1_0_1_0_1_1_0.wav
    ├── 1_1_0_1_1_0_0_1.wav
    ├── 1_1_0_1_1_0_1_1.wav
    ├── 1_1_0_1_1_1_1_0.wav
    ├── 1_1_1_0_0_0_0_1.wav
    ├── 1_1_1_0_0_1_0_1.wav
    ├── 1_1_1_0_0_1_1_1.wav
    ├── 1_1_1_0_1_0_1_0.wav
    ├── 1_1_1_0_1_0_1_1.wav
    ├── 1_1_1_1_0_0_1_0.wav
    ├── 1_1_1_1_0_1_0_0.wav
    ├── 1_1_1_1_1_0_0_0.wav
    ├── 1_1_1_1_1_1_0_0.wav
    ├── 1_1_1_1_1_1_1_1.wav
    └── README

--------------------------------------------------------------------------------
/conf/mfcc.conf:
--------------------------------------------------------------------------------
--use-energy=false   # only non-default option.
--sample-frequency=8000 # Switchboard is sampled at 8kHz

--------------------------------------------------------------------------------
/conf/topo_orig.proto:
--------------------------------------------------------------------------------
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
<State> 5 </State>
</TopologyEntry>
</Topology>

--------------------------------------------------------------------------------
/data_prep.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

import os
import os.path
import sys

zeroes = []
ones = []
for fn in os.listdir('waves_yesno'):
    if fn.startswith('0'):
        zeroes.append(fn)  # => training set
    elif fn.startswith('1'):
        ones.append(fn)    # => test set


def text(filenames):
    # "text": one line per utterance, "<utterance-id> <transcript>",
    # where the transcript is recovered from the filename digits.
    results = []
    for filename in filenames:
        basename = filename.split('.')[0]
        transcript = basename.replace('1', 'YES').replace('0', 'NO').replace('_', ' ')
        results.append("{} {}".format(basename, transcript))
    return '\n'.join(sorted(results))

with open('data/train_yesno/text', 'w') as train_text, open('data/test_yesno/text', 'w') as test_text:
    train_text.write(text(zeroes))
    test_text.write(text(ones))


def wav_scp(filenames):
    # "wav.scp": "<utterance-id> <path-to-wav-file>"
    results = []
    for filename in filenames:
        results.append("{} waves_yesno/{}".format(filename.split('.')[0], filename))
    return '\n'.join(sorted(results))

with open('data/train_yesno/wav.scp', 'w') as train_scp, open('data/test_yesno/wav.scp', 'w') as test_scp:
    train_scp.write(wav_scp(zeroes))
    test_scp.write(wav_scp(ones))


def utt2spk(filenames):
    # "utt2spk": "<utterance-id> <speaker-id>"; the yesno corpus has a single
    # anonymous speaker, so every utterance maps to one dummy speaker id.
    results = []
    for filename in filenames:
        results.append("{} global".format(filename.split('.')[0]))
    return '\n'.join(sorted(results))

with open('data/train_yesno/utt2spk', 'w') as train_u2s, open('data/test_yesno/utt2spk', 'w') as test_u2s:
    train_u2s.write(utt2spk(zeroes))
    test_u2s.write(utt2spk(ones))

# note that spk2utt (the inverse mapping of utt2spk) can be generated by using
# a Kaldi util, once you have the utt2spk file.
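As the closing comment notes, spk2utt is just utt2spk inverted, and the stock
utilities shipped in utils/ can generate and sanity-check it. A minimal sketch
(directory names follow data_prep.py above; run after the script has finished):

  for d in data/train_yesno data/test_yesno; do
    utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt
    utils/validate_data_dir.sh --no-feats $d   # checks text, wav.scp, utt2spk/spk2utt
  done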
--------------------------------------------------------------------------------
/lm/prepare_lm.sh:
--------------------------------------------------------------------------------
#!/bin/bash

. path.sh

echo Preparing language models for test

for lm_suffix in tg; do
  test=data/lang_test_${lm_suffix}

  rm -rf data/lang_test_${lm_suffix}
  cp -r data/lang data/lang_test_${lm_suffix}

  arpa2fst --disambig-symbol=#0 --read-symbol-table=$test/words.txt lm/task.arpabo $test/G.fst

  fstisstochastic $test/G.fst

  # The output is like:
  # 9.14233e-05 -0.259833
  # we do expect the first of these 2 numbers to be close to zero (the second is
  # nonzero because the backoff weights make the states sum to >1).
  # Because of the <s> fiasco for these particular LMs, the first number is not
  # as close to zero as it could be.

  # Everything below is only for diagnostics.
  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
  # this might cause determinization failure of CLG.
  # #0 is treated as an empty word.
  mkdir -p tmpdir.g
  awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
    < dict/lexicon.txt > tmpdir.g/select_empty.fst.txt
  fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt tmpdir.g/select_empty.fst.txt | \
    fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > tmpdir.g/empty_words.fst
  fstinfo tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' &&
    echo "Language model has cycles with empty words" && exit 1
  rm -r tmpdir.g
done

echo "Succeeded in formatting data."

--------------------------------------------------------------------------------
/lm/yesno-unigram.arpabo:
--------------------------------------------------------------------------------
\data\
ngram 1=4

\1-grams:
-1 NO
-1 YES
-99 <s>
-1 </s>

\end\
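The first column of the ARPA file holds base-10 log probabilities, so "-1 NO"
means P(NO) = 10^-1; <s> gets -99 (effectively zero) because a sentence may
start with it but it is never predicted as a next word. Once prepare_lm.sh has
compiled the LM, the resulting grammar FST can be eyeballed with OpenFst's
text printer, e.g. (a sketch, assuming path.sh has been sourced):

  fstprint --isymbols=data/lang_test_tg/words.txt \
           --osymbols=data/lang_test_tg/words.txt data/lang_test_tg/G.fst | head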
-f $f ] && echo "score.sh: no such file $f" && exit 1; 38 | done 39 | 40 | mkdir -p $dir/scoring/log 41 | 42 | cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt 43 | 44 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ 45 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 46 | lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ 47 | lattice-best-path --word-symbol-table=$symtab \ 48 | ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; 49 | 50 | # Note: the double level of quoting for the sed command 51 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 52 | cat $dir/scoring/LMWT.tra \| \ 53 | utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ 54 | compute-wer --text --mode=present \ 55 | ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; 56 | 57 | exit 0; 58 | -------------------------------------------------------------------------------- /path.sh: -------------------------------------------------------------------------------- 1 | export KALDI_ROOT=YOUR_KALDI_PATH 2 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh 3 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH 4 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 5 | . $KALDI_ROOT/tools/config/common_path.sh 6 | export LC_ALL=C 7 | 8 | 9 | -------------------------------------------------------------------------------- /steps/append_feats.sh: -------------------------------------------------------------------------------- 1 | paste_feats.sh -------------------------------------------------------------------------------- /steps/cleanup/make_utterance_fsts.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | # makes unigram decoding-graph FSTs specific to each utterances, where the 5 | # supplied top-n-words list together with the supervision text of the utterance are 6 | # combined. 7 | 8 | if (@ARGV != 1) { 9 | print STDERR "** Warning: this script is deprecated and will be removed. See\n" . 10 | "** steps/cleanup/make_biased_lm_graphs.sh.\n" . 11 | "Usage: make_utterance_fsts.pl top-words-file.txt < text-archive > fsts-archive\n" . 12 | "e.g.: utils/sym2int.pl -f 2- data/lang/words.txt data/train/text | \\\n" . 13 | " make_utterance_fsts.pl exp/foo/top_words.int | compile-train-graphs-fsts ... 
\n"; 14 | exit(1); 15 | } 16 | 17 | ($top_words_file) = @ARGV; 18 | 19 | open(F, "<$top_words_file") || die "opening $top_words_file"; 20 | 21 | %top_word_probs = ( ); 22 | 23 | while() { 24 | @A = split; 25 | (@A == 2 && $A[0] > 0.0) || die "Bad line $_ in $top_words_file"; 26 | $A[1] =~ m/^[0-9]+$/ || die "Expecting numeric word-ids in $top_words_file: $_\n"; 27 | $top_word_probs{$A[1]} += $A[0]; 28 | } 29 | 30 | while () { 31 | @A = split; 32 | $utterance_id = shift @A; 33 | print "$utterance_id\n"; 34 | $num_words = @A + 0; # length of array @A 35 | %word_probs = %top_word_probs; 36 | foreach $w (@A) { 37 | $w =~ m/^[0-9]+$/ || die "Expecting numeric word-ids as stdin: $_"; 38 | $word_probs{$w} += 1.0 / $num_words; 39 | } 40 | foreach $w (keys %word_probs) { 41 | $prob = $word_probs{$w}; 42 | $prob > 0.0 || die "Word $w with bad probability $prob, utterance-id = $utterance_id\n"; 43 | $cost = -log($prob); 44 | print "0 0 $w $w $cost\n"; 45 | } 46 | $final_cost = -log(1.0 / $num_words); 47 | print "0 $final_cost\n"; 48 | print "\n"; # Empty line terminates the FST in the text-archive format. 49 | } 50 | -------------------------------------------------------------------------------- /steps/combine_ali_dirs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Xiaohui Zhang Apache 2.0. 3 | 4 | # This srcipt operates on alignment directories, such as exp/tri4a_ali 5 | # the output is a new ali dir which has alignments from all the input ali dirs 6 | 7 | # Begin configuration section. 8 | cmd=run.pl 9 | extra_files= 10 | num_jobs=4 11 | # End configuration section. 12 | echo "$0 $@" # Print the command line for logging 13 | 14 | if [ -f path.sh ]; then . ./path.sh; fi 15 | . parse_options.sh || exit 1; 16 | 17 | if [[ $# -lt 3 ]]; then 18 | echo "Usage: $0 [options] ..." 19 | echo "e.g.: $0 --num-jobs 32 data/train exp/tri3_ali_combined exp/tri3_ali_1 exp_tri3_ali_2" 20 | echo "Options:" 21 | echo " --extra-files # specify addtional files in 'src-ali-dir1' to copy" 22 | echo " --num-jobs # number of jobs used to split the data directory." 23 | echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones." 24 | echo " Other than alignments, only files from the first src ali dir are copied." 25 | exit 1; 26 | fi 27 | 28 | data=$1; 29 | shift; 30 | dest=$1; 31 | shift; 32 | first_src=$1; 33 | 34 | mkdir -p $dest; 35 | rm $dest/{ali.*.gz,num_jobs} 2>/dev/null 36 | 37 | cp $first_src/phones.txt $dest || exit 1; 38 | 39 | export LC_ALL=C 40 | 41 | for dir in $*; do 42 | if [ ! -f $dir/ali.1.gz ]; then 43 | echo "$0: check if alignments (ali.*.gz) are present in $dir." 44 | exit 1; 45 | fi 46 | done 47 | 48 | for dir in $*; do 49 | for f in tree; do 50 | diff $first_src/$f $dir/$f 1>/dev/null 2>&1 51 | if [ $? -ne 0 ]; then 52 | echo "$0: Cannot combine alignment directories with different $f files." 53 | fi 54 | done 55 | done 56 | 57 | for f in final.mdl tree cmvn_opts num_jobs $extra_files; do 58 | if [ ! -f $first_src/$f ]; then 59 | echo "combine_ali_dir.sh: no such file $first_src/$f" 60 | exit 1; 61 | fi 62 | cp $first_src/$f $dest/ 63 | done 64 | 65 | src_id=0 66 | temp_dir=$dest/temp 67 | [ -d $temp_dir ] && rm -r $temp_dir; 68 | mkdir -p $temp_dir 69 | echo "$0: dumping alignments in each source directory as single archive and index." 
--------------------------------------------------------------------------------
/steps/combine_ali_dirs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2016  Xiaohui Zhang  Apache 2.0.

# This script operates on alignment directories, such as exp/tri4a_ali.
# The output is a new ali dir which has alignments from all the input ali dirs.

# Begin configuration section.
cmd=run.pl
extra_files=
num_jobs=4
# End configuration section.
echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [[ $# -lt 3 ]]; then
  echo "Usage: $0 [options] <data> <dest-ali-dir> <src-ali-dir1> <src-ali-dir2> ..."
  echo "e.g.: $0 --num-jobs 32 data/train exp/tri3_ali_combined exp/tri3_ali_1 exp_tri3_ali_2"
  echo "Options:"
  echo " --extra-files <file1 file2...>   # specify additional files in 'src-ali-dir1' to copy"
  echo " --num-jobs <nj>                  # number of jobs used to split the data directory."
  echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones."
  echo " Other than alignments, only files from the first src ali dir are copied."
  exit 1;
fi

data=$1;
shift;
dest=$1;
shift;
first_src=$1;

mkdir -p $dest;
rm $dest/{ali.*.gz,num_jobs} 2>/dev/null

cp $first_src/phones.txt $dest || exit 1;

export LC_ALL=C

for dir in $*; do
  if [ ! -f $dir/ali.1.gz ]; then
    echo "$0: check if alignments (ali.*.gz) are present in $dir."
    exit 1;
  fi
done

for dir in $*; do
  for f in tree; do
    diff $first_src/$f $dir/$f 1>/dev/null 2>&1
    if [ $? -ne 0 ]; then
      echo "$0: Cannot combine alignment directories with different $f files."
    fi
  done
done

for f in final.mdl tree cmvn_opts num_jobs $extra_files; do
  if [ ! -f $first_src/$f ]; then
    echo "combine_ali_dir.sh: no such file $first_src/$f"
    exit 1;
  fi
  cp $first_src/$f $dest/
done

src_id=0
temp_dir=$dest/temp
[ -d $temp_dir ] && rm -r $temp_dir;
mkdir -p $temp_dir
echo "$0: dumping alignments in each source directory as single archive and index."
for dir in $*; do
  src_id=$((src_id + 1))
  cur_num_jobs=$(cat $dir/num_jobs) || exit 1;
  alis=$(for n in $(seq $cur_num_jobs); do echo -n "$dir/ali.$n.gz "; done)
  $cmd $dir/log/copy_alignments.log \
    copy-int-vector "ark:gunzip -c $alis|" \
    ark,scp:$temp_dir/ali.$src_id.ark,$temp_dir/ali.$src_id.scp || exit 1;
done
sort -m $temp_dir/ali.*.scp > $temp_dir/ali.scp || exit 1;

echo "$0: splitting data to get reference utt2spk for individual ali.JOB.gz files."
utils/split_data.sh $data $num_jobs || exit 1;

echo "$0: splitting the alignments to appropriate chunks according to the reference utt2spk files."
utils/filter_scps.pl JOB=1:$num_jobs \
  $data/split$num_jobs/JOB/utt2spk $temp_dir/ali.scp $temp_dir/ali.JOB.scp

for i in `seq 1 $num_jobs`; do
  copy-int-vector scp:$temp_dir/ali.${i}.scp "ark:|gzip -c >$dest/ali.$i.gz" || exit 1;
done

echo $num_jobs > $dest/num_jobs || exit 1

echo "$0: checking the alignment files generated have at least 90% of the utterances."
for i in `seq 1 $num_jobs`; do
  num_lines=`cat $temp_dir/ali.$i.scp | wc -l` || exit 1;
  num_lines_tot=`cat $data/split$num_jobs/$i/utt2spk | wc -l` || exit 1;
  python -c "import sys;
percent = 100.0 * float($num_lines) / $num_lines_tot
if percent < 90 :
  print ('$dest/ali.$i.gz {0}% utterances missing.'.format(percent))" || exit 1;
done
rm -r $temp_dir 2>/dev/null

echo "Combined alignments and stored in $dest"
exit 0
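To spot-check the result, the combined alignments can be dumped back to text
with the same binary the script itself uses (a sketch; the directory name
follows the usage example above):

  gunzip -c exp/tri3_ali_combined/ali.1.gz | copy-int-vector ark:- ark,t:- | head -n 2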
--------------------------------------------------------------------------------
/steps/conf/append_eval_to_ctm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys,operator

# Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
# (i.e. the output of 'align-text' post-processed by 'wer_per_utt_details.pl')

# The tags in the appended column are:
# 'C' = correct
# 'S' = substitution
# 'I' = insertion
# 'U' = unknown (not part of scored segment)

if len(sys.argv) != 4:
  print 'Usage: %s <eval-in> <ctm-in> <ctm-eval-out>' % __file__
  sys.exit(1)
dummy, eval_in, ctm_in, ctm_eval_out = sys.argv

if ctm_eval_out == '-': ctm_eval_out = '/dev/stdout'

# Read the evaluation,
eval_vec = dict()
with open(eval_in, 'r') as f:
  while True:
    # Reading 4 lines encoding one utterance,
    ref = f.readline()
    hyp = f.readline()
    op = f.readline()
    csid = f.readline()
    if not ref: break
    # Parse the input,
    utt,tag,hyp_vec = hyp.split(' ',2)
    assert(tag == 'hyp')
    utt,tag,op_vec = op.split(' ',2)
    assert(tag == 'op')
    hyp_vec = hyp_vec.split()
    op_vec = op_vec.split()
    # Fill the eval vector with symbols 'C', 'S', 'I',
    assert(utt not in eval_vec)
    eval_vec[utt] = []
    for op,hyp in zip(op_vec, hyp_vec):
      if hyp != '<eps>': eval_vec[utt].append(op)

# Load the 'ctm' into dictionary,
ctm = dict()
with open(ctm_in) as f:
  for l in f:
    utt, ch, beg, dur, wrd, conf = l.split()
    if not utt in ctm: ctm[utt] = []
    ctm[utt].append((utt, ch, float(beg), float(dur), wrd, float(conf)))

# Build the 'ctm' with 'eval' column added,
ctm_eval = []
for utt,ctm_part in ctm.iteritems():
  ctm_part.sort(key = operator.itemgetter(2)) # Sort by 'beg' time,
  # extending the 'tuple' by '+':
  merged = [ tup + (evl,) for tup,evl in zip(ctm_part,eval_vec[utt]) ]
  ctm_eval.extend(merged)

# Sort again,
ctm_eval.sort(key = operator.itemgetter(0,1,2))

# Store,
with open(ctm_eval_out,'w') as f:
  for tup in ctm_eval:
    f.write('%s %s %f %f %s %f %s\n' % tup)
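For reference, an input CTM row is "utt chan beg dur word conf", and the script
appends one of C/S/I/U as a seventh column, e.g. (made-up values):

  utt_001 1 0.320000 0.170000 YES 0.970000 C
  utt_001 1 0.490000 0.300000 NO 0.540000 S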
--------------------------------------------------------------------------------
/steps/conf/append_prf_to_ctm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys

# Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
# (parsed from the 'prf' output of 'sclite')

# The tags in the appended column are:
# 'C' = correct
# 'S' = substitution
# 'I' = insertion
# 'U' = unknown (not part of scored segment)

# Parse options,
if len(sys.argv) != 4:
  print "Usage: %s <prf> <ctm-in> <ctm-out>" % __file__
  sys.exit(1)
prf_file, ctm_file, ctm_out_file = sys.argv[1:]

if ctm_out_file == '-': ctm_out_file = '/dev/stdout'

# Load the prf file,
prf = []
with open(prf_file) as f:
  for l in f:
    # Store the data,
    if l[:5] == 'File:':
      file_id = l.split()[1]
    if l[:8] == 'Channel:':
      chan = l.split()[1]
    if l[:5] == 'H_T1:':
      h_t1 = l
    if l[:5] == 'Eval:':
      evl = l
      prf.append((file_id,chan,h_t1,evl))

# Parse the prf records into dictionary,
prf_dict = dict()
for (f,c,t,e) in prf:
  t_pos = 0 # position in the 't' string,
  while t_pos < len(t):
    t1 = t[t_pos:].split(' ',1)[0] # get 1st token at 't_pos'
    try:
      # get word evaluation letter 'C,S,I',
      evl = e[t_pos] if e[t_pos] != ' ' else 'C'
      # add to dictionary,
      key='%s,%s' % (f,c) # file,channel
      if key not in prf_dict: prf_dict[key] = dict()
      prf_dict[key][float(t1)] = evl
    except ValueError:
      pass
    t_pos += len(t1)+1 # advance position for parsing,

# Load the ctm file (with confidences),
with open(ctm_file) as f:
  ctm = [ l.split() for l in f ]

# Append the sclite alignment tags to ctm,
ctm_out = []
for f, chan, beg, dur, wrd, conf in ctm:
  # U = unknown, C = correct, S = substitution, I = insertion,
  sclite_tag = 'U'
  try:
    sclite_tag = prf_dict[('%s,%s'%(f,chan)).lower()][float(beg)]
  except KeyError:
    pass
  ctm_out.append([f,chan,beg,dur,wrd,conf,sclite_tag])

# Save the augmented ctm file,
with open(ctm_out_file, 'w') as f:
  f.writelines([' '.join(ctm_record)+'\n' for ctm_record in ctm_out])
-f $f ] && echo "$0: Missing file $f" && exit 1 37 | done 38 | [ -z "$cmd" ] && echo "$0: Missing --cmd '...'" && exit 1 39 | 40 | [ -d $dir/log ] || mkdir -p $dir/log 41 | nj=$(cat $latdir/num_jobs) 42 | lmwt=$(cat $caldir/lmwt) 43 | decode_mbr=$(cat $caldir/decode_mbr) 44 | 45 | # Store the setup, 46 | echo $lmwt >$dir/lmwt 47 | echo $decode_mbr >$dir/decode_mbr 48 | cp $calibration $dir/calibration.mdl 49 | cp $word_feats $dir/word_feats 50 | cp $word_categories $dir/word_categories 51 | 52 | # Create the ctm with raw confidences, 53 | # - we keep the timing relative to the utterance, 54 | if [ $stage -le 0 ]; then 55 | $cmd JOB=1:$nj $dir/log/get_ctm.JOB.log \ 56 | lattice-scale --inv-acoustic-scale=$lmwt "ark:gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ 57 | lattice-limit-depth ark:- ark:- \| \ 58 | lattice-push --push-strings=false ark:- ark:- \| \ 59 | lattice-align-words-lexicon --max-expand=10.0 \ 60 | $lang/phones/align_lexicon.int $model ark:- ark:- \| \ 61 | lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ 62 | utils/int2sym.pl -f 5 $lang/words.txt \ 63 | '>' $dir/JOB.ctm 64 | # Merge and clean, 65 | for ((n=1; n<=nj; n++)); do cat $dir/${n}.ctm; done > $dir/ctm 66 | rm $dir/*.ctm 67 | cat $dir/ctm | utils/sym2int.pl -f 5 $lang/words.txt >$dir/ctm_int 68 | fi 69 | 70 | # Compute lattice-depth, 71 | latdepth=$dir/lattice_frame_depth.ark 72 | if [ $stage -le 1 ]; then 73 | [ -e $latdepth ] || steps/conf/lattice_depth_per_frame.sh --cmd "$cmd" $latdir $dir 74 | fi 75 | 76 | # Create the forwarding data for logistic regression, 77 | if [ $stage -le 2 ]; then 78 | steps/conf/prepare_calibration_data.py --conf-feats $dir/forward_feats.ark \ 79 | --lattice-depth $latdepth $dir/ctm_int $word_feats $word_categories 80 | fi 81 | 82 | # Apply calibration model to dev, 83 | if [ $stage -le 3 ]; then 84 | logistic-regression-eval --apply-log=false $calibration \ 85 | ark:$dir/forward_feats.ark ark,t:- | \ 86 | awk '{ key=$1; p_corr=$4; sub(/,.*/,"",key); gsub(/\^/," ",key); print key,p_corr }' | \ 87 | utils/int2sym.pl -f 5 $lang/words.txt \ 88 | >$dir/ctm_calibrated 89 | fi 90 | 91 | exit 0 92 | -------------------------------------------------------------------------------- /steps/conf/convert_ctm_to_tra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | import sys, operator 7 | 8 | # This scripts loads a 'ctm' file and converts it into the 'tra' format: 9 | # "utt-key word1 word2 word3 ... wordN" 10 | # The 'utt-key' is the 1st column in the CTM. 11 | 12 | # Typically the CTM contains: 13 | # - utterance-relative timimng (i.e. 
--------------------------------------------------------------------------------
/steps/conf/convert_ctm_to_tra.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys, operator

# This script loads a 'ctm' file and converts it into the 'tra' format:
# "utt-key word1 word2 word3 ... wordN"
# The 'utt-key' is the 1st column in the CTM.

# Typically the CTM contains:
# - utterance-relative timing (i.e. prepared without 'utils/convert_ctm.pl')
# - confidences

if len(sys.argv) != 3:
  print 'Usage: %s <ctm-in> <tra-out>' % __file__
  sys.exit(1)
dummy, ctm_in, tra_out = sys.argv

if ctm_in == '-': ctm_in = '/dev/stdin'
if tra_out == '-': tra_out = '/dev/stdout'

# Load the 'ctm' into dictionary,
tra = dict()
with open(ctm_in) as f:
  for l in f:
    utt, ch, beg, dur, wrd, conf = l.split()
    if not utt in tra: tra[utt] = []
    tra[utt].append((float(beg),wrd))

# Store the data in 'tra' format,
with open(tra_out,'w') as f:
  for utt,tuples in tra.iteritems():
    tuples.sort(key = operator.itemgetter(0)) # Sort by 'beg' time,
    f.write('%s %s\n' % (utt,' '.join([t[1] for t in tuples])))
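For reference, the conversion just groups CTM rows by utterance and orders the
words by begin time, e.g. (illustrative values):

  ctm in:   utt_001 1 0.00 0.66 YES 0.98
            utt_001 1 0.70 0.51 NO 0.93
  tra out:  utt_001 YES NO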
--------------------------------------------------------------------------------
/steps/conf/lattice_depth_per_frame.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2015  Brno University of Technology (Author: Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License")

# Extract lattice-depth for each frame.

# Begin configuration
cmd=run.pl
# End configuration

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;

if [ $# != 2 ]; then
  echo "usage: $0 [opts] <decode-dir> <output-dir>"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>    # config containing options"
  echo "  --cmd"
  exit 1;
fi

set -euo pipefail

latdir=$1
dir=$2

[ ! -f $latdir/lat.1.gz ] && echo "Missing $latdir/lat.1.gz" && exit 1
nj=$(cat $latdir/num_jobs)

# Get the per-frame lattice-depth vectors,
$cmd JOB=1:$nj $dir/log/lattice_depth_per_frame.JOB.log \
  lattice-depth-per-frame "ark:gunzip -c $latdir/lat.JOB.gz |" ark,t:$dir/lattice_frame_depth.JOB.ark
# Merge,
for ((n=1; n<=nj; n++)); do cat $dir/lattice_frame_depth.${n}.ark; done >$dir/lattice_frame_depth.ark
rm $dir/lattice_frame_depth.*.ark

# Done!

--------------------------------------------------------------------------------
/steps/conf/parse_arpa_unigrams.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys, gzip, re

# Parse options,
if len(sys.argv) != 4:
  print "Usage: %s <words.txt> <arpa-gz> <unigrams-out>" % __file__
  sys.exit(0)
words_txt, arpa_gz, unigrams_out = sys.argv[1:]

if arpa_gz == '-': arpa_gz = '/dev/stdin'
if unigrams_out == '-': unigrams_out = '/dev/stdout'

# Load the words.txt,
words = [ l.split() for l in open(words_txt) ]

# Load the unigram log10 probabilities from the ARPA file,
wrd_log10 = dict()
with gzip.open(arpa_gz,'r') as f:
  read = False
  for l in f:
    if l.strip() == '\\1-grams:': read = True
    if l.strip() == '\\2-grams:': break
    if read and len(l.split())>=2:
      log10_p_unigram, wrd = re.split('[\t ]+',l.strip(),2)[:2]
      wrd_log10[wrd] = float(log10_p_unigram)

# Create list, 'wrd id log_p_unigram',
words_unigram = [[wrd, id, (wrd_log10[wrd] if wrd in wrd_log10 else -99)] for wrd,id in words ]

print >>sys.stderr, words_unigram[0]
# Store,
with open(unigrams_out,'w') as f:
  f.writelines(['%s %s %g\n' % (w,i,p) for (w,i,p) in words_unigram])
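Applied to a gzipped copy of the unigram LM above (the script reads the ARPA
file through gzip.open), the output format is "word id log10-probability",
with -99 substituted for words absent from the ARPA file. Assuming words.txt
assigns these ids, the result would look like:

  <eps> 0 -99
  NO 1 -1
  YES 2 -1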
[default %default]", type='int', default=20) 16 | (o, args) = parser.parse_args() 17 | 18 | if len(args) != 3: 19 | parser.print_help() 20 | sys.exit(1) 21 | words_file, text_file, category_mapping_file = args 22 | 23 | if text_file == '-': text_file = '/dev/stdin' 24 | if category_mapping_file == '-': category_mapping_file = '/dev/stdout' 25 | 26 | # Read the words from the 'tra' file, 27 | with open(text_file) as f: 28 | text_words = [ l.split()[1:] for l in f ] 29 | 30 | # Flatten the array of arrays of words, 31 | import itertools 32 | text_words = list(itertools.chain.from_iterable(text_words)) 33 | 34 | # Count the words (regardless if correct or incorrect), 35 | word_counts = dict() 36 | for w in text_words: 37 | if w not in word_counts: word_counts[w] = 0 38 | word_counts[w] += 1 39 | 40 | # Read the words.txt, 41 | with open(words_file) as f: 42 | word_id = [ l.split() for l in f ] 43 | 44 | # Append the categories, 45 | n=1 46 | word_id_cat=[] 47 | for word, idx in word_id: 48 | cat = 0 49 | if word in word_counts: 50 | if word_counts[word] > o.min_count: 51 | cat = n; n += 1 52 | word_id_cat.append([word, idx, str(cat)]) 53 | 54 | # Store the mapping, 55 | with open(category_mapping_file,'w') as f: 56 | f.writelines([' '.join(record)+'\n' for record in word_id_cat]) 57 | -------------------------------------------------------------------------------- /steps/data/data_dir_manipulation_lib.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def RunKaldiCommand(command, wait = True): 4 | """ Runs commands frequently seen in Kaldi scripts. These are usually a 5 | sequence of commands connected by pipes, so we use shell=True """ 6 | #logger.info("Running the command\n{0}".format(command)) 7 | p = subprocess.Popen(command, shell = True, 8 | stdout = subprocess.PIPE, 9 | stderr = subprocess.PIPE) 10 | 11 | if wait: 12 | [stdout, stderr] = p.communicate() 13 | if p.returncode is not 0: 14 | raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr) 15 | return stdout, stderr 16 | else: 17 | return p 18 | 19 | -------------------------------------------------------------------------------- /steps/decode_combine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # Combine two decoding directories by composing the lattices (we 6 | # apply a weight to each of the original weights, by default 0.5 each). 7 | # Note, this is not the only combination method, or the most normal combination 8 | # method. See also egs/wsj/s5/local/score_combine.sh. 9 | 10 | # Begin configuration section. 11 | weight1=0.5 # Weight on 1st set of lattices. 12 | cmd=run.pl 13 | skip_scoring=false 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | [ -f ./path.sh ] && . ./path.sh; # source the path. 19 | . 
--------------------------------------------------------------------------------
/steps/decode_combine.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.

# Combine two decoding directories by composing the lattices (we
# apply a weight to each of the original weights, by default 0.5 each).
# Note, this is not the only combination method, or the most normal combination
# method.  See also egs/wsj/s5/local/score_combine.sh.

# Begin configuration section.
weight1=0.5 # Weight on 1st set of lattices.
cmd=run.pl
skip_scoring=false
# End configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# -ne 5 ]; then
  echo "Usage: steps/decode_combine.sh [options] <data> <lang-dir|graph-dir> <decode-dir1> <decode-dir2> <decode-dir-out>"
  echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>    # config containing options"
  echo "  --cmd <cmd>               # Command to run in parallel with"
  echo "  --weight1 <float>         # Weight on 1st set of lattices (default 0.5)"
  exit 1;
fi

data=$1
lang_or_graphdir=$2
srcdir1=$3
srcdir2=$4
dir=$5

for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

nj1=`cat $srcdir1/num_jobs` || exit 1;
nj2=`cat $srcdir2/num_jobs` || exit 1;
[ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1;
nj=$nj1

mkdir -p $dir/log
echo $nj > $dir/num_jobs

# The lattice-interp command does the score interpolation (with composition),
# and the lattice-copy-backoff replaces the result with the 1st lattice, in
# cases where the composed result was empty.
$cmd JOB=1:$nj $dir/log/interp.JOB.log \
  lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \
    "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \
  lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \
    "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;

cp $srcdir1/final.mdl $dir/final.mdl

if ! $skip_scoring ; then
  [ ! -x local/score.sh ] && \
    echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
  local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir ||
    { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; }
fi

exit 0;

--------------------------------------------------------------------------------
/steps/decode_nnet.sh:
--------------------------------------------------------------------------------
nnet/decode.sh

--------------------------------------------------------------------------------
/steps/decode_si.sh:
--------------------------------------------------------------------------------
decode.sh
-f $f ] && echo "$0: expecting file $f to exist" && exit 1; 35 | done 36 | 37 | num_jobs=$(cat $dir/num_jobs) || exit 1 38 | 39 | mkdir -p $dir/log 40 | 41 | rm $dir/phone_stats.*.gz 2>/dev/null || true 42 | 43 | $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \ 44 | set -o pipefail '&&' ali-to-phones --write-lengths=true "$model" \ 45 | "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \ 46 | sed -E 's/^[^ ]+ //' \| \ 47 | awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ 48 | sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1 49 | 50 | if ! $cmd $dir/log/analyze_alignments.log \ 51 | gunzip -c "$dir/phone_stats.*.gz" \| \ 52 | steps/diagnostic/analyze_phone_length_stats.py $lang; then 53 | echo "$0: analyze_phone_length_stats.py failed, but ignoring the error (it's just for diagnostics)" 54 | fi 55 | 56 | grep WARNING $dir/log/analyze_alignments.log 57 | echo "$0: see stats in $dir/log/analyze_alignments.log" 58 | 59 | rm $dir/phone_stats.*.gz 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /steps/diagnostic/analyze_lats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2016. Apache 2.0. 4 | 5 | # This script does the same type of diagnostics as analyze_alignments.sh, except 6 | # it starts from lattices (so it has to convert the lattices to alignments 7 | # first). 8 | 9 | # begin configuration section. 10 | iter=final 11 | cmd=run.pl 12 | acwt=0.1 13 | #end configuration section. 14 | 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | [ -f ./path.sh ] && . ./path.sh 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 [options] (|) " 22 | echo " Options:" 23 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 24 | echo " --acwt # Acoustic scale for getting best-path (default: 0.1)" 25 | echo "e.g.:" 26 | echo "$0 data/lang exp/tri4b/decode_dev" 27 | echo "This script writes some diagnostics to /log/alignments.log" 28 | exit 1; 29 | fi 30 | 31 | lang=$1 32 | dir=$2 33 | 34 | model=$dir/../${iter}.mdl 35 | 36 | for f in $lang/words.txt $model $dir/lat.1.gz $dir/num_jobs; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | num_jobs=$(cat $dir/num_jobs) || exit 1 41 | 42 | mkdir -p $dir/log 43 | 44 | rm $dir/phone_stats.*.gz 2>/dev/null || true 45 | 46 | # this writes two archives of depth_tmp and ali_tmp of (depth per frame, alignment per frame). 
--------------------------------------------------------------------------------
/steps/diagnostic/analyze_lats.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright  Johns Hopkins University (Author: Daniel Povey) 2016.  Apache 2.0.

# This script does the same type of diagnostics as analyze_alignments.sh, except
# it starts from lattices (so it has to convert the lattices to alignments
# first).

# begin configuration section.
iter=final
cmd=run.pl
acwt=0.1
#end configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 2 ]; then
  echo "Usage: $0 [options] (<lang-dir>|<graph-dir>) <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --acwt <acoustic-scale>         # Acoustic scale for getting best-path (default: 0.1)"
  echo "e.g.:"
  echo "$0 data/lang exp/tri4b/decode_dev"
  echo "This script writes some diagnostics to <decode-dir>/log/alignments.log"
  exit 1;
fi

lang=$1
dir=$2

model=$dir/../${iter}.mdl

for f in $lang/words.txt $model $dir/lat.1.gz $dir/num_jobs; do
  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done

num_jobs=$(cat $dir/num_jobs) || exit 1

mkdir -p $dir/log

rm $dir/phone_stats.*.gz 2>/dev/null || true

# this writes two sets of archives, depth_tmp and ali_tmp (lattice depth per
# frame, best-path alignment per frame).
$cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
  lattice-depth-per-frame "ark:gunzip -c $dir/lat.JOB.gz|" "ark,t:|gzip -c > $dir/depth_tmp.JOB.gz" ark:- \| \
  lattice-best-path --acoustic-scale=$acwt ark:- ark:/dev/null "ark,t:|gzip -c >$dir/ali_tmp.JOB.gz" || exit 1

$cmd JOB=1:$num_jobs $dir/log/get_lattice_stats.JOB.log \
  ali-to-phones --write-lengths=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
  sed -E 's/^[^ ]+ //' \| \
  awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
  sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1


$cmd $dir/log/analyze_alignments.log \
  gunzip -c "$dir/phone_stats.*.gz" \| \
  steps/diagnostic/analyze_phone_length_stats.py $lang || exit 1

grep WARNING $dir/log/analyze_alignments.log
echo "$0: see stats in $dir/log/analyze_alignments.log"


# note: below, some things that would be interpreted by the shell have to be
# escaped since it needs to be passed to $cmd.
# the 'paste' command will paste together the phone-indexes and the depths
# so that one line will be like utt-id1 phone1 phone2 phone3 .. utt-id1 depth1 depth2 depth3 ...
# the awk command computes counts of pairs (phone, lattice-depth) and outputs lines
# containing 3 integers representing:
# phone lattice_depth, count[phone,lattice_depth]
$cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
  ali-to-phones --per-frame=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
  paste /dev/stdin '<(' gunzip -c $dir/depth_tmp.JOB.gz ')' \| \
  awk '{ half=NF/2; for (n=2; n<=half; n++) { m=n+half; count[$n " " $m]++;}} END{for(k in count) print k, count[k]; }' \| \
  gzip -c '>' $dir/depth_stats_tmp.JOB.gz


$cmd $dir/log/analyze_lattice_depth_stats.log \
  gunzip -c "$dir/depth_stats_tmp.*.gz" \| \
  steps/diagnostic/analyze_lattice_depth_stats.py $lang || exit 1

grep Overall $dir/log/analyze_lattice_depth_stats.log
echo "$0: see stats in $dir/log/analyze_lattice_depth_stats.log"


rm $dir/phone_stats.*.gz
rm $dir/depth_tmp.*.gz
rm $dir/depth_stats_tmp.*.gz
rm $dir/ali_tmp.*.gz

exit 0
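So each line handed to analyze_lattice_depth_stats.py is "phone-id depth
count": for instance, a line "5 3 1024" would mean that frames aligned to
phone 5 had a lattice depth of 3 on 1024 frames (values illustrative).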
--------------------------------------------------------------------------------
/steps/get_ctm.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright  Johns Hopkins University (Author: Daniel Povey) 2012.  Apache 2.0.

# This script produces CTM files from a decoding directory that has lattices
# present.


# begin configuration section.
cmd=run.pl
stage=0
frame_shift=0.01
min_lmwt=5
max_lmwt=20
use_segments=true # if we have a segments file, use it to convert
                  # the segments to be relative to the original files.
print_silence=false
#end configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
  echo "    --use-segments (true|false)     # use segments and reco2file_and_channel files "
  echo "                                    # to produce a ctm relative to the original audio"
  echo "                                    # files, with channel information (typically needed"
  echo "                                    # for NIST scoring)."
  echo "    --frame-shift (default=0.01)    # specify this if your lattices have a frame-shift"
  echo "                                    # not equal to 0.01 seconds"
  echo "e.g.:"
  echo "$0 data/train data/lang exp/tri4a/decode/"
  echo "See also: steps/get_train_ctm.sh"
  exit 1;
fi

data=$1
lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
dir=$3

model=$dir/../final.mdl # assume model one level up from decoding dir.


for f in $lang/words.txt $model $dir/lat.1.gz; do
  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done

name=`basename $data`; # e.g. eval2000

mkdir -p $dir/scoring/log

if [ $stage -le 0 ]; then
  if [ -f $data/segments ] && $use_segments; then
    f=$data/reco2file_and_channel
    [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
    filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel"
  else
    filter_cmd=cat
  fi

  nj=$(cat $dir/num_jobs)
  lats=$(for n in $(seq $nj); do echo -n "$dir/lat.$n.gz "; done)
  if [ -f $lang/phones/word_boundary.int ]; then
    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
      set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
      lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
      nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
      utils/int2sym.pl -f 5 $lang/words.txt \| \
      $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
  elif [ -f $lang/phones/align_lexicon.int ]; then
    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
      set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
      lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
      lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
      nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
      utils/int2sym.pl -f 5 $lang/words.txt \| \
      $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
  else
    echo "$0: neither $lang/phones/word_boundary.int nor $lang/phones/align_lexicon.int exists: cannot align."
    exit 1;
  fi
fi
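A sketch of typical use (paths illustrative): one CTM is written per LM weight
under the decode directory, named after the data directory:

  steps/get_ctm.sh --min-lmwt 7 --max-lmwt 11 \
    data/test_yesno data/lang_test_tg exp/mono0a/decode_test_yesno
  ls exp/mono0a/decode_test_yesno/score_*/test_yesno.ctm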
--------------------------------------------------------------------------------
/steps/lmrescore_const_arpa.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014  Guoguo Chen
# Apache 2.0

# This script rescores lattices with the ConstArpaLm format language model.

# Begin configuration section.
cmd=run.pl
skip_scoring=false
stage=1
scoring_opts=
# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./utils/parse_options.sh

if [ $# != 5 ]; then
  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
  echo "Usage: $0 [options] <old-lang-dir> <new-lang-dir> \\"
  echo "                    <data-dir> <input-decode-dir> <output-decode-dir>"
  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
  exit 1;
fi

[ -f path.sh ] && . ./path.sh;

oldlang=$1
newlang=$2
data=$3
indir=$4
outdir=$5

oldlm=$oldlang/G.fst
newlm=$newlang/G.carpa
! cmp $oldlang/words.txt $newlang/words.txt &&\
  echo "$0: Warning: vocabularies may be incompatible."
[ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
[ ! -f $newlm ] && echo "$0: Missing file $newlm" && exit 1;
! ls $indir/lat.*.gz >/dev/null &&\
  echo "$0: No lattices input directory $indir" && exit 1;

if ! cmp -s $oldlang/words.txt $newlang/words.txt; then
  echo "$0: $oldlang/words.txt and $newlang/words.txt differ: make sure you know what you are doing.";
fi

oldlmcommand="fstproject --project_output=true $oldlm |"

mkdir -p $outdir/log
nj=`cat $indir/num_jobs` || exit 1;
cp $indir/num_jobs $outdir

if [ $stage -le 1 ]; then
  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
    lattice-lmrescore --lm-scale=-1.0 \
      "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \
    lattice-lmrescore-const-arpa --lm-scale=1.0 \
      ark:- "$newlm" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
fi

if ! $skip_scoring && [ $stage -le 2 ]; then
  err_msg="Not scoring because local/score.sh does not exist or not executable."
  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
  local/score.sh --cmd "$cmd" $scoring_opts $data $newlang $outdir
else
  echo "Not scoring because requested so..."
fi

exit 0;
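The G.carpa input is built once from an ARPA-format LM with the companion
utility shipped in utils/; a sketch (the 4-gram ARPA file and the experiment
directories are assumptions):

  utils/build_const_arpa_lm.sh lm/fg.arpa.gz data/lang data/lang_test_fg
  steps/lmrescore_const_arpa.sh data/lang_test_tg data/lang_test_fg \
    data/test exp/tri3/decode_test_tg exp/tri3/decode_test_fg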
ls $indir/lat.*.gz >/dev/null &&\ 57 | echo "$0: No lattices input directory $indir" && exit 1; 58 | awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { 59 | print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ 60 | || exit 1; 61 | 62 | oldlm_command="fstproject --project_output=true $oldlm |" 63 | 64 | acwt=`perl -e "print (1.0/$inv_acwt);"` 65 | 66 | mkdir -p $outdir/log 67 | nj=`cat $indir/num_jobs` || exit 1; 68 | cp $indir/num_jobs $outdir 69 | 70 | oldlm_weight=`perl -e "print -1.0 * $weight;"` 71 | if [ "$oldlm" == "$oldlang/G.fst" ]; then 72 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ 73 | lattice-lmrescore --lm-scale=$oldlm_weight \ 74 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \ 75 | lattice-lmrescore-rnnlm --lm-scale=$weight \ 76 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \ 77 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \ 78 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; 79 | else 80 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ 81 | lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \ 82 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \ 83 | lattice-lmrescore-rnnlm --lm-scale=$weight \ 84 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \ 85 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \ 86 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; 87 | fi 88 | 89 | if ! $skip_scoring ; then 90 | err_msg="Not scoring because local/score.sh does not exist or not executable." 91 | [ ! -x local/score.sh ] && echo $err_msg && exit 1; 92 | local/score.sh --cmd "$cmd" $data $oldlang $outdir 93 | else 94 | echo "Not scoring because requested so..." 95 | fi 96 | 97 | exit 0; 98 | -------------------------------------------------------------------------------- /steps/nnet2/adjust_priors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | # Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) 5 | # License: Apache 2.0 6 | 7 | # Begin configuration section. 8 | cmd=run.pl 9 | iter=final 10 | # End configuration section 11 | 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | 15 | if [ -f path.sh ]; then . ./path.sh; fi 16 | . parse_options.sh || exit 1; 17 | 18 | if [ $# != 2 ]; then 19 | echo "Usage: $0 [opts] " 20 | echo " e.g.: $0 exp/tri4_mpe_degs exp/tri4_mpe" 21 | echo "" 22 | echo "Performs priors adjustment either on the final iteration" 23 | echo "or iteration of choice of the training. The adjusted model" 24 | echo "filename will be suffixed by \"adj\", i.e. for the final" 25 | echo "iteration final.mdl will become final.adj.mdl" 26 | echo "" 27 | echo "Main options (for others, see top of script file)" 28 | echo " --config # config file containing options" 29 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 30 | echo " --iter # which iteration to be adjusted" 31 | exit 1; 32 | fi 33 | 34 | degs_dir=$1 35 | dir=$2 36 | 37 | src_model=$dir/${iter}.mdl 38 | 39 | if [ ! -f $src_model ]; then 40 | echo "$0: Expecting $src_model to exist." 41 | exit 1 42 | fi 43 | 44 | if [ ! -f $degs_dir/priors_egs.1.ark ]; then 45 | echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist." 
46 | exit 1 47 | fi 48 | 49 | num_archives_priors=`cat $degs_dir/info/num_archives_priors` || { 50 | echo "Could not find $degs_dir/info/num_archives_priors."; 51 | exit 1; 52 | } 53 | 54 | $cmd JOB=1:$num_archives_priors $dir/log/get_post.${iter}.JOB.log \ 55 | nnet-compute-from-egs "nnet-to-raw-nnet $src_model -|" \ 56 | ark:$degs_dir/priors_egs.JOB.ark ark:- \| \ 57 | matrix-sum-rows ark:- ark:- \| \ 58 | vector-sum ark:- $dir/post.${iter}.JOB.vec || { 59 | echo "Error in getting posteriors for adjusting priors." 60 | echo "See $dir/log/get_post.${iter}.*.log"; 61 | exit 1; 62 | } 63 | 64 | 65 | $cmd $dir/log/sum_post.${iter}.log \ 66 | vector-sum $dir/post.${iter}.*.vec $dir/post.${iter}.vec || { 67 | echo "Error in summing posteriors. See $dir/log/sum_post.${iter}.log"; 68 | exit 1; 69 | } 70 | 71 | rm -f $dir/post.${iter}.*.vec 72 | 73 | echo "Re-adjusting priors based on computed posteriors for iter $iter" 74 | $cmd $dir/log/adjust_priors.${iter}.log \ 75 | nnet-adjust-priors $src_model $dir/post.${iter}.vec $dir/${iter}.adj.mdl || { 76 | echo "Error in adjusting priors. See $dir/log/adjust_priors.${iter}.log"; 77 | exit 1; 78 | } 79 | 80 | echo "Done adjusting priors (on $src_model)" 81 | -------------------------------------------------------------------------------- /steps/nnet2/convert_nnet1_to_nnet2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script converts nnet1 into nnet2 models. 7 | # Note, it doesn't support all possible types of nnet1 models. 8 | 9 | # Begin configuration section 10 | cleanup=true 11 | cmd=run.pl 12 | # End configuration section. 13 | 14 | echo "$0 $@" # Print the command line for logging 15 | 16 | [ -f ./path.sh ] && . ./path.sh; # source the path. 17 | . parse_options.sh || exit 1; 18 | 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 [options] " 22 | echo "e.g.: $0 exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2" 23 | exit 1; 24 | fi 25 | 26 | src=$1 27 | dir=$2 28 | 29 | mkdir -p $dir/log || exit 1; 30 | 31 | for f in $src/final.mdl $src/final.feature_transform $src/ali_train_pdf.counts; do 32 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 33 | done 34 | 35 | cp $src/phones.txt $dir/phones.txt || exit 1; 36 | $cmd $dir/log/convert_feature_transform.log \ 37 | nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1; 38 | 39 | 40 | if [ -f $src/final.nnet ]; then 41 | echo "$0: $src/final.nnet exists, using it as input." 42 | $cmd $dir/log/convert_model.log \ 43 | nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1; 44 | elif [ -f $src/final.dbn ]; then 45 | echo "$0: $src/final.dbn exists, using it as input." 46 | num_leaves=$(am-info $src/final.mdl | grep -w pdfs | awk '{print $NF}') || exit 1; 47 | dbn_output_dim=$(nnet-info exp/dnn4b_pretrain-dbn/6.dbn | grep component | tail -n 1 | sed s:,::g | awk '{print $NF}') || exit 1; 48 | [ -z "$dbn_output_dim" ] && exit 1; 49 | 50 | cat > $dir/final_layer.conf < " 22 | echo "where will typically be a normal neural net from another corpus," 23 | echo "and will usually be a single-layer neural net trained on top of it by" 24 | echo "dumping the activations (e.g. using steps/online/nnet2/dump_nnet_activations.sh, I" 25 | echo "think no such script exists for non-online), and then training using" 26 | echo "steps/nnet2/retrain_fast.sh." 
27 | echo "e.g.: $0 ../../swbd/s5b/exp/nnet2_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online_combined" 28 | fi 29 | 30 | 31 | src1=$1 32 | src2=$2 33 | dir=$3 34 | 35 | for f in $src1/final.mdl $src2/tree $src2/final.mdl; do 36 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1; 37 | done 38 | 39 | 40 | mkdir -p $dir/log 41 | 42 | info=$dir/nnet_info 43 | nnet-am-info $src1/final.mdl >$info 44 | nc=$(grep num-components $info | awk '{print $2}'); 45 | if grep SumGroupComponent $info >/dev/null; then 46 | nc_truncate=$[$nc-3] # we did mix-up: remove AffineComponent, 47 | # SumGroupComponent, SoftmaxComponent 48 | else 49 | # we didn't mix-up: 50 | nc_truncate=$[$nc-2] # remove AffineComponent, SoftmaxComponent 51 | fi 52 | 53 | $cmd $dir/log/get_raw_nnet.log \ 54 | nnet-to-raw-nnet --truncate=$nc_truncate $src1/final.mdl $dir/first_nnet.raw || exit 1; 55 | 56 | $cmd $dir/log/append_nnet.log \ 57 | nnet-insert --randomize-next-component=false --insert-at=0 \ 58 | $src2/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1; 59 | 60 | $cleanup && rm $dir/first_nnet.raw 61 | 62 | # Copy the tree etc., 63 | 64 | cp $src2/tree $dir || exit 1; 65 | 66 | # Copy feature-related things from src1 where we built the initial model. 67 | # Note: if you've done anything like mess with the feature-extraction configs, 68 | # or changed the feature type, you have to keep track of that yourself. 69 | for f in final.mat cmvn_opts splice_opts; do 70 | if [ -f $src1/$f ]; then 71 | cp $src1/$f $dir || exit 1; 72 | fi 73 | done 74 | 75 | echo "$0: created appended model in $dir" 76 | -------------------------------------------------------------------------------- /steps/nnet2/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | ../../utils/data/get_num_frames.sh -------------------------------------------------------------------------------- /steps/nnet2/relabel_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vimal Manohar. Apache 2.0. 4 | # This script, which will generally be called during the neural-net training 5 | # relabels existing examples with better labels obtained by realigning the data 6 | # with the current nnet model 7 | 8 | # Begin configuration section 9 | cmd=run.pl 10 | stage=0 11 | extra_egs= # Names of additional egs files that need to relabelled 12 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs, 13 | # valid_diagnostic.egs 14 | iter=final 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | if [ -f path.sh ]; then . ./path.sh; fi 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# != 3 ]; then 21 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] " 22 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5" 23 | echo "" 24 | echo "Main options (for others, see top of script file)" 25 | echo " --config # config file containing options" 26 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs." 27 | 28 | exit 1; 29 | fi 30 | 31 | alidir=$1 32 | egs_in_dir=$2 33 | dir=$3 34 | 35 | model=$alidir/$iter.mdl 36 | 37 | # Check some files. 38 | 39 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.0.ark $egs_in_dir/combine.egs \ 40 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \ 41 | $egs_in_dir/num_jobs_nnet $egs_in_dir/iters_per_epoch $egs_in_dir/samples_per_iter; do 42 | [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; 43 | done 44 | 45 | num_jobs_nnet=`cat $egs_in_dir/num_jobs_nnet` 46 | iters_per_epoch=`cat $egs_in_dir/iters_per_epoch` 47 | samples_per_iter_real=`cat $egs_in_dir/samples_per_iter` 48 | num_jobs_align=`cat $alidir/num_jobs` 49 | 50 | mkdir -p $dir/log 51 | 52 | echo $num_jobs_nnet > $dir/num_jobs_nnet 53 | echo $iters_per_epoch > $dir/iters_per_epoch 54 | echo $samples_per_iter_real > $dir/samples_per_iter 55 | 56 | alignments=$(for n in $(seq $num_jobs_align); do echo -n "$alidir/ali.$n.gz "; done) 57 | 58 | if [ $stage -le 0 ]; then 59 | egs_in= 60 | egs_out= 61 | for x in `seq 1 $num_jobs_nnet`; do 62 | for y in `seq 0 $[$iters_per_epoch-1]`; do 63 | utils/create_data_link.pl $dir/egs.$x.$y.ark 64 | if [ $x -eq 1 ]; then 65 | egs_in="$egs_in ark:$egs_in_dir/egs.JOB.$y.ark " 66 | egs_out="$egs_out ark:$dir/egs.JOB.$y.ark " 67 | fi 68 | done 69 | done 70 | 71 | $cmd JOB=1:$num_jobs_nnet $dir/log/relabel_egs.JOB.log \ 72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 73 | $egs_in $egs_out || exit 1 74 | fi 75 | 76 | if [ $stage -le 1 ]; then 77 | egs_in= 78 | egs_out= 79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do 80 | utils/create_data_link.pl $dir/$x 81 | egs_in="$egs_in ark:$egs_in_dir/$x" 82 | egs_out="$egs_out ark:$dir/$x" 83 | done 84 | 85 | $cmd $dir/log/relabel_egs_extra.log \ 86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 87 | $egs_in $egs_out || exit 1 88 | fi 89 | 90 | echo "$0: Finished relabeling training examples" 91 | -------------------------------------------------------------------------------- /steps/nnet2/relabel_egs2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vimal Manohar. 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | # This script, which will generally be called during the neural-net training 8 | # relabels existing examples with better labels obtained by realigning the data 9 | # with the current nnet model. 10 | # This script is as relabel_egs.sh, but is adapted to work with the newer 11 | # egs format that is written by get_egs2.sh 12 | 13 | # Begin configuration section 14 | cmd=run.pl 15 | stage=0 16 | extra_egs= # Names of additional egs files that need to relabelled 17 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs, 18 | # valid_diagnostic.egs 19 | iter=final 20 | parallel_opts= 21 | echo "$0 $@" # Print the command line for logging 22 | 23 | if [ -f path.sh ]; then . ./path.sh; fi 24 | . parse_options.sh || exit 1; 25 | 26 | if [ $# != 3 ]; then 27 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] " 28 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5" 29 | echo "" 30 | echo "Main options (for others, see top of script file)" 31 | echo " --config # config file containing options" 32 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs." 33 | 34 | exit 1; 35 | fi 36 | 37 | alidir=$1 38 | egs_in_dir=$2 39 | dir=$3 40 | 41 | model=$alidir/$iter.mdl 42 | 43 | # Check some files. 
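# A sketch of what the new-format egs directory is expected to contain, per the
# checks just below (the archive count is illustrative):
#   $egs_in_dir/egs.1.ark ... egs.N.ark, combine.egs,
#   valid_diagnostic.egs, train_diagnostic.egs, and info/num_archives
#   (a single integer, e.g. "64").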
44 | 45 | [ -f $egs_in_dir/iters_per_epoch ] && \ 46 | echo "$0: this script does not work with the old egs directory format" && exit 1; 47 | 48 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.ark $egs_in_dir/combine.egs \ 49 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \ 50 | $egs_in_dir/info/num_archives; do 51 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 52 | done 53 | 54 | num_archives=$(cat $egs_in_dir/info/num_archives) || exit 1; 55 | num_jobs_align=$(cat $alidir/num_jobs) || exit 1; 56 | 57 | mkdir -p $dir/log 58 | 59 | mkdir -p $dir/info 60 | cp -r $egs_in_dir/info/* $dir/info 61 | 62 | alignments=$(for n in $(seq $num_jobs_align); do echo $alidir/ali.$n.gz; done) 63 | 64 | if [ $stage -le 0 ]; then 65 | for x in $(seq $num_archives); do 66 | # if $dir/storage exists, make the soft links that we'll 67 | # use to distribute the data across machines 68 | utils/create_data_link.pl $dir/egs.$x.ark 69 | done 70 | 71 | $cmd $parallel_opts JOB=1:$num_archives $dir/log/relabel_egs.JOB.log \ 72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 73 | ark:$egs_in_dir/egs.JOB.ark ark:$dir/egs.JOB.ark || exit 1 74 | fi 75 | 76 | if [ $stage -le 1 ]; then 77 | egs_in= 78 | egs_out= 79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do 80 | utils/create_data_link.pl $dir/$x 81 | egs_in="$egs_in ark:$egs_in_dir/$x" 82 | egs_out="$egs_out ark:$dir/$x" 83 | done 84 | 85 | $cmd $dir/log/relabel_egs_extra.log \ 86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 87 | $egs_in $egs_out || exit 1 88 | fi 89 | 90 | echo "$0: Finished relabeling training examples" 91 | -------------------------------------------------------------------------------- /steps/nnet2/remove_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script removes the examples in an egs/ directory, e.g. 7 | # steps/nnet2/remove_egs.sh exp/nnet4b/egs/ 8 | # We give it its own script because we need to be careful about 9 | # things that are soft links to something in storage/ (i.e. remove the 10 | # data that's linked to as well as the soft link), and we want to not 11 | # delete the examples if someone has done "touch $dir/egs/.nodelete". 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo "e.g.: $0 data/nnet4b/egs/" 17 | echo "e.g.: $0 data/nnet4b_mpe/degs/" 18 | echo "This script is usually equivalent to 'rm /egs.* /degs.*' but it follows" 19 | echo "soft links to /storage/; and it avoids deleting anything in the directory if" 20 | echo "someone did 'touch /.nodelete" 21 | exit 1; 22 | fi 23 | 24 | egs=$1 25 | 26 | if [ ! -d $egs ]; then 27 | echo "$0: expected directory $egs to exist" 28 | exit 1; 29 | fi 30 | 31 | if [ -f $egs/.nodelete ]; then 32 | echo "$0: not deleting egs in $egs since $egs/.nodelete exists" 33 | exit 0; 34 | fi 35 | 36 | 37 | 38 | for f in $egs/egs.*.ark $egs/degs.*.ark $egs/cegs.*.ark; do 39 | if [ -L $f ]; then 40 | rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails. 
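# Illustrative example (hypothetical paths): if egs.3.ark is a soft link like
#   egs/egs.3.ark -> storage/2/egs.3.ark
# the rm above deletes storage/2/egs.3.ark (the link target), and the rm below
# then deletes the link itself.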
41 | fi
42 | rm $f 2>/dev/null
43 | done
44 | 
45 | 
46 | echo "$0: Finished deleting examples in $egs"
47 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | if (@ARGV != 2) {
14 | print STDERR "Usage: utils/gen_topo.pl <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
15 | print STDERR "e.g.: utils/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n";
16 | exit (1);
17 | }
18 | 
19 | ($nonsil_phones, $sil_phones) = @ARGV;
20 | 
21 | $nonsil_phones =~ s/:/ /g;
22 | $sil_phones =~ s/:/ /g;
23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
25 | 
26 | print "<Topology>\n";
27 | print "<TopologyEntry>\n";
28 | print "<ForPhones>\n";
29 | print "$nonsil_phones $sil_phones\n";
30 | print "</ForPhones>\n";
31 | # The next two lines may look like a bug, but they are as intended. State 0 has
32 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
33 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
34 | # following state 0.
35 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
36 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
37 | # code, so they are never used.
38 | print "<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>\n";
39 | print "<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>\n";
40 | print "<State> 2 </State>\n";
41 | print "</TopologyEntry>\n";
42 | print "</Topology>\n";
43 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # The next two lines may look like a bug, but they are as intended. State 0 has
38 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
39 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
40 | # following state 0.
41 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
42 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
43 | # code, so they are never used.
44 | print("<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
45 | print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
46 | print("<State> 2 </State>")
47 | print("</TopologyEntry>")
48 | print("</Topology>")
49 | 
50 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo2.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo2.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | 
38 | # the pdf-classes are as follows:
39 | # pdf-class 0 is in a 1-frame sequence, the initial and final state.
40 | # pdf-class 1 is in a sequence with >=3 frames, the 'middle' states. (important that
41 | # it be numbered 1, which is the default list of pdf-classes used in 'cluster-phones').
42 | # pdf-class 2 is the initial-state in a sequence with >= 2 frames.
43 | # pdf-class 3 is the final-state in a sequence with >= 2 frames.
44 | # state 0 is nonemitting in this topology.
45 | 
46 | print("<State> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>") # initial nonemitting state.
47 | print("<State> 1 <PdfClass> 0 <Transition> 5 1.0 </State>") # 1-frame sequence.
48 | print("<State> 2 <PdfClass> 2 <Transition> 3 0.5 <Transition> 4 0.5 </State>") # 2 or more frames
49 | print("<State> 3 <PdfClass> 1 <Transition> 3 0.5 <Transition> 4 0.5 </State>") # 3 or more frames
50 | print("<State> 4 <PdfClass> 3 <Transition> 5 1.0 </State>") # 2 or more frames.
51 | print("<State> 5 </State>") # final nonemitting state
52 | 
53 | print("</TopologyEntry>")
54 | print("</Topology>")
55 | 
56 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo3.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo3.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | print("<State> 0 <PdfClass> 0 <Transition> 0 0.5 <Transition> 1 0.5 </State>")
38 | print("<State> 1 </State>")
39 | print("</TopologyEntry>")
40 | print("</Topology>")
41 | 
42 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo4.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo4.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # state 0 is obligatory (occurs once)
38 | print("<State> 0 <PdfClass> 0 <Transition> 1 0.3333 <Transition> 2 0.3333 <Transition> 3 0.3333 </State>")
39 | # state 1 is used only when >2 frames
40 | print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
41 | # state 2 is used only when >=2 frames (and occurs once)
42 | print("<State> 2 <PdfClass> 2 <Transition> 3 1.0 </State>")
43 | print("<State> 3 </State>") # final nonemitting state
44 | print("</TopologyEntry>")
45 | print("</Topology>")
46 | 
47 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo5.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo5.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # state 0 is nonemitting
38 | print("<State> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
39 | # state 1 is for when we traverse the phone in 1 frame
40 | print("<State> 1 <PdfClass> 0 <Transition> 4 1.0 </State>")
41 | # state 2 is for when we traverse the phone in >1 frame, for the first frame.
42 | print("<State> 2 <PdfClass> 2 <Transition> 3 1.0 </State>")
43 | # state 3 is for the self-loop. Use pdf-class 1 here so that the default
44 | # phone-class clustering (which uses only pdf-class 1 by default) gets only
45 | # stats from longer phones.
46 | print("<State> 3 <PdfClass> 1 <Transition> 3 0.5 <Transition> 4 0.5 </State>")
47 | print("<State> 4 </State>")
48 | print("</TopologyEntry>")
49 | print("</Topology>")
50 | 
51 | 
--------------------------------------------------------------------------------
/steps/nnet3/get_successful_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from __future__ import print_function
4 | import re
5 | import argparse
6 | import sys
7 | 
8 | 
9 | if __name__ == "__main__":
10 |     # we add compulsory arguments as named arguments for readability
11 |     parser = argparse.ArgumentParser(description="Create a list of models suitable for averaging "
12 |                                      "based on their train objf values.",
13 |                                      epilog="See steps/nnet3/lstm/train.sh for example.")
14 | 
15 |     parser.add_argument("--difference-threshold", type=float,
16 |                         help="The threshold for discarding models: "
17 |                         "when the objf of a model differs by more than this value from the best model, "
18 |                         "it is discarded.",
19 |                         default=1.0)
20 | 
21 |     parser.add_argument("num_models", type=int,
22 |                         help="Number of models.")
23 | 
24 |     parser.add_argument("logfile_pattern", type=str,
25 |                         help="Pattern for identifying the log-file names. "
26 |                         "It specifies the entire log file name, except for the job number, "
27 |                         "which is replaced with '%'. e.g. exp/nnet3/tdnn_sp/log/train.4.%.log")
28 | 
29 |     args = parser.parse_args()
30 | 
31 |     assert(args.num_models > 0)
32 | 
33 |     parse_regex = re.compile(r"LOG .* Overall average objective function for 'output' is ([0-9e.\-+]+) over ([0-9e.\-+]+) frames")
34 |     loss = []
35 |     for i in range(args.num_models):
36 |         model_num = i + 1
37 |         logfile = re.sub('%', str(model_num), args.logfile_pattern)
38 |         lines = open(logfile, 'r').readlines()
39 |         this_loss = -100000
40 |         for line_num in range(1, len(lines) + 1):
41 |             # we search from the end, as this results in fewer regex
42 |             # searches; Python regex is slow!
43 |             mat_obj = parse_regex.search(lines[-1*line_num])
44 |             if mat_obj is not None:
45 |                 this_loss = float(mat_obj.groups()[0])
46 |                 break
47 |         loss.append(this_loss)
48 |     max_index = loss.index(max(loss))
49 |     accepted_models = []
50 |     for i in range(args.num_models):
51 |         if (loss[max_index] - loss[i]) <= args.difference_threshold:
52 |             accepted_models.append(i+1)
53 | 
54 |     model_list = " ".join(map(lambda x: str(x), accepted_models))
55 |     print(model_list)
56 | 
57 |     if len(accepted_models) != args.num_models:
58 |         print("WARNING: Only {0}/{1} of the models have been accepted for averaging, based on log files {2}.".format(len(accepted_models), args.num_models, args.logfile_pattern), file=sys.stderr)
59 |         print(" Using models {0}".format(model_list), file=sys.stderr)
60 | 
--------------------------------------------------------------------------------
/steps/nnet3/nnet3_to_dot.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # script showing use of nnet3_to_dot.py
4 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti).
5 | 
6 | # Begin configuration section.
7 | component_attributes="name,type"
8 | node_prefixes=""
9 | info_bin=nnet3-am-info
10 | echo "$0 $@" # Print the command line for logging
11 | 
12 | [ -f ./path.sh ] && . ./path.sh; # source the path.
13 | . 
parse_options.sh || exit 1; 14 | 15 | if [ $# != 3 ]; then 16 | echo "Usage: $0 [opts] " 17 | echo " e.g.: $0 exp/sdm1/nnet3/lstm_sp/0.mdl lstm.dot lstm.png" 18 | echo "" 19 | echo "Main options (for others, see top of script file)" 20 | echo " --component-attributes # attributes to be printed in nnet3 components" 21 | echo " --node-prefixes # list of prefixes. Nnet3 components/component-nodes with the same prefix" 22 | echo " # will be clustered together in the dot-graph" 23 | 24 | 25 | exit 1; 26 | fi 27 | 28 | model=$1 29 | dot_file=$2 30 | output_file=$3 31 | 32 | attr=${node_prefixes:+ --node-prefixes "$node_prefixes"} 33 | $info_bin $model | \ 34 | steps/nnet3/dot/nnet3_to_dot.py \ 35 | --component-attributes "$component_attributes" \ 36 | $attr $dot_file 37 | 38 | command -v dot >/dev/null 2>&1 || { echo >&2 "This script requires dot but it's not installed. Please compile $dot_file with dot"; exit 1; } 39 | dot -Tpdf $dot_file -o $output_file 40 | -------------------------------------------------------------------------------- /steps/online/nnet2/align.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Brno University of Technology (Author: Karel Vesely) 3 | # 2013-2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # Computes training alignments using DNN. This takes as input a directory 7 | # prepared as for online-nnet2 decoding (e.g. by 8 | # steps/online/nnet2/prepare_online_decoding.sh), and it computes the features 9 | # directly from the wav.scp instead of relying on features dumped on disk; 10 | # this avoids the hassle of having to dump suitably matched features. 11 | 12 | 13 | # Begin configuration section. 14 | nj=4 15 | cmd=run.pl 16 | # Begin configuration. 17 | scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" 18 | beam=10 19 | retry_beam=40 20 | iter=final 21 | use_gpu=no 22 | 23 | echo "$0 $@" # Print the command line for logging 24 | 25 | [ -f path.sh ] && . ./path.sh # source the path. 26 | . parse_options.sh || exit 1; 27 | 28 | if [ $# != 4 ]; then 29 | echo "Usage: $0 " 30 | echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali" 31 | echo "main options (for others, see top of script file)" 32 | echo " --config # config containing options" 33 | echo " --nj # number of parallel jobs" 34 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 35 | exit 1; 36 | fi 37 | 38 | data=$1 39 | lang=$2 40 | srcdir=$3 41 | dir=$4 42 | 43 | oov=`cat $lang/oov.int` || exit 1; 44 | mkdir -p $dir/log 45 | echo $nj > $dir/num_jobs 46 | sdata=$data/split$nj 47 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 48 | 49 | 50 | for f in $srcdir/tree $srcdir/${iter}.mdl $data/wav.scp $lang/L.fst \ 51 | $srcdir/conf/online_nnet2_decoding.conf; do 52 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 53 | done 54 | 55 | utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; 56 | cp $srcdir/phones.txt $dir || exit 1; 57 | cp $srcdir/{tree,${iter}.mdl} $dir || exit 1; 58 | 59 | grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1; 60 | 61 | 62 | if [ -f $data/segments ]; then 63 | # note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the 64 | # previous utterances within a speaker, we do the filtering after extracting the features. 65 | echo "$0 [info]: segments file exists: using that." 
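# In the rspecifiers built below, "ark,s,cs:" marks the stream as sorted and
# called-sequentially, which lets Kaldi process it without caching utterances;
# the segments file is applied by extract-segments before the online feature
# dump.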
66 | feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |" 67 | else 68 | echo "$0 [info]: no segments file exists, using wav.scp." 69 | feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |" 70 | fi 71 | 72 | echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" 73 | 74 | tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; 75 | 76 | $cmd JOB=1:$nj $dir/log/align.JOB.log \ 77 | compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ 78 | nnet-align-compiled $scale_opts --use-gpu=$use_gpu --beam=$beam --retry-beam=$retry_beam \ 79 | $srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 80 | 81 | echo "$0: done aligning data." 82 | 83 | -------------------------------------------------------------------------------- /steps/online/nnet2/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # Warning, this script is deprecated, please use utils/data/modify_speaker_info.sh 7 | 8 | # This script is as utils/copy_data_dir.sh in that it copies a data-dir, 9 | # but it supports the --utts-per-spk-max option. If nonzero, it modifies 10 | # the utt2spk and spk2utt files by splitting each speaker into multiple 11 | # versions, so that each speaker has no more than --utts-per-spk-max 12 | # utterances. 13 | 14 | # begin configuration section 15 | utts_per_spk_max=-1 16 | # end configuration section 17 | 18 | . utils/parse_options.sh 19 | 20 | if [ $# != 2 ]; then 21 | echo "Usage: " 22 | echo " $0 [options] " 23 | echo "e.g.:" 24 | echo " $0 --utts-per-spk-max 2 data/train data/train-max2" 25 | echo "Options" 26 | echo " --utts-per-spk-max # number of utterances per speaker maximum," 27 | echo " # default -1 (meaning no maximum). E.g. 2." 28 | exit 1; 29 | fi 30 | 31 | 32 | echo "$0: this script is deprecated, please use utils/data/modify_speaker_info.sh." 33 | 34 | export LC_ALL=C 35 | 36 | srcdir=$1 37 | destdir=$2 38 | 39 | if [ ! -f $srcdir/utt2spk ]; then 40 | echo "$0: no such file $srcdir/utt2spk" 41 | exit 1; 42 | fi 43 | 44 | set -e; 45 | set -o pipefail 46 | 47 | mkdir -p $destdir 48 | 49 | 50 | if [ "$utts_per_spk_max" != -1 ]; then 51 | # create spk2utt file with reduced number of utterances per speaker. 52 | awk -v max=$utts_per_spk_max '{ n=2; count=0; 53 | while(n<=NF) { 54 | int_max=int(max)+ (rand() < (max-int(max))?1:0); 55 | nmax=n+int_max; count++; printf("%s-%06x", $1, count); 56 | for (;n$destdir/spk2utt 58 | utils/spk2utt_to_utt2spk.pl <$destdir/spk2utt >$destdir/utt2spk 59 | 60 | if [ -f $srcdir/cmvn.scp ]; then 61 | # below, the first apply_map command outputs a cmvn.scp indexed by utt; 62 | # the second one outputs a cmvn.scp indexed by new speaker-id. 63 | utils/apply_map.pl -f 2 $srcdir/cmvn.scp <$srcdir/utt2spk | \ 64 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq > $destdir/cmvn.scp 65 | echo "$0: mapping cmvn.scp, but you may want to recompute it if it's needed," 66 | echo " as it would probably change." 
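# Worked example of the speaker split above (hypothetical ids): with
# --utts-per-spk-max 2, a spk2utt line "spk1 uttA uttB uttC" becomes
#   spk1-000001 uttA uttB
#   spk1-000002 uttC
# where the suffix is the hex counter from the awk script's printf("%s-%06x").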
67 | fi 68 | if [ -f $srcdir/spk2gender ]; then 69 | utils/apply_map.pl -f 2 $srcdir/spk2gender <$srcdir/utt2spk | \ 70 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq >$destdir/spk2gender 71 | fi 72 | else 73 | cp $srcdir/spk2utt $srcdir/utt2spk $destdir/ 74 | [ -f $srcdir/spk2gender ] && cp $srcdir/spk2gender $destdir/ 75 | [ -f $srcdir/cmvn.scp ] && cp $srcdir/cmvn.scp $destdir/ 76 | fi 77 | 78 | 79 | for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do 80 | [ -f $srcdir/$f ] && cp $srcdir/$f $destdir/ 81 | done 82 | 83 | echo "$0: copied data from $srcdir to $destdir, with --utts-per-spk-max $utts_per_spk_max" 84 | opts= 85 | [ ! -f $srcdir/feats.scp ] && opts="--no-feats" 86 | [ ! -f $srcdir/text ] && opts="$opts --no-text" 87 | [ ! -f $srcdir/wav.scp ] && opts="$opts --no-wav" 88 | 89 | utils/validate_data_dir.sh $opts $destdir 90 | -------------------------------------------------------------------------------- /steps/online/nnet2/prepare_online_decoding_transfer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This is as prepare_online_decoding.sh, but for transfer learning-- the case where 7 | # you have an existing online-decoding directory where you have all the feature 8 | # stuff, that you don't want to change, but 9 | 10 | # Begin configuration. 11 | stage=0 # This allows restarting after partway, when something when wrong. 12 | cmd=run.pl 13 | iter=final 14 | # End configuration. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | [ -f path.sh ] && . ./path.sh; 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -ne 4 ]; then 22 | echo "Usage: $0 [options] " 23 | echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online" 24 | echo "main options (for others, see top of script file)" 25 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 26 | echo " --config # config containing options" 27 | echo " --stage # stage to do partial re-run from." 28 | exit 1; 29 | fi 30 | 31 | online_src=$1 32 | lang=$2 33 | nnet_src=$3 34 | dir=$4 35 | 36 | for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $lang/words.txt; do 37 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 38 | done 39 | 40 | 41 | dir_as_given=$dir 42 | dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the 43 | # configuration files we write will contain absolute 44 | # pathnames. 45 | mkdir -p $dir/conf $dir/log 46 | 47 | utils/lang/check_phones_compatible.sh $lang/phones.txt $nnet_src/phones.txt || exit 1; 48 | cp $nnet_src/phones.txt $dir || exit 1; 49 | 50 | cp $nnet_src/tree $dir/ || exit 1; 51 | 52 | cp $nnet_src/$iter.mdl $dir/ || exit 1; 53 | 54 | 55 | # There are a bunch of files that we will need to copy from $online_src, because 56 | # we're aiming to have one self-contained directory that has everything in it. 57 | mkdir -p $dir/ivector_extractor 58 | cp -r $online_src/ivector_extractor/* $dir/ivector_extractor 59 | 60 | [ ! -d $online_src/conf ] && \ 61 | echo "Expected directory $online_src/conf to exist" && exit 1; 62 | 63 | for x in $online_src/conf/*conf; do 64 | # Replace directory name starting $online_src with those starting with $dir. 
65 | # We actually replace any directory names ending in /ivector_extractor/ or /conf/ 66 | # with $dir/ivector_extractor/ or $dir/conf/ 67 | cat $x | perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" > $dir/conf/$(basename $x) 68 | done 69 | 70 | 71 | # modify the silence-phones in the config; these are only used for the 72 | # endpointing code. 73 | cp $dir/conf/online_nnet2_decoding.conf{,.tmp} 74 | silphones=$(cat $lang/phones/silence.csl) || exit 1; 75 | cat $dir/conf/online_nnet2_decoding.conf.tmp | \ 76 | sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf 77 | rm $dir/conf/online_nnet2_decoding.conf.tmp 78 | 79 | echo "$0: formatted neural net for online decoding in $dir_as_given" 80 | -------------------------------------------------------------------------------- /steps/paste_feats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Brno University of Technology (Author: Karel Vesely) 4 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 5 | # Apache 2.0 6 | # This script appends the features in two or more data directories. 7 | 8 | # To be run from .. (one directory up from here) 9 | # see ../run.sh for example 10 | 11 | # Begin configuration section. 12 | cmd=run.pl 13 | nj=4 14 | length_tolerance=10 # length tolerance in frames (trim to shortest) 15 | compress=true 16 | # End configuration section. 17 | 18 | echo "$0 $@" # Print the command line for logging 19 | 20 | if [ -f path.sh ]; then . ./path.sh; fi 21 | . parse_options.sh || exit 1; 22 | 23 | if [ $# -lt 5 ]; then 24 | echo "usage: $0 [options] [] "; 25 | echo "e.g.: $0 data/train_mfcc data/train_bottleneck data/train_combined exp/append_mfcc_plp mfcc" 26 | echo "options: " 27 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 28 | exit 1; 29 | fi 30 | 31 | data_src_arr=(${@:1:$(($#-3))}) #array of source data-dirs 32 | data=${@: -3: 1} 33 | logdir=${@: -2: 1} 34 | ark_dir=${@: -1: 1} #last arg. 35 | 36 | data_src_first=${data_src_arr[0]} # get 1st src dir 37 | 38 | # make $ark_dir an absolute pathname. 39 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}` 40 | 41 | for data_src in ${data_src_arr[@]}; do 42 | utils/split_data.sh $data_src $nj || exit 1; 43 | done 44 | 45 | mkdir -p $ark_dir $logdir 46 | 47 | mkdir -p $data 48 | cp $data_src_first/* $data/ 2>/dev/null # so we get the other files, such as utt2spk. 49 | rm $data/cmvn.scp 2>/dev/null 50 | rm $data/feats.scp 2>/dev/null 51 | 52 | # use "name" as part of name of the archive. 53 | name=`basename $data` 54 | 55 | # get list of source scp's for pasting 56 | data_src_args= 57 | for data_src in ${data_src_arr[@]}; do 58 | data_src_args="$data_src_args scp:$data_src/split$nj/JOB/feats.scp" 59 | done 60 | 61 | for n in $(seq $nj); do 62 | # the next command does nothing unless $ark_dir/storage/ exists, see 63 | # utils/create_data_link.pl for more info. 64 | utils/create_data_link.pl $ark_dir/pasted_$name.$n.ark 65 | done 66 | 67 | $cmd JOB=1:$nj $logdir/append.JOB.log \ 68 | paste-feats --length-tolerance=$length_tolerance $data_src_args ark:- \| \ 69 | copy-feats --compress=$compress ark:- \ 70 | ark,scp:$ark_dir/pasted_$name.JOB.ark,$ark_dir/pasted_$name.JOB.scp || exit 1; 71 | 72 | # concatenate the .scp files together. 
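# Each line of the concatenated feats.scp points into one of the pasted
# archives, in Kaldi's "key path:offset" scp format, e.g. (hypothetical
# utterance-id and path):
#   utt001 /abs/path/mfcc/pasted_train.1.ark:23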
73 | for ((n=1; n<=nj; n++)); do
74 | cat $ark_dir/pasted_$name.$n.scp || exit 1;
75 | done > $data/feats.scp || exit 1;
76 | 
77 | 
78 | nf=`cat $data/feats.scp | wc -l`
79 | nu=`cat $data/utt2spk | wc -l`
80 | if [ $nf -ne $nu ]; then
81 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
82 | echo "consider using utils/fix_data_dir.sh $data"
83 | fi
84 | 
85 | echo "Succeeded pasting features for $name into $data"
86 | 
--------------------------------------------------------------------------------
/steps/score_kaldi_compare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016 Nicolas Serrano
3 | # Apache 2.0
4 | 
5 | [ -f ./path.sh ] && . ./path.sh
6 | 
7 | # begin configuration section.
8 | cmd=run.pl
9 | replications=10000
10 | #end configuration section.
11 | 
12 | echo "$0 $@" # Print the command line for logging
13 | [ -f ./path.sh ] && . ./path.sh
14 | . parse_options.sh || exit 1;
15 | 
16 | if [ $# -ne 3 ]; then
17 | echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <decode-dir1> <decode-dir2> <compare-dir>"
18 | echo " Options:"
19 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
20 | echo " --replications <int> # number of bootstrap evaluations used to compute confidence."
21 | exit 1;
22 | fi
23 | 
24 | dir1=$1
25 | dir2=$2
26 | dir_compare=$3
27 | 
28 | mkdir -p $dir_compare/log
29 | 
30 | for d in $dir1 $dir2; do
31 | for f in test_filt.txt best_wer; do
32 | [ ! -f $d/$f ] && echo "$0: no such file $d/$f" && exit 1;
33 | done
34 | done
35 | 
36 | 
37 | best_wer_file1=$(awk '{print $NF}' $dir1/best_wer)
38 | best_transcript_file1=$(echo $best_wer_file1 | sed -e 's=.*/wer_==' | \
39 | awk -v FS='_' -v dir=$dir1 '{print dir"/penalty_"$2"/"$1".txt"}')
40 | 
41 | best_wer_file2=$(awk '{print $NF}' $dir2/best_wer)
42 | best_transcript_file2=$(echo $best_wer_file2 | sed -e 's=.*/wer_==' | \
43 | awk -v FS='_' -v dir=$dir2 '{print dir"/penalty_"$2"/"$1".txt"}')
44 | 
45 | $cmd $dir_compare/log/score_compare.log \
46 | compute-wer-bootci --replications=$replications \
47 | ark:$dir1/test_filt.txt ark:$best_transcript_file1 ark:$best_transcript_file2 \
48 | '>' $dir_compare/wer_bootci_comparison || exit 1;
49 | 
50 | exit 0;
51 | 
--------------------------------------------------------------------------------
/steps/search_index.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 | # Apache 2.0
5 | 
6 | # Begin configuration section.
7 | cmd=run.pl
8 | nbest=-1
9 | strict=true
10 | indices_dir=
11 | # End configuration section.
12 | 
13 | echo "$0 $@" # Print the command line for logging
14 | 
15 | [ -f ./path.sh ] && . ./path.sh; # source the path.
16 | . parse_options.sh || exit 1;
17 | 
18 | if [ $# != 2 ]; then
19 | echo "Usage: steps/search_index.sh [options] <kws-data-dir> <kws-dir>"
20 | echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/"
21 | echo ""
22 | echo "main options (for others, see top of script file)"
23 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
24 | echo " --nbest <int> # return n best results. 
(-1 means all)" 25 | echo " --indices-dir # where the indices should be stored, by default it will be in " 26 | exit 1; 27 | fi 28 | 29 | 30 | kwsdatadir=$1; 31 | kwsdir=$2; 32 | 33 | if [ -z $indices_dir ] ; then 34 | indices_dir=$kwsdir 35 | fi 36 | 37 | mkdir -p $kwsdir/log; 38 | nj=`cat $indices_dir/num_jobs` || exit 1; 39 | keywords=$kwsdatadir/keywords.fsts; 40 | 41 | for f in $indices_dir/index.1.gz $keywords; do 42 | [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; 43 | done 44 | 45 | $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ 46 | kws-search --strict=$strict --negative-tolerance=-1 \ 47 | "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ 48 | "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1; 49 | 50 | exit 0; 51 | -------------------------------------------------------------------------------- /steps/select_feats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | # This script selects some specified dimensions of the features in the 6 | # input data directory. 7 | 8 | # To be run from .. (one directory up from here) 9 | # see ../run.sh for example 10 | 11 | # Begin configuration section. 12 | cmd=run.pl 13 | nj=4 14 | compress=true 15 | # End configuration section. 16 | 17 | echo "$0 $@" # Print the command line for logging 18 | 19 | if [ -f path.sh ]; then . ./path.sh; fi 20 | . parse_options.sh || exit 1; 21 | 22 | if [ $# -ne 5 ]; then 23 | echo "usage: $0 [options] "; 24 | echo "e.g.: $0 0-12 data/train_mfcc_pitch data/train_mfcconly exp/select_pitch_train mfcc" 25 | echo "options: " 26 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 27 | exit 1; 28 | fi 29 | 30 | selector="$1" 31 | data_in=$2 32 | data=$3 33 | logdir=$4 34 | ark_dir=$5 35 | 36 | # make $ark_dir an absolute pathname. 37 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}` 38 | 39 | 40 | utils/split_data.sh $data_in $nj || exit 1; 41 | 42 | mkdir -p $ark_dir $logdir 43 | mkdir -p $data 44 | 45 | cp $data_in/* $data/ 2>/dev/null # so we get the other files, such as utt2spk. 46 | rm $data/cmvn.scp 2>/dev/null 47 | rm $data/feats.scp 2>/dev/null 48 | 49 | # use "name" as part of name of the archive. 50 | name=`basename $data` 51 | 52 | for j in $(seq $nj); do 53 | # the next command does nothing unless $mfccdir/storage/ exists, see 54 | # utils/create_data_link.pl for more info. 55 | utils/create_data_link.pl $ark_dir/selected_$name.$j.ark 56 | done 57 | 58 | $cmd JOB=1:$nj $logdir/append.JOB.log \ 59 | select-feats "$selector" scp:$data_in/split$nj/JOB/feats.scp ark:- \| \ 60 | copy-feats --compress=$compress ark:- \ 61 | ark,scp:$ark_dir/selected_$name.JOB.ark,$ark_dir/selected_$name.JOB.scp || exit 1; 62 | 63 | # concatenate the .scp files together. 
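# The selector string is passed straight to select-feats: a comma-separated
# list of dimensions and ranges, e.g. "0-12" keeps the first 13 dimensions
# (so 16-dim MFCC+pitch features would drop the 3 pitch dimensions).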
64 | for ((n=1; n<=nj; n++)); do
65 | cat $ark_dir/selected_$name.$n.scp || exit 1;
66 | done > $data/feats.scp || exit 1;
67 | 
68 | 
69 | nf=`cat $data/feats.scp | wc -l`
70 | nu=`cat $data/utt2spk | wc -l`
71 | if [ $nf -ne $nu ]; then
72 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
73 | exit 1;
74 | fi
75 | 
76 | echo "Succeeded selecting features for $name into $data"
77 | 
--------------------------------------------------------------------------------
/steps/shift_feats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2016 Vimal Manohar
4 | # Apache 2.0
5 | 
6 | # This script shifts the feats in the input data directory and creates a
7 | # new directory <data-dir>_fs<num-frames-shift> with the shifted feats.
8 | # If the shift is negative, the initial frames get truncated.
9 | # If the shift is positive, the first frame is repeated.
10 | # Usually applicable for sequence training.
11 | 
12 | # To be run from .. (one directory up from here)
13 | # see ../run.sh for example
14 | 
15 | # Begin configuration section.
16 | cmd=run.pl
17 | nj=4
18 | compress=true
19 | # End configuration section.
20 | 
21 | echo "$0 $@" # Print the command line for logging
22 | 
23 | if [ -f path.sh ]; then . ./path.sh; fi
24 | . parse_options.sh || exit 1;
25 | 
26 | if [ $# -ne 4 ]; then
27 | echo "usage: $0 [options] <num-frames-shift> <src-data-dir> <log-dir> <feat-dir>";
28 | echo "e.g.: $0 -1 data/train exp/shift-1_train mfcc"
29 | echo "options: "
30 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
31 | exit 1;
32 | fi
33 | 
34 | num_frames_shift=$1
35 | data_in=$2
36 | logdir=$3
37 | featdir=$4
38 | 
39 | utt_prefix="fs$num_frames_shift-"
40 | spk_prefix="fs$num_frames_shift-"
41 | 
42 | # make $featdir an absolute pathname.
43 | featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}`
44 | 
45 | utils/split_data.sh $data_in $nj || exit 1;
46 | 
47 | data=${data_in}_fs$num_frames_shift
48 | 
49 | mkdir -p $featdir $logdir
50 | mkdir -p $data
51 | 
52 | utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \
53 | $data_in $data
54 | 
55 | rm $data/feats.scp 2>/dev/null
56 | 
57 | # use "name" as part of name of the archive.
58 | name=`basename $data`
59 | 
60 | for j in $(seq $nj); do
61 | # the next command does nothing unless $featdir/storage/ exists, see
62 | # utils/create_data_link.pl for more info.
63 | utils/create_data_link.pl $featdir/raw_feats_$name.$j.ark
64 | done
65 | 
66 | $cmd JOB=1:$nj $logdir/shift.JOB.log \
67 | shift-feats --shift=$num_frames_shift \
68 | scp:$data_in/split$nj/JOB/feats.scp ark:- \| \
69 | copy-feats --compress=$compress ark:- \
70 | ark,scp:$featdir/raw_feats_$name.JOB.ark,$featdir/raw_feats_$name.JOB.scp || exit 1;
71 | 
72 | # concatenate the .scp files together. 
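# Illustrative effect (assuming --shift=-1): utterance "utt001" is copied as
# "fs-1-utt001" (see utt_prefix above) and its output features start from the
# original second frame, per the header comment on negative shifts.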
73 | for ((n=1; n<=nj; n++)); do 74 | cat $featdir/raw_feats_$name.$n.scp 75 | done | awk -v nfs=$num_frames_shift '{print "fs"nfs"-"$0}'>$data/feats.scp || exit 1; 76 | 77 | nf=`cat $data/feats.scp | wc -l` 78 | nu=`cat $data/utt2spk | wc -l` 79 | if [ $nf -ne $nu ]; then 80 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);" 81 | exit 1; 82 | fi 83 | 84 | echo "Succeeded shifting features for $name into $data" 85 | 86 | -------------------------------------------------------------------------------- /steps/tandem/decode_si.sh: -------------------------------------------------------------------------------- 1 | decode.sh -------------------------------------------------------------------------------- /steps/train_nnet.sh: -------------------------------------------------------------------------------- 1 | nnet/train.sh -------------------------------------------------------------------------------- /steps/word_align_lattices.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2012 4 | # Apache 2.0. 5 | 6 | # Begin configuration section. 7 | silence_label=0 8 | cmd=run.pl 9 | # End configuration section. 10 | 11 | echo "$0 $@" # Print the command line for logging 12 | 13 | for x in `seq 2`; do 14 | [ "$1" == "--silence-label" ] && silence_label=$2 && shift 2; 15 | [ "$1" == "--cmd" ] && cmd="$2" && shift 2; 16 | done 17 | 18 | if [ $# != 3 ]; then 19 | echo "Word-align lattices (make the arcs sync up with words)" 20 | echo "" 21 | echo "Usage: $0 [options] " 22 | echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label ]" 23 | exit 1; 24 | fi 25 | 26 | . ./path.sh || exit 1; 27 | 28 | lang=$1 29 | indir=$2 30 | outdir=$3 31 | 32 | mdl=`dirname $indir`/final.mdl 33 | wbfile=$lang/phones/word_boundary.int 34 | 35 | for f in $mdl $wbfile $indir/num_jobs; do 36 | [ ! -f $f ] && echo "word_align_lattices.sh: no such file $f" && exit 1; 37 | done 38 | 39 | mkdir -p $outdir/log 40 | 41 | 42 | cp $indir/num_jobs $outdir; 43 | nj=`cat $indir/num_jobs` 44 | 45 | $cmd JOB=1:$nj $outdir/log/align.JOB.log \ 46 | lattice-align-words --silence-label=$silence_label --test=true \ 47 | $wbfile $mdl "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1; 48 | 49 | -------------------------------------------------------------------------------- /utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Adds some specified number of disambig symbols to a symbol table. 19 | # Adds these as #1, #2, etc. 20 | # If the --include-zero option is specified, includes an extra one 21 | # #0. 
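# Worked example (hypothetical symbol table whose last entry is "b 2"):
#   add_disambig.pl --include-zero symtab.txt 2 > symtab_out.txt
# appends "#0 3", then "#1 4" and "#2 5" after the copied table.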
22 | 23 | $include_zero = 0; 24 | if($ARGV[0] eq "--include-zero") { 25 | $include_zero = 1; 26 | shift @ARGV; 27 | } 28 | 29 | if(@ARGV != 2) { 30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "; 31 | } 32 | 33 | 34 | $input = $ARGV[0]; 35 | $nsyms = $ARGV[1]; 36 | 37 | open(F, "<$input") || die "Opening file $input"; 38 | 39 | while(<F>) { 40 | @A = split(" ", $_); 41 | @A == 2 || die "Bad line $_"; 42 | $lastsym = $A[1]; 43 | print; 44 | } 45 | 46 | if(!defined($lastsym)){ 47 | die "Empty symbol file?"; 48 | } 49 | 50 | if($include_zero) { 51 | $lastsym++; 52 | print "#0 $lastsym\n"; 53 | } 54 | 55 | for($n = 1; $n <= $nsyms; $n++) { 56 | $y = $n + $lastsym; 57 | print "#$n $y\n"; 58 | } 59 | -------------------------------------------------------------------------------- /utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 18 | 19 | $dur = $total = 0; 20 | $maxDur = 0; 21 | $minDur = 9999999999; 22 | $n = 0; 23 | while(<>){ 24 | chomp; 25 | @t = split(/\s+/); 26 | $dur = $t[3] - $t[2]; 27 | $total += $dur; 28 | if ($dur > $maxDur) { 29 | $maxSegId = $t[0]; 30 | $maxDur = $dur; 31 | } 32 | if ($dur < $minDur) { 33 | $minSegId = $t[0]; 34 | $minDur = $dur; 35 | } 36 | $n++; 37 | } 38 | $avg=$total/$n; 39 | $hrs = $total/3600; 40 | print "Total $hrs hours of data\n"; 41 | print "Average segment length $avg seconds\n"; 42 | print "Segment $maxSegId has length of $maxDur seconds\n"; 43 | print "Segment $minSegId has length of $minDur seconds\n"; -------------------------------------------------------------------------------- /utils/apply_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | # This program is a bit like ./sym2int.pl in that it applies a map 7 | # to things in a file, but it's a bit more general in that it doesn't 8 | # assume the things being mapped to are single tokens, they could 9 | # be sequences of tokens. See the usage message. 10 | 11 | 12 | if (@ARGV > 0 && $ARGV[0] eq "-f") { 13 | shift @ARGV; 14 | $field_spec = shift @ARGV; 15 | if ($field_spec =~ m/^\d+$/) { 16 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 17 | } 18 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 19 | if ($1 ne "") { 20 | $field_begin = $1 - 1; # Change to zero-based indexing. 21 | } 22 | if ($2 ne "") { 23 | $field_end = $2 - 1; # Change to zero-based indexing.
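# Illustrative: "-f 2" gives $field_begin == $field_end == 1; "-f 2-3" gives
# $field_begin == 1 and $field_end == 2; "-f 2-" leaves $field_end undefined,
# i.e. the range is open-ended to the right.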
24 | } 25 | } 26 | if (!defined $field_begin && !defined $field_end) { 27 | die "Bad argument to -f option: $field_spec"; 28 | } 29 | } 30 | 31 | # Mapping is obligatory 32 | $permissive = 0; 33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') { 34 | shift @ARGV; 35 | # Mapping is optional (missing key is printed to output) 36 | $permissive = 1; 37 | } 38 | 39 | if(@ARGV != 1) { 40 | print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; 41 | print STDERR "Usage: apply_map.pl [options] map <input >output\n" . 42 | "options: [-f <field-range>]\n" . 43 | "Applies the map 'map' to all input text, where each line of the map\n" . 44 | "is interpreted as a map from the first field to the list of the other fields\n" . 45 | "Note: <field-range> can look like 4-5, or 4-, or -5, or 1, it means the field\n" . 46 | "range in the input to apply the map to.\n" . 47 | "e.g.: echo A B | apply_map.pl a.txt\n" . 48 | "where a.txt is:\n" . 49 | "A a1 a2\n" . 50 | "B b\n" . 51 | "will produce:\n" . 52 | "a1 a2 b\n"; 53 | exit(1); 54 | } 55 | 56 | ($map) = @ARGV; 57 | open(M, "<$map") || die "Error opening map file $map: $!"; 58 | 59 | while (<M>) { 60 | @A = split(" ", $_); 61 | @A >= 1 || die "apply_map.pl: empty line."; 62 | $i = shift @A; 63 | $o = join(" ", @A); 64 | $map{$i} = $o; 65 | } 66 | 67 | while(<STDIN>) { 68 | @A = split(" ", $_); 69 | for ($x = 0; $x < @A; $x++) { 70 | if ( (!defined $field_begin || $x >= $field_begin) 71 | && (!defined $field_end || $x <= $field_end)) { 72 | $a = $A[$x]; 73 | if (!defined $map{$a}) { 74 | if (!$permissive) { 75 | die "apply_map.pl: undefined key $a\n"; 76 | } else { 77 | print STDERR "apply_map.pl: warning! missing key $a\n"; 78 | } 79 | } else { 80 | $A[$x] = $map{$a}; 81 | } 82 | } 83 | } 84 | print join(" ", @A) . "\n"; 85 | } 86 | -------------------------------------------------------------------------------- /utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite.
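# Illustrative line formats matched above (the numbers are made up):
#   kaldi compute-wer:  %WER 10.53 [ 210 / 1994, 25 ins, 58 del, 127 sub ]
#   sclite:             the WER is read from the Sum/Avg row of its table.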
25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>" 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | 34 | cp -r $old_lang/* $new_lang 35 | 36 | unk=`cat $new_lang/oov.int` 37 | bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'` 38 | eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'` 39 | if [[ -z $bos || -z $eos ]]; then 40 | echo "$0: <s> and </s> symbols are not in $new_lang/words.txt" 41 | exit 1 42 | fi 43 | 44 | 45 | arpa-to-const-arpa --bos-symbol=$bos \ 46 | --eos-symbol=$eos --unk-symbol=$unk \ 47 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 48 | 49 | exit 0; 50 | -------------------------------------------------------------------------------- /utils/convert_ctm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # This takes as standard input a ctm file that's "relative to the utterance", 6 | # i.e.
times are measured relative to the beginning of the segments, and it 7 | # uses a "segments" file (format: 8 | # utterance-id recording-id start-time end-time 9 | # ) and a "reco2file_and_channel" file (format: 10 | # recording-id basename-of-file channel 11 | # ) to write out a ctm with times relative to the whole recording. 12 | $skip_unknown=undef; 13 | if ( $ARGV[0] eq "--skip-unknown" ) { 14 | $skip_unknown=1; 15 | shift @ARGV; 16 | } 17 | 18 | if (@ARGV < 2 || @ARGV > 3) { 19 | print STDERR "Usage: convert_ctm.pl [--skip-unknown] <segments> <reco2file_and_channel> [<utterance-ctm>] > real-ctm\n"; 20 | exit(1); 21 | } 22 | 23 | $segments = shift @ARGV; 24 | $reco2file_and_channel = shift @ARGV; 25 | 26 | open(S, "<$segments") || die "opening segments file $segments"; 27 | while(<S>) { 28 | @A = split(" ", $_); 29 | @A == 4 || die "Bad line in segments file: $_"; 30 | ($utt, $recording_id, $begin_time, $end_time) = @A; 31 | $utt2reco{$utt} = $recording_id; 32 | $begin{$utt} = $begin_time; 33 | $end{$utt} = $end_time; 34 | } 35 | close(S); 36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel"; 37 | while(<R>) { 38 | @A = split(" ", $_); 39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_"; 40 | ($recording_id, $file, $channel) = @A; 41 | $reco2file{$recording_id} = $file; 42 | $reco2channel{$recording_id} = $channel; 43 | } 44 | 45 | 46 | # Now process the ctm file, which is either the standard input or the third 47 | # command-line argument. 48 | $num_done = 0; 49 | while(<>) { 50 | @A= split(" ", $_); 51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_"; 52 | # lines look like: 53 | # <utterance-id> 1 <begin-time> <duration> <word> [ <confidence> ] 54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A; 55 | $reco = $utt2reco{$utt}; 56 | if (!defined $reco) { 57 | next if defined $skip_unknown; 58 | die "Utterance-id $utt not defined in segments file $segments"; 59 | } 60 | $file = $reco2file{$reco}; 61 | $channel = $reco2channel{$reco}; 62 | if (!defined $file || !defined $channel) { 63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 64 | } 65 | $b = $begin{$utt}; 66 | $e = $end{$utt}; 67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording. 68 | $wbegin_r = sprintf("%.2f", $wbegin_r); 69 | $wlen = sprintf("%.2f", $wlen); 70 | if (defined $conf) { 71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 72 | } else { 73 | $line = "$file $channel $wbegin_r $wlen $w\n"; 74 | } 75 | if ($wbegin_r + $wlen > $e + 0.01) { 76 | print STDERR "Warning: word appears to be past end of recording; line is $line"; 77 | } 78 | print $line; # goes to stdout. 79 | $num_done++; 80 | } 81 | 82 | if ($num_done == 0) { exit 1; } else { exit 0; } 83 | 84 | __END__ 85 | 86 | # Test example [also test it without the 0.5's] 87 | echo utt reco 10.0 20.0 > segments 88 | echo reco file A > reco2file_and_channel 89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in 90 | echo file A 18.00 1.00 word 0.5 > ctm_out 91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error 92 | rm segments reco2file_and_channel ctm_in ctm_out 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /utils/convert_slf_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0. 3 | 4 | # This script converts lattices to HTK format compatible with other toolkits. 5 | # We can choose to put words on nodes or on arcs, as both are valid in the SLF format. 6 | 7 | # begin configuration section.
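# (Note on the defaults below: "-tc 50" is a GridEngine-style option that
# caps the number of concurrently running array-job tasks, which keeps many
# simultaneous lattice conversions from stressing the disk.)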
8 | cmd=run.pl 9 | dirname=lats-in-htk-slf 10 | parallel_opts="-tc 50" # We should limit disk stress 11 | word_to_node=false # Words in arcs or nodes? [default:arcs] 12 | #end configuration section. 13 | 14 | echo "$0 $@" 15 | 16 | [ -f ./path.sh ] && . ./path.sh 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# -ne 3 ]; then 20 | echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" 21 | echo " Options:" 22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 23 | echo " --word-to-node (true|false) # put word symbols on nodes instead of links." 24 | echo " --parallel-opts STR # parallelization options (def.: '-tc 50')." 25 | echo "e.g.:" 26 | echo "$0 data/dev data/lang exp/tri4a/decode_dev" 27 | exit 1; 28 | fi 29 | 30 | data=$1 31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. 32 | dir=$3 33 | 34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir. 35 | 36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | [ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname/log 41 | 42 | echo "$0: Converting lattices into '$dir/$dirname'" 43 | 44 | # Words in arcs or nodes? [default:arcs] 45 | word_to_node_arg= 46 | $word_to_node && word_to_node_arg="--word-to-node" 47 | 48 | nj=$(cat $dir/num_jobs) 49 | 50 | # convert the lattices (individually, gzipped) 51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \ 52 | mkdir -p $dir/$dirname/JOB/ '&&' \ 53 | lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true \ 54 | $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \ 55 | utils/int2sym.pl -f 3 $lang/words.txt \| \ 56 | utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1 57 | 58 | # make list of lattices 59 | find -L $PWD/$dir/$dirname -name '*.lat.gz' > $dir/$dirname/lat_htk.scp || exit 1 60 | 61 | # check number of lattices: 62 | nseg=$(cat $data/segments | wc -l) 63 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l) 64 | echo "segments $nseg, saved-lattices $nlat_out" 65 | # 66 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \ 67 | && exit 1 68 | 69 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp" 70 | exit 0 71 | 72 | -------------------------------------------------------------------------------- /utils/create_split_dir.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # Apache 2.0. 5 | # 6 | # This script creates storage directories on different file systems, and creates 7 | # symbolic links to those directories. For example, a command 8 | # 9 | # utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 10 | # 11 | # will mkdir -p all of those directories, and will create links 12 | # 13 | # egs/storage/1 -> /export/gpu-03/egs/storage 14 | # egs/storage/2 -> /export/gpu-04/egs/storage 15 | # ...
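# A typical follow-up (illustrative; the archive name is hypothetical):
#   utils/create_data_link.pl egs/egs.1.ark
# then makes egs/egs.1.ark a symlink through one of the numbered storage
# links, so archives written there are spread across the file systems.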
16 | # 17 | use strict; 18 | use warnings; 19 | use File::Spec; 20 | use Getopt::Long; 21 | 22 | my $Usage = <<EOU; 23 | This script creates storage directories on different file systems, and creates 24 | symbolic links to those directories. 25 | 26 | 27 | Usage: utils/create_split_dir.pl [options] <actual-storage-dirs> <pseudo-storage-dir> 28 | e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 29 | 30 | Allowed options: 31 | --suffix <suffix> : Common suffix to <actual-storage-dirs> (string, default = "") 32 | 33 | See also create_data_link.pl, which is intended to work with the resulting 34 | directory structure, and remove_data_links.sh 35 | EOU 36 | 37 | my $suffix=""; 38 | GetOptions('suffix=s' => \$suffix); 39 | 40 | if (@ARGV < 2) { 41 | die $Usage; 42 | } 43 | 44 | my $ans = 1; 45 | 46 | my $dir = pop(@ARGV); 47 | system("mkdir -p $dir 2>/dev/null"); 48 | 49 | my @all_actual_storage = (); 50 | foreach my $file (@ARGV) { 51 | push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix); 52 | } 53 | 54 | my $index = 1; 55 | foreach my $actual_storage (@all_actual_storage) { 56 | my $pseudo_storage = "$dir/$index"; 57 | 58 | # If the symbolic link already exists, skip it. 59 | if (-l $pseudo_storage) { 60 | print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n"; 61 | $index++; 62 | next; 63 | } 64 | 65 | # Create the destination directory and make the link. 66 | system("mkdir -p $actual_storage 2>/dev/null"); 67 | if ($? != 0) { 68 | print STDERR "$0: error creating directory $actual_storage\n"; 69 | exit(1); 70 | } 71 | { # create a README file for easier deletion. 72 | open(R, ">$actual_storage/README.txt"); 73 | my $storage_dir = File::Spec->rel2abs($dir); 74 | print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n"; 75 | print R "# The full list of directories where this data resides is:\n"; 76 | foreach my $d (@all_actual_storage) { 77 | print R "$d\n"; 78 | } 79 | close(R); 80 | } 81 | my $ret = symlink($actual_storage, $pseudo_storage); 82 | 83 | # Process the returned values 84 | $ans = $ans && $ret; 85 | if (! $ret) { 86 | print STDERR "Error linking $actual_storage to $pseudo_storage\n"; 87 | } 88 | 89 | $index++; 90 | } 91 | 92 | exit($ans == 1 ? 0 : 1); 93 | -------------------------------------------------------------------------------- /utils/data/combine_data.sh: -------------------------------------------------------------------------------- 1 | ../combine_data.sh -------------------------------------------------------------------------------- /utils/data/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../copy_data_dir.sh -------------------------------------------------------------------------------- /utils/data/fix_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../fix_data_dir.sh -------------------------------------------------------------------------------- /utils/data/get_frame_shift.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script takes as input a data directory, such as data/train/, preferably 7 | # with utt2dur file already existing (or the utt2dur file will be created if 8 | # not), and it attempts to work out the approximate frame shift by comparing the 9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out. 10 | # If the shift is very close to, but above, 0.01 (the normal frame shift) it 11 | # rounds it down. 12 | 13 | . utils/parse_options.sh 14 | . 
./path.sh 15 | 16 | if [ $# != 1 ]; then 17 | echo "Usage: $0 <data-dir>" 18 | echo "e.g.:" 19 | echo " $0 data/train" 20 | echo "This script prints the frame-shift (e.g. 0.01) to the standard out." 21 | echo "If <data-dir> does not contain utt2dur, this script may call utils/data/get_utt2dur.sh," 22 | echo "which will require write permission to <data-dir>" 23 | exit 1 24 | fi 25 | 26 | export LC_ALL=C 27 | 28 | dir=$1 29 | 30 | 31 | if [ ! -s $dir/utt2dur ]; then 32 | if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then 33 | echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2 34 | echo 0.01 35 | exit 0 36 | fi 37 | echo "$0: $dir/utt2dur does not exist: creating it" 1>&2 38 | utils/data/get_utt2dur.sh $dir 1>&2 39 | fi 40 | 41 | if [ ! -f $dir/feats.scp ]; then 42 | echo "$0: $dir/feats.scp does not exist" 1>&2 43 | exit 1 44 | fi 45 | 46 | temp=$(mktemp /tmp/tmp.XXXX) 47 | 48 | feat-to-len scp:$dir/feats.scp ark,t:- | head -n 10 > $temp 49 | 50 | if [ ! -s $temp ]; then 51 | echo "$0: error running feat-to-len" 1>&2 52 | exit 1 53 | fi 54 | 55 | head -n 10 $dir/utt2dur | paste - $temp | \ 56 | awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1; 57 | 58 | rm $temp 59 | 60 | exit 0 61 | -------------------------------------------------------------------------------- /utils/data/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts. 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 <data-dir>" 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | exit 1 16 | fi 17 | 18 | data=$1 19 | 20 | if [ ! -f $data/utt2dur ]; then 21 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 22 | fi 23 | 24 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 25 | 26 | awk -v s=$frame_shift '{n += $2} END{print int(n / s)}' <$data/utt2dur 27 | -------------------------------------------------------------------------------- /utils/data/get_segments_for_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script operates on a data directory, such as in data/train/, 4 | # and writes new segments to stdout. The file 'segments' maps from 5 | # utterance to time offsets into a recording, with the format: 6 | # <utterance-id> <recording-id> <segment-begin> <segment-end> 7 | # This script assumes utterance and recording ids are the same (i.e., that 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 9 | # created if necessary by get_utt2dur.sh. 10 | 11 | . ./path.sh 12 | 13 | if [ $# != 1 ]; then 14 | echo "Usage: $0 [options] <data-dir>" 15 | echo "e.g.:" 16 | echo " $0 data/train > data/train/segments" 17 | exit 1 18 | fi 19 | 20 | data=$1 21 | 22 | if [ !
-f $data/utt2dur ]; then 23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1; 24 | fi 25 | 26 | # 0 27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed.sh: -------------------------------------------------------------------------------- 1 | ../perturb_data_dir_speed.sh -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed_3way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Apache 2.0 6 | 7 | # This script does the standard 3-way speed perturbing of 8 | # a data directory (it operates on the wav.scp). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 2 ]; then 13 | echo "Usage: perturb_data_dir_speed_3way.sh " 14 | echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." 15 | echo "e.g.:" 16 | echo " $0 data/train data/train_sp" 17 | echo "Note: if /feats.scp already exists, this will refuse to run." 18 | exit 1 19 | fi 20 | 21 | srcdir=$1 22 | destdir=$2 23 | 24 | if [ ! -f $srcdir/wav.scp ]; then 25 | echo "$0: expected $srcdir/wav.scp to exist" 26 | exit 1 27 | fi 28 | 29 | if [ -f $destdir/feats.scp ]; then 30 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" 31 | exit 1 32 | fi 33 | 34 | echo "$0: making sure the utt2dur file is present in ${srcdir}, because " 35 | echo "... obtaining it after speed-perturbing would be very slow, and" 36 | echo "... you might need it." 37 | utils/data/get_utt2dur.sh ${srcdir} 38 | 39 | utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1 40 | utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1 41 | utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1 42 | 43 | rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 44 | 45 | echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" 46 | utils/validate_data_dir.sh --no-feats $destdir 47 | 48 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a data directory, such as in data/train/, and modifies 7 | # the wav.scp to perturb the volume (typically useful for training data when 8 | # using systems that don't have cepstral mean normalization). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 1 ]; then 13 | echo "Usage: $0 " 14 | echo "e.g.:" 15 | echo " $0 data/train" 16 | exit 1 17 | fi 18 | 19 | export LC_ALL=C 20 | 21 | data=$1 22 | 23 | if [ ! -f $data/wav.scp ]; then 24 | echo "$0: Expected $data/wav.scp to exist" 25 | exit 1 26 | fi 27 | 28 | if grep -q "sox --vol" $data/wav.scp; then 29 | echo "$0: It looks like the data was already volume perturbed. Not doing anything." 
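  # Illustrative: a previously perturbed wav.scp entry ends in something like
  #   ... sox --vol 1.6 -t wav - -t wav - |
  # which is what the grep above keys on.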
30 | exit 0 31 | fi 32 | 33 | cat $data/wav.scp | python -c " 34 | import sys, os, subprocess, re, random 35 | random.seed(0) 36 | scale_low = 1.0/8 37 | scale_high = 2.0 38 | for line in sys.stdin.readlines(): 39 | if len(line.strip()) == 0: 40 | continue 41 | # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' 42 | if line.strip()[-1] == '|': 43 | print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) 44 | elif re.search(':[0-9]+$', line.strip()) is not None: 45 | parts = line.split() 46 | print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 47 | else: 48 | parts = line.split() 49 | print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 50 | " > $data/wav.scp_scaled || exit 1; 51 | 52 | len1=$(cat $data/wav.scp | wc -l) 53 | len2=$(cat $data/wav.scp_scaled | wc -l) 54 | if [ "$len1" != "$len2" ]; then 55 | echo "$0: error detected: number of lines changed $len1 vs $len2"; 56 | exit 1 57 | fi 58 | 59 | mv $data/wav.scp_scaled $data/wav.scp 60 | 61 | if [ -f $data/feats.scp ]; then 62 | echo "$0: $data/feats.scp exists; moving it to $data/.backup/ as it wouldn't be valid any more." 63 | mkdir -p $data/.backup/ 64 | mv $data/feats.scp $data/.backup/ 65 | fi 66 | 67 | echo "$0: added volume perturbation to the data in $data" 68 | exit 0 69 | 70 | -------------------------------------------------------------------------------- /utils/data/remove_dup_utts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove excess utterances once they appear more than a specified 4 | # number of times with the same transcription, in a data set. 5 | # E.g. useful for removing excess "uh-huh" from training. 6 | 7 | if [ $# != 3 ]; then 8 | echo "Usage: remove_dup_utts.sh max-count " 9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup" 10 | echo "This script is used to filter out utterances that have from over-represented" 11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of" 12 | echo "any given word-sequence to a specified value. It's often used to get" 13 | echo "subsets for early stages of training." 14 | exit 1; 15 | fi 16 | 17 | maxcount=$1 18 | srcdir=$2 19 | destdir=$3 20 | mkdir -p $destdir 21 | 22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1; 23 | 24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1; 25 | 26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1; 27 | 28 | cp $srcdir/* $destdir 29 | cat $srcdir/text | \ 30 | perl -e ' 31 | $maxcount = shift @ARGV; 32 | @all = (); 33 | $p1 = 103349; $p2 = 71147; $k = 0; 34 | sub random { # our own random number generator: predictable. 
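# Added note: this is a small deterministic recurrence, k = (k + 103349) mod 71147,
# scaled to [0,1); being predictable means repeated runs keep the same utterances.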
35 | $k = ($k + $p1) % $p2; 36 | return ($k / $p2); 37 | } 38 | while(<>) { 39 | push @all, $_; 40 | @A = split(" ", $_); 41 | shift @A; 42 | $text = join(" ", @A); 43 | $count{$text} ++; 44 | } 45 | foreach $line (@all) { 46 | @A = split(" ", $line); 47 | shift @A; 48 | $text = join(" ", @A); 49 | $n = $count{$text}; 50 | if ($n < $maxcount || random() < ($maxcount / $n)) { 51 | print $line; 52 | } 53 | }' $maxcount >$destdir/text 54 | 55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`" 56 | 57 | echo "Using fix_data_dir.sh to reconcile the other files." 58 | utils/fix_data_dir.sh $destdir 59 | rm -r $destdir/.backup 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /utils/data/split_data.sh: -------------------------------------------------------------------------------- 1 | ../split_data.sh -------------------------------------------------------------------------------- /utils/data/subset_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../subset_data_dir.sh -------------------------------------------------------------------------------- /utils/data/validate_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../validate_data_dir.sh -------------------------------------------------------------------------------- /utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 
24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | 5 | import sys 6 | 7 | vocab=set() 8 | with open(sys.argv[1]) as vocabfile: 9 | for line in vocabfile: 10 | vocab.add(line.strip()) 11 | 12 | with open(sys.argv[2]) as textfile: 13 | for line in textfile: 14 | print " ".join(map(lambda word: word if word in vocab else '', line.strip().split())) 15 | -------------------------------------------------------------------------------- /utils/filter_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script takes a list of utterance-ids or any file whose first field 20 | # of each line is an utterance-id, and filters an scp 21 | # file (or any file whose "n-th" field is an utterance id), printing 22 | # out only those lines whose "n-th" field is in id_list. The index of 23 | # the "n-th" field is 1, by default, but can be changed by using 24 | # the -f switch 25 | 26 | $exclude = 0; 27 | $field = 1; 28 | $shifted = 0; 29 | 30 | do { 31 | $shifted=0; 32 | if ($ARGV[0] eq "--exclude") { 33 | $exclude = 1; 34 | shift @ARGV; 35 | $shifted=1; 36 | } 37 | if ($ARGV[0] eq "-f") { 38 | $field = $ARGV[1]; 39 | shift @ARGV; shift @ARGV; 40 | $shifted=1 41 | } 42 | } while ($shifted); 43 | 44 | if(@ARGV < 1 || @ARGV > 2) { 45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . 46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . 47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . 48 | "only the lines that were *not* in id_list.\n" . 49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . 50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" . 51 | "-f option, add 1 to the argument.\n" . 52 | "See also: utils/filter_scp.pl .\n"; 53 | } 54 | 55 | 56 | $idlist = shift @ARGV; 57 | open(F, "<$idlist") || die "Could not open id-list file $idlist"; 58 | while() { 59 | @A = split; 60 | @A>=1 || die "Invalid id-list file line $_"; 61 | $seen{$A[0]} = 1; 62 | } 63 | 64 | if ($field == 1) { # Treat this as special case, since it is common. 65 | while(<>) { 66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 67 | # $1 is what we filter on. 
68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { 69 | print $_; 70 | } 71 | } 72 | } else { 73 | while(<>) { 74 | @A = split; 75 | @A > 0 || die "Invalid scp file line $_"; 76 | @A >= $field || die "Invalid scp file line $_"; 77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { 78 | print $_; 79 | } 80 | } 81 | } 82 | 83 | # tests: 84 | # the following should print "foo 1" 85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) 86 | # the following should print "bar 2". 87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) 88 | -------------------------------------------------------------------------------- /utils/find_arpa_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | if (@ARGV < 1 || @ARGV > 2) { 19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n"; 20 | # This program finds words in the arpa file that are not symbols 21 | # in the OpenFst-format symbol table words.txt. It prints them 22 | # on the standard output, one per line. 23 | } 24 | 25 | $symtab = shift @ARGV; 26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n"; 27 | while(<S>){ 28 | @A = split(" ", $_); 29 | @A == 2 || die "Bad line in symbol table file: $_"; 30 | $seen{$A[0]} = 1; 31 | } 32 | 33 | $found_data=0; 34 | $curgram=0; 35 | while(<>) { # Find the \data\ marker. 36 | if(m:^\\data\\\s*$:) { $found_data=1; last; } 37 | } 38 | 39 | if ($found_data==0) { 40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n"; 41 | exit(1); 42 | } 43 | 44 | while(<>) { 45 | if(m/^\\(\d+)\-grams:\s*$/) { 46 | $curgram = $1; 47 | if($curgram > 1) { 48 | last; # This is an optimization as we can get the vocab from the 1-grams 49 | } 50 | } elsif($curgram > 0) { 51 | @A = split(" ", $_); 52 | if(@A > 1) { 53 | shift @A; 54 | for($n=0;$n<$curgram;$n++) { 55 | $word = $A[$n]; 56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; } 57 | $in_arpa{$word} = 1; 58 | } 59 | } else { 60 | if(@A > 0 && $A[0] !~ m:\\end\\:) { 61 | print STDERR "Unusual line $_ (line $.) in arpa file\n"; 62 | } 63 | } 64 | } 65 | } 66 | 67 | foreach $w (keys %in_arpa) { 68 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") { 69 | print "$w\n"; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/format_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -u 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | set -o errexit 20 | 21 | if [ $# -ne 4 ]; then 22 | printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0` 23 | echo " Convert ARPA-format language models to FSTs."; 24 | exit 1; 25 | fi 26 | 27 | lang_dir=$1 28 | lm=$2 29 | lexicon=$3 30 | out_dir=$4 31 | mkdir -p $out_dir 32 | 33 | [ -f ./path.sh ] && . ./path.sh 34 | 35 | echo "Converting '$lm' to FST" 36 | 37 | for f in phones.txt words.txt L.fst L_disambig.fst phones/ oov.int oov.txt; do 38 | cp -r $lang_dir/$f $out_dir 39 | done 40 | 41 | lm_base=$(basename $lm '.gz') 42 | gunzip -c $lm \ 43 | | arpa2fst --disambig-symbol=#0 \ 44 | --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst 45 | set +e 46 | fstisstochastic $out_dir/G.fst 47 | set -e 48 | # The output is like: 49 | # 9.14233e-05 -0.259833 50 | # we do expect the first of these 2 numbers to be close to zero (the second is 51 | # nonzero because the backoff weights make the states sum to >1). 52 | 53 | # Everything below is only for diagnostics. 54 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>); 55 | # this might cause determinization failure of CLG. 56 | # #0 is treated as an empty word.
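# (Sketch of the check that follows:) the awk below builds an acceptor over
# words whose lexicon entry has no phones (NF==1), plus a #0 self-loop;
# composing it with G.fst and inspecting fstinfo output then reveals any
# cycle made purely of such empty words.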
57 | mkdir -p $out_dir/tmpdir.g 58 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 59 | END{print "0 0 #0 #0"; print "0";}' \ 60 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt 61 | 62 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 63 | $out_dir/tmpdir.g/select_empty.fst.txt \ 64 | | fstarcsort --sort_type=olabel \ 65 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst 66 | 67 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \ 68 | && echo "Language model has cycles with empty words" && exit 1 69 | 70 | rm -r $out_dir/tmpdir.g 71 | 72 | 73 | echo "Succeeded in formatting LM: '$lm'" 74 | -------------------------------------------------------------------------------- /utils/gen_topo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. 7 | 8 | if (@ARGV != 4) { 9 | print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n"; 10 | print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n"; 11 | exit (1); 12 | } 13 | 14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV; 15 | 16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) || 17 | die "Unexpected number of nonsilence-model states $num_nonsil_states\n"; 18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) || 19 | die "Unexpected number of silence-model states $num_sil_states\n"; 20 | 21 | $nonsil_phones =~ s/:/ /g; 22 | $sil_phones =~ s/:/ /g; 23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; 24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; 25 | 26 | print "<Topology>\n"; 27 | print "<TopologyEntry>\n"; 28 | print "<ForPhones>\n"; 29 | print "$nonsil_phones\n"; 30 | print "</ForPhones>\n"; 31 | for ($state = 0; $state < $num_nonsil_states; $state++) { 32 | $statep1 = $state+1; 33 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n"; 34 | } 35 | print "<State> $num_nonsil_states </State>\n"; # non-emitting final state. 36 | print "</TopologyEntry>\n"; 37 | # Now silence phones. They have a different topology-- apart from the first and 38 | # last states, it's fully connected, as long as you have >= 3 states. 39 | 40 | if ($num_sil_states > 1) { 41 | $transp = 1.0 / ($num_sil_states-1); 42 | print "<TopologyEntry>\n"; 43 | print "<ForPhones>\n"; 44 | print "$sil_phones\n"; 45 | print "</ForPhones>\n"; 46 | print "<State> 0 <PdfClass> 0 "; 47 | for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last 48 | # emitting state. 49 | print "<Transition> $nextstate $transp "; 50 | } 51 | print "</State>\n"; 52 | for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to 53 | # themselves and to the last emitting state. 54 | print "<State> $state <PdfClass> $state "; 55 | for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) { 56 | print "<Transition> $nextstate $transp "; 57 | } 58 | print "</State>\n"; 59 | } 60 | # Final emitting state (non-skippable). 61 | $state = $num_sil_states-1; 62 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n"; 63 | # Final nonemitting state: 64 | print "<State> $num_sil_states </State>\n"; 65 | print "</TopologyEntry>\n"; 66 | } else { 67 | print "<TopologyEntry>\n"; 68 | print "<ForPhones>\n"; 69 | print "$sil_phones\n"; 70 | print "</ForPhones>\n"; 71 | print "<State> 0 <PdfClass> 0 "; 72 | print "<Transition> 0 0.75 "; 73 | print "<Transition> 1 0.25 "; 74 | print "</State>\n"; 75 | print "<State> $num_sil_states </State>\n"; # non-emitting final state.
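# With a single silence state the entry printed above reduces to one emitting
# state with self-loop 0.75 / exit 0.25, i.e. the same shape as the
# non-silence topology earlier in this script.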
76 | print "</TopologyEntry>\n"; 77 | } 78 | 79 | print "</Topology>\n"; 80 | -------------------------------------------------------------------------------- /utils/int2sym.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0. 4 | 5 | undef $field_begin; 6 | undef $field_end; 7 | 8 | 9 | if ($ARGV[0] eq "-f") { 10 | shift @ARGV; 11 | $field_spec = shift @ARGV; 12 | if ($field_spec =~ m/^\d+$/) { 13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 14 | } 15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 16 | if ($1 ne "") { 17 | $field_begin = $1 - 1; # Change to zero-based indexing. 18 | } 19 | if ($2 ne "") { 20 | $field_end = $2 - 1; # Change to zero-based indexing. 21 | } 22 | } 23 | if (!defined $field_begin && !defined $field_end) { 24 | die "Bad argument to -f option: $field_spec"; 25 | } 26 | } 27 | $symtab = shift @ARGV; 28 | if(!defined $symtab) { 29 | print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" . 30 | "options: [-f (<field>|<field-start>-<field-end>)]\n" . 31 | "e.g.: -f 2, or -f 3-4\n"; 32 | exit(1); 33 | } 34 | 35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 36 | while(<F>) { 37 | @A = split(" ", $_); 38 | @A == 2 || die "bad line in symbol table file: $_"; 39 | $int2sym{$A[1]} = $A[0]; 40 | } 41 | 42 | sub int2sym { 43 | my $a = shift @_; 44 | my $pos = shift @_; 45 | if($a !~ m:^\d+$:) { # not all digits.. 46 | $pos1 = $pos+1; # make it one-based. 47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; 48 | } 49 | $s = $int2sym{$a}; 50 | if(!defined ($s)) { 51 | die "int2sym.pl: integer $a not in symbol table $symtab."; 52 | } 53 | return $s; 54 | } 55 | 56 | $error = 0; 57 | while (<>) { 58 | @A = split(" ", $_); 59 | for ($pos = 0; $pos <= $#A; $pos++) { 60 | $a = $A[$pos]; 61 | if ( (!defined $field_begin || $pos >= $field_begin) 62 | && (!defined $field_end || $pos <= $field_end)) { 63 | $a = int2sym($a, $pos); 64 | } 65 | print $a . " "; 66 | } 67 | print "\n"; 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/lang/add_lex_disambig.pl: -------------------------------------------------------------------------------- 1 | ../add_lex_disambig.pl -------------------------------------------------------------------------------- /utils/lang/check_g_properties.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use IPC::Open2; 4 | 5 | if (@ARGV != 1) { 6 | print "Usage: $0 [options] <lang-directory>\n"; 7 | print "e.g.: $0 data/lang\n"; 8 | exit(1); 9 | } 10 | 11 | $lang = shift @ARGV; 12 | 13 | # This script checks that G.fst in the lang directory is OK with respect 14 | # to certain expected properties, and returns nonzero exit status if a problem was 15 | # detected. It is called from validate_lang.pl. 16 | # This only checks the properties of G that relate to disambiguation symbols, 17 | # epsilons and the forbidden symbols <s> and </s>. 18 | 19 | if (!
-e "$lang/G.fst") { 20 | print "$0: error: $lang/G.fst does not exist\n"; 21 | exit(1); 22 | } 23 | 24 | open(W, "<$lang/words.txt") || die "opening $lang/words.txt"; 25 | $hash_zero = -1; 26 | while () { 27 | @A = split(" ", $_); 28 | ($sym, $int) = @A; 29 | if ($sym eq "" || $sym eq "") { $is_forbidden{$int} = 1; } 30 | if ($sym eq "#0") { $hash_zero = $int; } 31 | } 32 | 33 | if (-e "$lang/phones/wdisambig_words.int") { 34 | open(F, "<$lang/phones/wdisambig_words.int") || die "opening $lang/phones/wdisambig_words.int"; 35 | while () { 36 | chop; 37 | $is_disambig{$_} = 1; 38 | } 39 | } else { 40 | $is_disambig{$hash_zero} = 1; 41 | } 42 | 43 | $input_cmd = ". ./path.sh; fstprint $lang/G.fst|"; 44 | open(G, $input_cmd) || die "running command $input_cmd"; 45 | 46 | $info_cmd = ". ./path.sh; fstcompile | fstinfo "; 47 | open2(O, I, "$info_cmd") || die "running command $info_cmd"; 48 | 49 | $has_epsilons = 0; 50 | 51 | while () { 52 | @A = split(" ", $_); 53 | if (@A >= 4) { 54 | if ($is_forbidden{$A[2]} || $is_forbidden{$A[3]}) { 55 | chop; 56 | print "$0: validating $lang: error: line $_ in G.fst contains forbidden symbol or \n"; 57 | exit(1); 58 | } elsif ($is_disambig{$A[2]}) { 59 | print I $_; 60 | if ($A[3] != 0) { 61 | chop; 62 | print "$0: validating $lang: error: line $_ in G.fst has disambig on input but no epsilon on output\n"; 63 | exit(1); 64 | } 65 | } elsif ($A[2] == 0) { 66 | print I $_; 67 | $has_epsilons = 1; 68 | } elsif ($A[2] != $A[3]) { 69 | chop; 70 | print "$0: validating $lang: error: line $_ in G.fst has inputs and outputs different but input is not disambig symbol.\n"; 71 | exit(1); 72 | } 73 | } 74 | } 75 | 76 | close(I); # tell 'fstcompile | fstinfo' pipeline that its input is done. 77 | while () { 78 | if (m/cyclic\s+y/) { 79 | print "$0: validating $lang: error: G.fst has cycles containing only disambig symbols and epsilons. Would cause determinization failure\n"; 80 | exit(1); 81 | } 82 | } 83 | 84 | if ($has_epsilons) { 85 | print "$0: warning: validating $lang: G.fst has epsilon-input arcs. We don't expect these in most setups.\n"; 86 | } 87 | 88 | print "--> $0 successfully validated $lang/G.fst\n"; 89 | exit(0); 90 | -------------------------------------------------------------------------------- /utils/lang/check_phones_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Hang Lyu 3 | 4 | # Licensed udner the Apache License, Version 2.0 (the "Lincense"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OF IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script exits with status zero if the phone symbols tables are the same 18 | # except for possible differences in disambiguation symbols (meaning that all 19 | # symbols except those beginning with a # are mapped to the same values). 20 | # Otherwise it prints a warning and exits with status 1. 
21 | # For the sake of compatibility with other scripts that did not write the 22 | # phones.txt to model directories, this script exits silently with status 0 23 | # if one of the phone symbol tables does not exist. 24 | 25 | . utils/parse_options.sh || exit 1; 26 | 27 | if [ $# -ne 2 ]; then 28 | echo "Usage: utils/lang/check_phones_compatible.sh <phones-symbol-table1> <phones-symbol-table2>" 29 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt" 30 | exit 1; 31 | fi 32 | 33 | table_first=$1 34 | table_second=$2 35 | 36 | # check the files exist or not 37 | if [ ! -f $table_first ]; then 38 | if [ ! -f $table_second ]; then 39 | echo "$0: Error! Both of the two phones-symbol tables are absent." 40 | echo "Please check your command" 41 | exit 1; 42 | else 43 | # The phones-symbol-table1 is absent. The model directory may have been created by an older script. 44 | # For backward compatibility, this script exits silently with status 0. 45 | exit 0; 46 | fi 47 | elif [ ! -f $table_second ]; then 48 | # The phones-symbol-table2 is absent. The model directory may have been created by an older script. 49 | # For backward compatibility, this script exits silently with status 0. 50 | exit 0; 51 | fi 52 | 53 | # Check whether the two tables are the same (except for possible differences in disambiguation symbols). 54 | if ! cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then 55 | echo "$0: phone symbol tables $table_first and $table_second are not compatible." 56 | exit 1; 57 | fi 58 | 59 | exit 0; 60 | -------------------------------------------------------------------------------- /utils/lang/prepare_lang.sh: -------------------------------------------------------------------------------- 1 | ../prepare_lang.sh -------------------------------------------------------------------------------- /utils/lang/validate_lang.pl: -------------------------------------------------------------------------------- 1 | ../validate_lang.pl -------------------------------------------------------------------------------- /utils/ln.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use File::Spec; 3 | 4 | if ( @ARGV < 2 ) { 5 | print STDERR "usage: ln.pl input1 input2 dest-dir\n" . 6 | "This script does a soft link of input1, input2, etc. " . 7 | "to dest-dir, using relative links where possible\n" . 8 | "Note: input-n and dest-dir may both be absolute pathnames,\n" . 9 | "or relative pathnames, relative to the current directory.\n"; 10 | exit(1); 11 | } 12 | 13 | $dir = pop @ARGV; 14 | if ( ! -d $dir ) { 15 | print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n"; 16 | exit(1); 17 | } 18 | 19 | $ans = 1; # true. 20 | 21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path. 22 | defined $absdir || die "No such directory $dir"; 23 | foreach $file (@ARGV) { 24 | $absfile = File::Spec->rel2abs($file); # Get $file as abs path. 25 | defined $absfile || die "No such file or directory: $file"; 26 | @absdir_split = split("/", $absdir); 27 | @absfile_split = split("/", $absfile); 28 | 29 | $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this 30 | # as the destination in the link command.
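# Illustrative: linking file a/b/c.txt into directory a/d shares the leading
# "a" component, so the loop below yields the relative link
#   a/d/c.txt -> ../b/c.txt
# instead of an absolute path.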
31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /utils/make_unigram_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script is used in discriminative training. 18 | # This script makes a simple unigram-loop version of G.fst 19 | # using a unigram grammar estimated from some training transcripts. 20 | # This is for MMI training. 21 | # We don't have any silences in G.fst; these are supplied by the 22 | # optional silences in the lexicon. 23 | 24 | # Note: the symbols in the transcripts become the input and output 25 | # symbols of G.txt; these can be numeric or not. 26 | 27 | if(@ARGV != 0) { 28 | die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 43 | } 44 | 45 | foreach $a (keys %count) { 46 | $prob = $count{$a} / $totcount; 47 | $cost = -log($prob); # Negated natural-log probs. 48 | print "0\t0\t$a\t$a\t$cost\n"; 49 | } 50 | # Zero final-cost. 51 | $final_prob = $nl / $totcount; 52 | $final_cost = -log($final_prob); 53 | print "0\t$final_cost\n"; 54 | 55 | -------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
-------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_dct_mat.py 19 | # This script generates a sparse matrix with the DCT transform; it takes 20 | # into account that the data layout is along the frequency axis, 21 | # while the DCT is applied along the temporal axis. 22 | 23 | from math import * 24 | import sys 25 | 26 | 27 | from optparse import OptionParser 28 | 29 | parser = OptionParser() 30 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 31 | parser.add_option('--splice', dest='splice', help='applied splice value') 32 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis') 33 | (options, args) = parser.parse_args() 34 | 35 | if(options.dim == None or options.splice == None or options.dct_basis == None): 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | dim=int(options.dim) 40 | splice=int(options.splice) 41 | dct_basis=int(options.dct_basis) 42 | 43 | timeContext=2*splice+1 44 | 45 | 46 | # math constants 47 | M_PI = 3.1415926535897932384626433832795 48 | M_SQRT2 = 1.4142135623730950488016887 49 | 50 | 51 | # generate the sparse DCT matrix 52 | print '[' 53 | for k in range(dct_basis): 54 | for m in range(dim): 55 | for n in range(timeContext): 56 | if(n==0): 57 | print m*'0 ', 58 | else: 59 | print (dim-1)*'0 ', 60 | print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))), 61 | if(n==timeContext-1): 62 | print (dim-m-1)*'0 ', 63 | print 64 | print 65 | 66 | print ']' 67 |
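# Shape check (follows from the loops above): there are dct_basis*dim rows, and each row contains m + (timeContext-1)*(dim-1) + timeContext + (dim-m-1) = timeContext*dim entries, so the printed matrix is (dct_basis*dim) x ((2*splice+1)*dim), with one nonzero entry per time offset in each row.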
-------------------------------------------------------------------------------- /utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_hamm_mat.py 19 | # This script generates a diagonal matrix with Hamming-window values. 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='applied splice value') 30 | (options, args) = parser.parse_args() 31 | 32 | if(options.dim == None or options.splice == None): 33 | parser.print_help() 34 | sys.exit(1) 35 | 36 | dim=int(options.dim) 37 | splice=int(options.splice) 38 | 39 | 40 | # generate the diagonal matrix with Hamming-window values 41 | M_2PI = 6.283185307179586476925286766559005 42 | 43 | dim_mat=(2*splice+1)*dim 44 | timeContext=2*splice+1 45 | print '[' 46 | for row in range(dim_mat): 47 | for col in range(dim_mat): 48 | if col!=row: 49 | print '0', 50 | else: 51 | i=int(row/dim) 52 | print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))), 53 | print 54 | 55 | print ']' 56 | 57 | 58 |
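# The result is a square matrix of size (2*splice+1)*dim: row index "row" belongs to frame i = row/dim of the spliced window, and every coefficient of that frame is scaled by the Hamming weight 0.54 - 0.46*cos(2*pi*i/(timeContext-1)).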
-------------------------------------------------------------------------------- /utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_splice.py 19 | # generates the <splice> Component 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') 30 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' ) 31 | (options, args) = parser.parse_args() 32 | 33 | if(options.dim_in == None or options.splice == None): 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | dim_in=int(options.dim_in) 38 | splice=int(options.splice) 39 | splice_step=int(options.splice_step) 40 | 41 | dim_out=(2*splice+1)*dim_in 42 | 43 | print '<splice>', dim_out, dim_in 44 | print '[', 45 | 46 | splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step) 47 | for idx in range(len(splice_vec)): 48 | print splice_vec[idx], 49 | 50 | print ']' 51 | 52 | -------------------------------------------------------------------------------- /utils/prepare_online_nnet_dist_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti) 4 | # Guoguo Chen 5 | # Apache 2.0 6 | # Script to prepare the distribution from the online-nnet build 7 | 8 | other_files= # other files to be included in the build 9 | other_dirs= 10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf" 11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts" 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | [ -f path.sh ] && . ./path.sh; 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: $0 <lang-dir> <model-dir> <tgz-file>" 19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz" 20 | exit 1; 21 | fi 22 | 23 | lang=$1 24 | modeldir=$2 25 | tgzfile=$3 26 | 27 | for f in $lang/phones.txt $other_files; do 28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 29 | done 30 | 31 | build_files= 32 | for d in $modeldir/conf $modeldir/ivector_extractor; do 33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1; 34 | done 35 | 36 | for f in $ivec_extractor_files; do 37 | f=$modeldir/ivector_extractor/$f 38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 39 | build_files="$build_files $f" 40 | done 41 | 42 | # Makes a copy of the original config files, as we will change the absolute path 43 | # to relative. 44 | rm -rf $modeldir/conf_abs_path 45 | mkdir -p $modeldir/conf_abs_path 46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path 47 | 48 | for f in $conf_files; do 49 | [ ! -f $modeldir/conf/$f ] && \ 50 | echo "$0: no such file $modeldir/conf/$f" && exit 1;
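# Why the rewrite below: the tarball produced by this script is meant to be unpacked on another machine, where absolute paths baked into the configs at build time would dangle; rewriting them relative to the current directory keeps the unpacked model self-contained.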
51 | # Changes absolute path to relative path. The path entries in the config file 52 | # are generated by scripts and it is safe to assume that they have the structure: 53 | # variable=path 54 | cat $modeldir/conf_abs_path/$f | perl -e ' 55 | use File::Spec; 56 | while(<STDIN>) { 57 | chomp; 58 | @col = split("=", $_); 59 | if (@col == 2 && (-f $col[1])) { 60 | $col[1] = File::Spec->abs2rel($col[1]); 61 | print "$col[0]=$col[1]\n"; 62 | } else { 63 | print "$_\n"; 64 | } 65 | } 66 | ' > $modeldir/conf/$f 67 | build_files="$build_files $modeldir/conf/$f" 68 | done 69 | 70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \ 71 | $modeldir/final.mdl $modeldir/tree >/dev/null 72 | 73 | # Changes back to absolute path. 74 | rm -rf $modeldir/conf 75 | mv $modeldir/conf_abs_path $modeldir/conf 76 | -------------------------------------------------------------------------------- /utils/reduce_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # koried, 10/29/2012 4 | 5 | # Reduce a data set based on a list of turn-ids 6 | 7 | if [ $# != 3 ]; then 8 | echo "usage: $0 srcdir turnlist destdir" 9 | exit 1; 10 | fi 11 | 12 | srcdir=$1 13 | reclist=$2 14 | destdir=$3 15 | 16 | if [ ! -f $srcdir/utt2spk ]; then 17 | echo "$0: no such file $srcdir/utt2spk" 18 | exit 1; 19 | fi 20 | 21 | function do_filtering { 22 | # assumes the utt2spk and spk2utt files already exist. 23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp 24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp 25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text 26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender 27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp 28 | if [ -f $srcdir/segments ]; then 29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments 30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings. 31 | # When a segments file exists, wav.scp is indexed by recording-id rather than by utterance-id, so the next line redoes the wav.scp filtering from above (which would otherwise be incorrect).
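# Example (hypothetical data): if segments contains the line "utt1 rec1 0.0 2.5" and utt1 survives the filtering, then reco will contain "rec1", and wav.scp keeps exactly the recordings that some surviving utterance points to.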
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | 36 | # Filter the STM file for proper sclite scoring (this will also remove the comment lines) 37 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 38 | rm $destdir/reco 39 | fi 40 | srcutts=`cat $srcdir/utt2spk | wc -l` 41 | destutts=`cat $destdir/utt2spk | wc -l` 42 | echo "Reduced #utt from $srcutts to $destutts" 43 | } 44 | 45 | mkdir -p $destdir 46 | 47 | # filter the utt2spk based on the given list of utterance-ids 48 | utils/filter_scp.pl $reclist < $srcdir/utt2spk > $destdir/utt2spk 49 | 50 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 51 | do_filtering; 52 | 53 | -------------------------------------------------------------------------------- /utils/reduce_data_dir_by_reclist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # koried, 10/29/2012 4 | 5 | # Reduce a data set based on a list of recordings 6 | 7 | if [ $# != 3 ]; then 8 | echo "usage: $0 srcdir reclist destdir" 9 | exit 1; 10 | fi 11 | 12 | srcdir=$1 13 | reclist=$2 14 | destdir=$3 15 | 16 | if [ ! -f $srcdir/utt2spk ]; then 17 | echo "$0: no such file $srcdir/utt2spk" 18 | exit 1; 19 | fi 20 | 21 | function do_filtering { 22 | # assumes the utt2spk and spk2utt files already exist. 23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp 24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp 25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text 26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender 27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp 28 | if [ -f $srcdir/segments ]; then 29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments 30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings. 31 | # When a segments file exists, wav.scp is indexed by recording-id rather than by utterance-id, so the next line redoes the wav.scp filtering from above (which would otherwise be incorrect).
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 36 | rm $destdir/reco 37 | fi 38 | srcutts=`cat $srcdir/utt2spk | wc -l` 39 | destutts=`cat $destdir/utt2spk | wc -l` 40 | echo "Reduced #utt from $srcutts to $destutts" 41 | } 42 | 43 | mkdir -p $destdir 44 | 45 | # filter the utt2spk based on the set of recordings 46 | rm -f $destdir/utt2spk 47 | for i in `cat $reclist`; do 48 | cat $srcdir/utt2spk | grep ^$i >> $destdir/utt2spk 49 | done 50 | 51 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 52 | do_filtering; 53 | 54 | -------------------------------------------------------------------------------- /utils/remove_data_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This program searches within a directory for soft links that 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory, 5 | # and it removes both the soft links and the things they point to. 6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] <dir1> [<dir2>] ..." 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | exit 1 29 | fi 30 | 31 | for dir in $*; do 32 | if [ ! -d $dir ]; then 33 | echo "$0: not a directory: $dir" 34 | ret=1 35 | else 36 | for subdir in $(find $dir -type d); do 37 | if [ -d $subdir/storage ]; then 38 | for x in $(ls $subdir); do 39 | f=$subdir/$x 40 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 41 | target=$subdir/$(readlink $f) 42 | if $dry_run; then 43 | echo rm $f $target 44 | else 45 | rm $f $target 46 | fi 47 | fi 48 | done 49 | fi 50 | done 51 | fi 52 | done 53 | 54 | exit $ret 55 | -------------------------------------------------------------------------------- /utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License.
16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on them from FSTs (probably compiled from ARPAs with OOVs in them). 20 | 21 | if ( @ARGV < 1 || @ARGV > 2) { 22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; 23 | } 24 | 25 | $unklist = shift @ARGV; 26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; 27 | while(<S>){ 28 | @A = split(" ", $_); 29 | @A == 1 || die "Bad line in unknown-symbol list: $_"; 30 | $unk{$A[0]} = 1; 31 | } 32 | 33 | $num_removed = 0; 34 | while(<>){ 35 | @A = split(" ", $_); 36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) { 37 | $num_removed++; 38 | } else { 39 | print; 40 | } 41 | } 42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; 43 | 44 | -------------------------------------------------------------------------------- /utils/rnnlm_compute_scores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute scores from RNNLM. This script takes a directory 4 | # $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ), 5 | # where it expects the files: 6 | # rnnlm wordlist.rnn unk.probs, 7 | # and also an input file location where it can get the sentences to score, and 8 | # an output file location to put the scores (negated logprobs) for each 9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and 10 | # output files will have a first field that corresponds to some kind of 11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the 12 | # N-best list. 13 | # 14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary, 15 | # that the RNN was trained on (note, it won't include <s> or </s>), 16 | # plus <RNN_UNK>, which is a kind of class where we put low-frequency 17 | # words; unk.probs gives the probs for words given this class, and it 18 | # has, on each line, "word prob". 19 | 20 | rnnlm_ver=rnnlm-0.3e 21 | ensure_normalized_probs=false # if true then we add the necessary options to 22 | # normalize the probabilities of the RNNLM, 23 | # e.g. when using faster-rnnlm in the nce mode 24 | 25 | . ./path.sh || exit 1; 26 | . utils/parse_options.sh 27 | 28 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm 29 | 30 | [ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1; 31 | 32 | if [ $# != 4 ]; then 33 | echo "Usage: rnnlm_compute_scores.sh <rnn-dir> <temp-dir> <input-text> <output-scores>" 34 | exit 1; 35 | fi 36 | 37 | dir=$1 38 | tempdir=$2 39 | text_in=$3 40 | scores_out=$4 41 | 42 | for x in rnnlm wordlist.rnn unk.probs; do 43 | if [ ! -f $dir/$x ]; then 44 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist." 45 | exit 1; 46 | fi 47 | done 48 | 49 | mkdir -p $tempdir 50 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text 51 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids. 52 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \ 53 | -v logprobs=$tempdir/loglikes.oov \ 54 | 'BEGIN{ while((getline<voc)>0) { invoc[$1]=1; } while ((getline<unk)>0){ unkprob[$1]=$2;} } 55 | { logprob=0; 56 | if (NF==0) { printf ""; logprob = log(1.0e-07); 57 | print "Warning: empty sequence." | "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf("<RNN_UNK> "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk
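# Illustration (hypothetical data): if wordlist.rnn contains "yes" and "no", the text line "yes maybe no" becomes "yes <RNN_UNK> no" in text.nounk, while log p(maybe | unk-class) from unk.probs (or log(1.0e-07) if the word is not listed there) is accumulated into loglikes.oov for that utterance.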
| "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf(" "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk 64 | 65 | # OK, now we compute the scores on the text with OOVs replaced 66 | # with 67 | 68 | if [ $rnnlm_ver == "faster-rnnlm" ]; then 69 | extra_options= 70 | if [ "$ensure_normalized_probs" = true ]; then 71 | extra_options="--nce-accurate-test 1" 72 | fi 73 | $rnnlm $extra_options -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \ 74 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 75 | else 76 | # add the utterance_id as required by Mikolove's rnnlm 77 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk 78 | 79 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \ 80 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 81 | fi 82 | 83 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \ 84 | echo "rnnlm rescoring failed" && exit 1; 85 | 86 | paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores 87 | 88 | # scores out, with utterance-ids. 89 | paste $tempdir/ids $tempdir/scores > $scores_out 90 | 91 | -------------------------------------------------------------------------------- /utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces and with (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } 24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } 25 | } 26 | print join("\t", @A) . "\n"; 27 | } 28 | -------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal ) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
-------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # This script reads a per-utterance table such as the one generated during scoring 19 | # and outputs the WER in a format similar to what the compute-wer utility 20 | # or utils/best_wer.pl produces, 21 | # i.e. from a table containing lines in this format 22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463 23 | # it produces output like this 24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 25 | # NB: if the STDIN stream contains more than one of the "SUM raw" entries, 26 | # the best one will be found and printed 27 | # 28 | # If the script is called with parameters, it uses them to provide 29 | # a description of the output, 30 | # i.e. 31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set 32 | # will produce the following output 33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set 34 | 35 | 36 | while (<STDIN>) { 37 | if ( m:SUM\s+raw:) { 38 | @F = split; 39 | if ((!defined $wer) || ($wer > $F[8])) { 40 | $corr=$F[4]; 41 | $sub=$F[5]; 42 | $ins=$F[6]; 43 | $del=$F[7]; 44 | $wer=$F[8]; 45 | $words=$F[3]; 46 | } 47 | } 48 | } 49 | 50 | if (defined $wer) { 51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words); 52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]"; 53 | print " " . join(" ", @ARGV) if @ARGV > 0; 54 | print "\n"; 55 | } 56 | -------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] <utterance-id> <lattice-archive> <word-symbol-table>" 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! [ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1;
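# Note: with the default --lm-scale=0.0 and --acoustic-scale=0.0, lattice-to-fst scales all costs away, so the drawing below shows the lattice topology and word labels only; pass nonzero scales to visualize meaningful arc weights.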
28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Cannot automatically open files on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open the ${format} file." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines = (); 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] <=> $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 | -------------------------------------------------------------------------------- /utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /utils/subset_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This program selects a subset of N elements in the scp. 19 | 20 | # By default, it selects them evenly from throughout the scp, in order to avoid 21 | # selecting too many from the same speaker.
It prints them on the standard 22 | # output. 23 | # With the option --first, it just selects the N first utterances. 24 | # With the option --last, it just selects the N last utterances. 25 | 26 | # Last modified by JHU & HKUST @2013 27 | 28 | 29 | $quiet = 0; 30 | $first = 0; 31 | $last = 0; 32 | 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") { 34 | shift; 35 | $quiet = 1; 36 | } 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") { 38 | shift; 39 | $first = 1; 40 | } 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") { 42 | shift; 43 | $last = 1; 44 | } 45 | 46 | if(@ARGV < 2 ) { 47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . 48 | " --quiet causes it to not die if N < num lines in scp.\n" . 49 | " --first and --last make it equivalent to head or tail.\n" . 50 | "See also: filter_scp.pl\n"; 51 | } 52 | 53 | $N = shift @ARGV; 54 | if($N == 0) { 55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; 56 | } 57 | $inscp = shift @ARGV; 58 | open(I, "<$inscp") || die "Opening input scp file $inscp"; 59 | 60 | @F = (); 61 | while() { 62 | push @F, $_; 63 | } 64 | $numlines = @F; 65 | if($N > $numlines) { 66 | if ($quiet) { 67 | $N = $numlines; 68 | } else { 69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines"; 70 | } 71 | } 72 | 73 | sub select_n { 74 | my ($start,$end,$num_needed) = @_; 75 | my $diff = $end - $start; 76 | if ($num_needed > $diff) { 77 | die "select_n: code error"; 78 | } 79 | if ($diff == 1 ) { 80 | if ($num_needed > 0) { 81 | print $F[$start]; 82 | } 83 | } else { 84 | my $halfdiff = int($diff/2); 85 | my $halfneeded = int($num_needed/2); 86 | select_n($start, $start+$halfdiff, $halfneeded); 87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded); 88 | } 89 | } 90 | 91 | if ( ! $first && ! $last) { 92 | if ($N > 0) { 93 | select_n(0, $numlines, $N); 94 | } 95 | } else { 96 | if ($first) { # --first option: same as head. 97 | for ($n = 0; $n < $N; $n++) { 98 | print $F[$n]; 99 | } 100 | } else { # --last option: same as tail. 101 | for ($n = @F - $N; $n < @F; $n++) { 102 | print $F[$n]; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /utils/summarize_logs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | sub split_hundreds { # split list of filenames into groups of 100. 
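# (Explanatory note) grep is run on at most 100 log files at a time: one invocation per file would be slow, while a single invocation over a huge directory could overflow the maximum command-line length.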
8 | my $names = shift @_; 9 | my @A = split(" ", $names); 10 | my @ans = (); 11 | while (@A > 0) { 12 | my $group = ""; 13 | for ($x = 0; $x < 100 && @A>0; $x++) { 14 | $fname = pop @A; 15 | $group .= "$fname "; 16 | } 17 | push @ans, $group; 18 | } 19 | return @ans; 20 | } 21 | 22 | sub parse_accounting_entry { 23 | $entry= shift @_; 24 | 25 | @elems = split " ", $entry; 26 | 27 | $time=undef; 28 | $threads=undef; 29 | foreach $elem (@elems) { 30 | if ( $elem=~ m/time=(\d+)/ ) { 31 | $elem =~ s/time=(\d+)/$1/; 32 | $time = $elem; 33 | } elsif ( $elem=~ m/threads=(\d+)/ ) { 34 | $elem =~ s/threads=(\d+)/$1/g; 35 | $threads = $elem; 36 | } else { 37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n"; 38 | } 39 | } 40 | 41 | if (defined($time) and defined($threads) ) { 42 | return ($time, $threads); 43 | } else { 44 | die "The accounting entry \"$entry\" did not contain all necessary attributes"; 45 | } 46 | } 47 | 48 | foreach $dir (@ARGV) { 49 | 50 | #$dir = $ARGV[0]; 51 | print "$dir\n"; 52 | 53 | ! -d $dir && print STDERR "summarize_logs.pl: no such directory $dir\n" ; 54 | 55 | $dir =~ s:/$::; # Remove trailing slash. 56 | 57 | 58 | # Group the files into categories where all have the same base-name. 59 | foreach $f (glob ("$dir/*.log")) { 60 | $f_category = $f; 61 | # do next expression twice; s///g doesn't work as they overlap. 62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:; 63 | #$f_category =~ s:\.\d+\.:.*.:; 64 | $fmap{$f_category} .= " $f"; 65 | } 66 | } 67 | 68 | foreach $c (sort (keys %fmap) ) { 69 | $n = 0; 70 | foreach $fgroup (split_hundreds($fmap{$c})) { 71 | $n += `grep -w WARNING $fgroup | wc -l`; 72 | } 73 | if ($n != 0) { 74 | print "$n warnings in $c\n" 75 | } 76 | } 77 | foreach $c (sort (keys %fmap)) { 78 | $n = 0; 79 | foreach $fgroup (split_hundreds($fmap{$c})) { 80 | $n += `grep -w ERROR $fgroup | wc -l`; 81 | } 82 | if ($n != 0) { 83 | print "$n errors in $c\n" 84 | } 85 | } 86 | 87 | $supertotal_cpu_time=0.0; 88 | $supertotal_clock_time=0.0; 89 | $supertotal_threads=0.0; 90 | 91 | foreach $c (sort (keys %fmap)) { 92 | $n = 0; 93 | 94 | $total_cpu_time=0.0; 95 | $total_clock_time=0.0; 96 | $total_threads=0.0; 97 | foreach $fgroup (split_hundreds($fmap{$c})) { 98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`; 99 | 100 | #print $lines ."\n"; 101 | 102 | @entries = split "\n", $lines; 103 | 104 | foreach $line (@entries) { 105 | ($time, $threads) = parse_accounting_entry($line); 106 | 107 | $total_cpu_time += $time * $threads; 108 | $total_threads += $threads; 109 | if ( $time > $total_clock_time ) { 110 | $total_clock_time = $time; 111 | } 112 | } 113 | } 114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n"; 115 | 116 | $supertotal_cpu_time += $total_cpu_time; 117 | $supertotal_clock_time += $total_clock_time; 118 | $supertotal_threads += $total_threads; 119 | } 120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n"; 121 |
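# Reading the output: total_cpu_time sums time*threads over all jobs in a group; clock_time is the largest single-job time within the group (roughly the wall-clock time if the group's jobs ran in parallel); the group=all line sums these per-group figures.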
-------------------------------------------------------------------------------- /utils/summarize_warnings.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1; 6 | 7 | $dir = $ARGV[0]; 8 | 9 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1; 10 | 11 | $dir =~ s:/$::; # Remove trailing slash. 12 | 13 | 14 | # Group the files into categories where all have the same base-name. 15 | foreach $f (glob ("$dir/*.log")) { 16 | $f_category = $f; 17 | # do next expression twice; s///g doesn't work as they overlap. 18 | $f_category =~ s:\.\d+\.:.*.:; 19 | $f_category =~ s:\.\d+\.:.*.:; 20 | $fmap{$f_category} .= " $f"; 21 | } 22 | 23 | sub split_hundreds { # split list of filenames into groups of 100. 24 | my $names = shift @_; 25 | my @A = split(" ", $names); 26 | my @ans = (); 27 | while (@A > 0) { 28 | my $group = ""; 29 | for ($x = 0; $x < 100 && @A>0; $x++) { 30 | $fname = pop @A; 31 | $group .= "$fname "; 32 | } 33 | push @ans, $group; 34 | } 35 | return @ans; 36 | } 37 | 38 | foreach $c (keys %fmap) { 39 | $n = 0; 40 | foreach $fgroup (split_hundreds($fmap{$c})) { 41 | $n += `grep -w WARNING $fgroup | wc -l`; 42 | } 43 | if ($n != 0) { 44 | print "$n warnings in $c\n" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /utils/sym2int.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | $ignore_oov = 0; 19 | 20 | for($x = 0; $x < 2; $x++) { 21 | if ($ARGV[0] eq "--map-oov") { 22 | shift @ARGV; 23 | $map_oov = shift @ARGV; 24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { 25 | # disallow '-f', the empty string and anything ending in words.txt as the 26 | # OOV symbol because these are likely command-line errors. 27 | die "the --map-oov option requires an argument"; 28 | } 29 | } 30 | if ($ARGV[0] eq "-f") { 31 | shift @ARGV; 32 | $field_spec = shift @ARGV; 33 | if ($field_spec =~ m/^\d+$/) { 34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 35 | } 36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 37 | if ($1 ne "") { 38 | $field_begin = $1 - 1; # Change to zero-based indexing. 39 | } 40 | if ($2 ne "") { 41 | $field_end = $2 - 1; # Change to zero-based indexing. 42 | } 43 | } 44 | if (!defined $field_begin && !defined $field_end) { 45 | die "Bad argument to -f option: $field_spec"; 46 | } 47 | } 48 | } 49 | 50 | $symtab = shift @ARGV; 51 | if (!defined $symtab) { 52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . 53 | "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" . 54 | "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | exit(1); 56 | }
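# Typical invocation (hypothetical paths): utils/sym2int.pl -f 2- data/lang/words.txt < data/train/text, which leaves field 1 (the utterance-id) untouched and maps every remaining word through the symbol table.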
54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | } 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 57 | while() { 58 | @A = split(" ", $_); 59 | @A == 2 || die "bad line in symbol table file: $_"; 60 | $sym2int{$A[0]} = $A[1] + 0; 61 | } 62 | 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up 64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } 65 | $map_oov = $sym2int{$map_oov}; 66 | } 67 | 68 | $num_warning = 0; 69 | $max_warning = 20; 70 | 71 | while (<>) { 72 | @A = split(" ", $_); 73 | @B = (); 74 | for ($n = 0; $n < @A; $n++) { 75 | $a = $A[$n]; 76 | if ( (!defined $field_begin || $n >= $field_begin) 77 | && (!defined $field_end || $n <= $field_end)) { 78 | $i = $sym2int{$a}; 79 | if (!defined ($i)) { 80 | if (defined $map_oov) { 81 | if ($num_warning++ < $max_warning) { 82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n"; 83 | if ($num_warning == $max_warning) { 84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n"; 85 | } 86 | } 87 | $i = $map_oov; 88 | } else { 89 | $pos = $n+1; 90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n"; 91 | } 92 | } 93 | $a = $i; 94 | } 95 | push @B, $a; 96 | } 97 | print join(" ", @B); 98 | print "\n"; 99 | } 100 | if ($num_warning > 0) { 101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 102 | } 103 | 104 | exit(0); 105 | -------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 
20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /waves_yesno/0_0_0_0_1_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_0_1_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_0_1_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_0_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_1_1.wav -------------------------------------------------------------------------------- 
/waves_yesno/0_0_1_1_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_1_0_1_0.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_1_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_1_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_1_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_0_1_0.wav -------------------------------------------------------------------------------- 
/waves_yesno/0_1_1_1_1_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_1_1_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_1_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_0_0_0_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_1_0_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_0_0_1.wav -------------------------------------------------------------------------------- 
/waves_yesno/1_1_1_0_0_1_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/README:
--------------------------------------------------------------------------------
1 | This dataset can be found at http://openslr.org/resources/1/waves_yesno.tar.gz
2 | 
3 | This dataset was created for the Kaldi project (see kaldi.sf.net),
4 | by a contributor who prefers to remain anonymous. The main point of the dataset is
5 | to provide a way to test out the Kaldi scripts for free.
6 | 
7 | The archive "waves_yesno.tar.gz" contains 60 .wav files, sampled at 8 kHz. All were recorded
8 | by the same male speaker, in English (although the individual is not a native speaker).
9 | In each file, the individual says 8 words; each word is either "yes" or "no", so each
10 | file is a random sequence of 8 yes-es or noes. There is no separate transcription provided; the
11 | sequence is encoded in the filename, with 1 for yes and 0 for no, for instance:
12 | 
13 | # tar -xvzf waves_yesno.tar.gz
14 | waves_yesno/1_0_1_1_1_0_1_0.wav
15 | waves_yesno/0_1_1_0_0_1_1_0.wav
16 | ...
17 | 
--------------------------------------------------------------------------------
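Note: the filename convention described in the README above is enough to reconstruct a reference transcript for every recording, which is the kind of mapping a data-preparation script can rely on. As a minimal sketch (not a file from this repository; the helper name transcript_from_filename and the uppercase YES/NO spelling are illustrative assumptions), the decoding takes only a few lines of Python:

    import os

    def transcript_from_filename(wav_path):
        # Strip directory and extension: 'waves_yesno/1_0_1_1_1_0_1_0.wav' -> '1_0_1_1_1_0_1_0'
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        # Map each digit to a word, per the README: 1 = yes, 0 = no
        words = {"1": "YES", "0": "NO"}
        return " ".join(words[d] for d in stem.split("_"))

    print(transcript_from_filename("waves_yesno/1_0_1_1_1_0_1_0.wav"))
    # -> YES NO YES YES YES NO YES NO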