├── .gitignore
├── LICENSE
├── README.md
├── conf
│   ├── mfcc.conf
│   └── topo_orig.proto
├── data_prep.py
├── lm
│   ├── prepare_lm.sh
│   └── yesno-unigram.arpabo
├── local
│   └── score.sh
├── path.sh
├── steps
│   ├── align_basis_fmllr.sh
│   ├── align_fmllr.sh
│   ├── align_fmllr_lats.sh
│   ├── align_lvtln.sh
│   ├── align_raw_fmllr.sh
│   ├── align_sgmm.sh
│   ├── align_sgmm2.sh
│   ├── align_si.sh
│   ├── append_feats.sh
│   ├── cleanup
│   │   ├── clean_and_segment_data.sh
│   │   ├── combine_short_segments.py
│   │   ├── create_segments_from_ctm.pl
│   │   ├── debug_lexicon.sh
│   │   ├── decode_segmentation.sh
│   │   ├── find_bad_utts.sh
│   │   ├── find_bad_utts_nnet.sh
│   │   ├── get_ctm_edits.py
│   │   ├── get_non_scored_words.py
│   │   ├── internal
│   │   │   ├── get_ctm_edits.py
│   │   │   ├── get_non_scored_words.py
│   │   │   ├── make_one_biased_lm.py
│   │   │   ├── modify_ctm_edits.py
│   │   │   ├── segment_ctm_edits.py
│   │   │   └── taint_ctm_edits.py
│   │   ├── lattice_oracle_align.sh
│   │   ├── make_biased_lm_graphs.sh
│   │   ├── make_biased_lms.py
│   │   ├── make_one_biased_lm.py
│   │   ├── make_segmentation_data_dir.sh
│   │   ├── make_segmentation_graph.sh
│   │   ├── make_utterance_fsts.pl
│   │   ├── make_utterance_graph.sh
│   │   ├── modify_ctm_edits.py
│   │   ├── segment_ctm_edits.py
│   │   ├── split_long_utterance.sh
│   │   └── taint_ctm_edits.py
│   ├── combine_ali_dirs.sh
│   ├── compute_cmvn_stats.sh
│   ├── conf
│   │   ├── append_eval_to_ctm.py
│   │   ├── append_prf_to_ctm.py
│   │   ├── apply_calibration.sh
│   │   ├── convert_ctm_to_tra.py
│   │   ├── lattice_depth_per_frame.sh
│   │   ├── parse_arpa_unigrams.py
│   │   ├── prepare_calibration_data.py
│   │   ├── prepare_word_categories.py
│   │   └── train_calibration.sh
│   ├── data
│   │   ├── data_dir_manipulation_lib.py
│   │   └── reverberate_data_dir.py
│   ├── decode.sh
│   ├── decode_basis_fmllr.sh
│   ├── decode_biglm.sh
│   ├── decode_combine.sh
│   ├── decode_fmllr.sh
│   ├── decode_fmllr_extra.sh
│   ├── decode_fmmi.sh
│   ├── decode_fromlats.sh
│   ├── decode_lvtln.sh
│   ├── decode_nnet.sh
│   ├── decode_nolats.sh
│   ├── decode_raw_fmllr.sh
│   ├── decode_sgmm.sh
│   ├── decode_sgmm2.sh
│   ├── decode_sgmm2_fromlats.sh
│   ├── decode_sgmm2_rescore.sh
│   ├── decode_sgmm2_rescore_project.sh
│   ├── decode_sgmm_fromlats.sh
│   ├── decode_sgmm_rescore.sh
│   ├── decode_si.sh
│   ├── decode_with_map.sh
│   ├── diagnostic
│   │   ├── analyze_alignments.sh
│   │   ├── analyze_lats.sh
│   │   ├── analyze_lattice_depth_stats.py
│   │   └── analyze_phone_length_stats.py
│   ├── get_ctm.sh
│   ├── get_fmllr_basis.sh
│   ├── get_lexicon_probs.sh
│   ├── get_prons.sh
│   ├── get_train_ctm.sh
│   ├── info
│   │   ├── chain_dir_info.pl
│   │   ├── gmm_dir_info.pl
│   │   ├── nnet2_dir_info.pl
│   │   └── nnet3_dir_info.pl
│   ├── kl_hmm
│   │   ├── build_tree.sh
│   │   ├── decode_kl_hmm.sh
│   │   └── train_kl_hmm.sh
│   ├── lmrescore.sh
│   ├── lmrescore_const_arpa.sh
│   ├── lmrescore_rnnlm_lat.sh
│   ├── make_denlats.sh
│   ├── make_denlats_sgmm.sh
│   ├── make_denlats_sgmm2.sh
│   ├── make_fbank.sh
│   ├── make_fbank_pitch.sh
│   ├── make_index.sh
│   ├── make_mfcc.sh
│   ├── make_mfcc_pitch.sh
│   ├── make_mfcc_pitch_online.sh
│   ├── make_phone_graph.sh
│   ├── make_plp.sh
│   ├── make_plp_pitch.sh
│   ├── mixup.sh
│   ├── nnet
│   │   ├── align.sh
│   │   ├── decode.sh
│   │   ├── make_bn_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_fmllr_feats.sh
│   │   ├── make_fmmi_feats.sh
│   │   ├── make_priors.sh
│   │   ├── pretrain_dbn.sh
│   │   ├── train.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mpe.sh
│   │   └── train_scheduler.sh
│   ├── nnet2
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── convert_lda_to_raw.sh
│   │   ├── convert_nnet1_to_nnet2.sh
│   │   ├── create_appended_model.sh
│   │   ├── decode.sh
│   │   ├── dump_bottleneck_features.sh
│   │   ├── get_egs.sh
│   │   ├── get_egs2.sh
│   │   ├── get_egs_discriminative2.sh
│   │   ├── get_lda.sh
│   │   ├── get_lda_block.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_perturbed_feats.sh
│   │   ├── make_denlats.sh
│   │   ├── make_multisplice_configs.py
│   │   ├── relabel_egs.sh
│   │   ├── relabel_egs2.sh
│   │   ├── remove_egs.sh
│   │   ├── retrain_fast.sh
│   │   ├── retrain_simple2.sh
│   │   ├── retrain_tanh.sh
│   │   ├── train_block.sh
│   │   ├── train_convnet_accel2.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_discriminative2.sh
│   │   ├── train_discriminative_multilang2.sh
│   │   ├── train_more.sh
│   │   ├── train_more2.sh
│   │   ├── train_multilang2.sh
│   │   ├── train_multisplice_accel2.sh
│   │   ├── train_multisplice_ensemble.sh
│   │   ├── train_pnorm.sh
│   │   ├── train_pnorm_accel2.sh
│   │   ├── train_pnorm_bottleneck_fast.sh
│   │   ├── train_pnorm_ensemble.sh
│   │   ├── train_pnorm_fast.sh
│   │   ├── train_pnorm_multisplice.sh
│   │   ├── train_pnorm_multisplice2.sh
│   │   ├── train_pnorm_simple.sh
│   │   ├── train_pnorm_simple2.sh
│   │   ├── train_tanh.sh
│   │   ├── train_tanh_bottleneck.sh
│   │   ├── train_tanh_fast.sh
│   │   └── update_nnet.sh
│   ├── nnet3
│   │   ├── adjust_priors.sh
│   │   ├── align.sh
│   │   ├── chain
│   │   │   ├── build_tree.sh
│   │   │   ├── gen_topo.pl
│   │   │   ├── gen_topo.py
│   │   │   ├── gen_topo2.py
│   │   │   ├── gen_topo3.py
│   │   │   ├── gen_topo4.py
│   │   │   ├── gen_topo5.py
│   │   │   ├── get_egs.sh
│   │   │   ├── nnet3_chain_lib.py
│   │   │   ├── train.py
│   │   │   └── train_tdnn.sh
│   │   ├── components.py
│   │   ├── decode.sh
│   │   ├── dot
│   │   │   ├── descriptor_parser.py
│   │   │   └── nnet3_to_dot.py
│   │   ├── get_egs.sh
│   │   ├── get_egs_discriminative.sh
│   │   ├── get_egs_targets.sh
│   │   ├── get_successful_models.py
│   │   ├── lstm
│   │   │   ├── make_configs.py
│   │   │   └── train.sh
│   │   ├── make_denlats.sh
│   │   ├── make_jesus_configs.py
│   │   ├── make_tdnn_configs.py
│   │   ├── nnet3_to_dot.sh
│   │   ├── nnet3_train_lib.py
│   │   ├── report
│   │   │   ├── generate_plots.py
│   │   │   └── nnet3_log_parse_lib.py
│   │   ├── tdnn
│   │   │   ├── make_configs.py
│   │   │   ├── train.sh
│   │   │   └── train_raw_nnet.sh
│   │   ├── train_discriminative.sh
│   │   ├── train_dnn.py
│   │   ├── train_rnn.py
│   │   └── train_tdnn.sh
│   ├── online
│   │   ├── decode.sh
│   │   ├── nnet2
│   │   │   ├── align.sh
│   │   │   ├── copy_data_dir.sh
│   │   │   ├── decode.sh
│   │   │   ├── dump_nnet_activations.sh
│   │   │   ├── extract_ivectors.sh
│   │   │   ├── extract_ivectors_online.sh
│   │   │   ├── get_egs.sh
│   │   │   ├── get_egs2.sh
│   │   │   ├── get_egs_discriminative2.sh
│   │   │   ├── make_denlats.sh
│   │   │   ├── prepare_online_decoding.sh
│   │   │   ├── prepare_online_decoding_retrain.sh
│   │   │   ├── prepare_online_decoding_transfer.sh
│   │   │   ├── train_diag_ubm.sh
│   │   │   └── train_ivector_extractor.sh
│   │   ├── nnet3
│   │   │   ├── decode.sh
│   │   │   └── prepare_online_decoding.sh
│   │   └── prepare_online_decoding.sh
│   ├── oracle_wer.sh
│   ├── paste_feats.sh
│   ├── resegment_data.sh
│   ├── resegment_text.sh
│   ├── rnnlmrescore.sh
│   ├── score_kaldi.sh
│   ├── score_kaldi_compare.sh
│   ├── search_index.sh
│   ├── select_feats.sh
│   ├── shift_feats.sh
│   ├── tandem
│   │   ├── align_fmllr.sh
│   │   ├── align_sgmm.sh
│   │   ├── align_sgmm2.sh
│   │   ├── align_si.sh
│   │   ├── decode.sh
│   │   ├── decode_fmllr.sh
│   │   ├── decode_sgmm.sh
│   │   ├── decode_sgmm2.sh
│   │   ├── decode_si.sh
│   │   ├── make_denlats.sh
│   │   ├── make_denlats_sgmm.sh
│   │   ├── make_denlats_sgmm2.sh
│   │   ├── mk_aslf_lda_mllt.sh
│   │   ├── mk_aslf_sgmm2.sh
│   │   ├── train_deltas.sh
│   │   ├── train_lda_mllt.sh
│   │   ├── train_mllt.sh
│   │   ├── train_mmi.sh
│   │   ├── train_mmi_sgmm.sh
│   │   ├── train_mmi_sgmm2.sh
│   │   ├── train_mono.sh
│   │   ├── train_sat.sh
│   │   ├── train_sgmm.sh
│   │   ├── train_sgmm2.sh
│   │   └── train_ubm.sh
│   ├── train_deltas.sh
│   ├── train_diag_ubm.sh
│   ├── train_lda_mllt.sh
│   ├── train_lvtln.sh
│   ├── train_map.sh
│   ├── train_mmi.sh
│   ├── train_mmi_fmmi.sh
│   ├── train_mmi_fmmi_indirect.sh
│   ├── train_mmi_sgmm.sh
│   ├── train_mmi_sgmm2.sh
│   ├── train_mono.sh
│   ├── train_mpe.sh
│   ├── train_nnet.sh
│   ├── train_quick.sh
│   ├── train_raw_sat.sh
│   ├── train_sat.sh
│   ├── train_sat_basis.sh
│   ├── train_segmenter.sh
│   ├── train_sgmm.sh
│   ├── train_sgmm2.sh
│   ├── train_sgmm2_group.sh
│   ├── train_smbr.sh
│   ├── train_ubm.sh
│   └── word_align_lattices.sh
├── utils
│   ├── add_disambig.pl
│   ├── add_lex_disambig.pl
│   ├── analyze_segments.pl
│   ├── apply_map.pl
│   ├── best_wer.sh
│   ├── build_const_arpa_lm.sh
│   ├── combine_data.sh
│   ├── convert_ctm.pl
│   ├── convert_slf.pl
│   ├── convert_slf_parallel.sh
│   ├── copy_data_dir.sh
│   ├── create_data_link.pl
│   ├── create_split_dir.pl
│   ├── data
│   │   ├── combine_data.sh
│   │   ├── combine_short_segments.sh
│   │   ├── copy_data_dir.sh
│   │   ├── extend_segment_times.py
│   │   ├── fix_data_dir.sh
│   │   ├── get_frame_shift.sh
│   │   ├── get_num_frames.sh
│   │   ├── get_segments_for_data.sh
│   │   ├── get_utt2dur.sh
│   │   ├── internal
│   │   │   ├── choose_utts_to_combine.py
│   │   │   └── modify_speaker_info.py
│   │   ├── modify_speaker_info.sh
│   │   ├── normalize_data_range.pl
│   │   ├── perturb_data_dir_speed.sh
│   │   ├── perturb_data_dir_speed_3way.sh
│   │   ├── perturb_data_dir_volume.sh
│   │   ├── remove_dup_utts.sh
│   │   ├── split_data.sh
│   │   ├── subsegment_data_dir.sh
│   │   ├── subset_data_dir.sh
│   │   └── validate_data_dir.sh
│   ├── dict_dir_add_pronprobs.sh
│   ├── eps2disambig.pl
│   ├── filt.py
│   ├── filter_scp.pl
│   ├── filter_scps.pl
│   ├── find_arpa_oovs.pl
│   ├── fix_ctm.sh
│   ├── fix_data_dir.sh
│   ├── format_lm.sh
│   ├── format_lm_sri.sh
│   ├── gen_topo.pl
│   ├── int2sym.pl
│   ├── kwslist_post_process.pl
│   ├── lang
│   │   ├── add_lex_disambig.pl
│   │   ├── check_g_properties.pl
│   │   ├── check_phones_compatible.sh
│   │   ├── prepare_lang.sh
│   │   └── validate_lang.pl
│   ├── ln.pl
│   ├── make_lexicon_fst.pl
│   ├── make_lexicon_fst_silprob.pl
│   ├── make_phone_bigram_lang.sh
│   ├── make_unigram_grammar.pl
│   ├── map_arpa_lm.pl
│   ├── mkgraph.sh
│   ├── nnet-cpu
│   │   ├── make_nnet_config.pl
│   │   ├── make_nnet_config_block.pl
│   │   ├── make_nnet_config_preconditioned.pl
│   │   └── update_learning_rates.pl
│   ├── nnet
│   │   ├── gen_dct_mat.py
│   │   ├── gen_hamm_mat.py
│   │   ├── gen_splice.py
│   │   ├── make_blstm_proto.py
│   │   ├── make_cnn2d_proto.py
│   │   ├── make_cnn_proto.py
│   │   ├── make_lstm_proto.py
│   │   └── make_nnet_proto.py
│   ├── parse_options.sh
│   ├── pbs.pl
│   ├── perturb_data_dir_speed.sh
│   ├── pinyin_map.pl
│   ├── prepare_lang.sh
│   ├── prepare_online_nnet_dist_build.sh
│   ├── queue.pl
│   ├── reduce_data_dir.sh
│   ├── reduce_data_dir_by_reclist.sh
│   ├── remove_data_links.sh
│   ├── remove_oovs.pl
│   ├── reverse_arpa.py
│   ├── rnnlm_compute_scores.sh
│   ├── run.pl
│   ├── s2eps.pl
│   ├── scoring
│   │   ├── wer_ops_details.pl
│   │   ├── wer_per_spk_details.pl
│   │   ├── wer_per_utt_details.pl
│   │   └── wer_report.pl
│   ├── segmentation.pl
│   ├── show_lattice.sh
│   ├── shuffle_list.pl
│   ├── slurm.pl
│   ├── spk2utt_to_utt2spk.pl
│   ├── split_data.sh
│   ├── split_scp.pl
│   ├── ssh.pl
│   ├── subset_data_dir.sh
│   ├── subset_data_dir_tr_cv.sh
│   ├── subset_scp.pl
│   ├── summarize_logs.pl
│   ├── summarize_warnings.pl
│   ├── sym2int.pl
│   ├── utt2spk_to_spk2utt.pl
│   ├── validate_data_dir.sh
│   ├── validate_dict_dir.pl
│   ├── validate_lang.pl
│   └── write_kwslist.pl
└── waves_yesno
    ├── 0_0_0_0_1_1_1_1.wav
    ├── 0_0_0_1_0_0_0_1.wav
    ├── 0_0_0_1_0_1_1_0.wav
    ├── 0_0_1_0_0_0_1_0.wav
    ├── 0_0_1_0_0_1_1_0.wav
    ├── 0_0_1_0_0_1_1_1.wav
    ├── 0_0_1_0_1_0_0_0.wav
    ├── 0_0_1_0_1_0_0_1.wav
    ├── 0_0_1_0_1_0_1_1.wav
    ├── 0_0_1_1_0_0_0_1.wav
    ├── 0_0_1_1_0_1_0_0.wav
    ├── 0_0_1_1_0_1_1_0.wav
    ├── 0_0_1_1_0_1_1_1.wav
    ├── 0_0_1_1_1_0_0_0.wav
    ├── 0_0_1_1_1_0_0_1.wav
    ├── 0_0_1_1_1_1_0_0.wav
    ├── 0_0_1_1_1_1_1_0.wav
    ├── 0_1_0_0_0_1_0_0.wav
    ├── 0_1_0_0_0_1_1_0.wav
    ├── 0_1_0_0_1_0_1_0.wav
    ├── 0_1_0_0_1_0_1_1.wav
    ├── 0_1_0_1_0_0_0_0.wav
    ├── 0_1_0_1_1_0_1_0.wav
    ├── 0_1_0_1_1_1_0_0.wav
    ├── 0_1_1_0_0_1_1_0.wav
    ├── 0_1_1_0_0_1_1_1.wav
    ├── 0_1_1_1_0_0_0_0.wav
    ├── 0_1_1_1_0_0_1_0.wav
    ├── 0_1_1_1_0_1_0_1.wav
    ├── 0_1_1_1_1_0_1_0.wav
    ├── 0_1_1_1_1_1_1_1.wav
    ├── 1_0_0_0_0_0_0_0.wav
    ├── 1_0_0_0_0_0_0_1.wav
    ├── 1_0_0_0_0_0_1_1.wav
    ├── 1_0_0_0_1_0_0_1.wav
    ├── 1_0_0_1_0_1_1_1.wav
    ├── 1_0_1_0_1_0_0_1.wav
    ├── 1_0_1_1_0_1_1_1.wav
    ├── 1_0_1_1_1_0_1_0.wav
    ├── 1_0_1_1_1_1_0_1.wav
    ├── 1_1_0_0_0_0_0_1.wav
    ├── 1_1_0_0_0_1_1_1.wav
    ├── 1_1_0_0_1_0_1_0.wav
    ├── 1_1_0_0_1_0_1_1.wav
    ├── 1_1_0_0_1_1_1_0.wav
    ├── 1_1_0_1_0_1_0_0.wav
    ├── 1_1_0_1_0_1_1_0.wav
    ├── 1_1_0_1_1_0_0_1.wav
    ├── 1_1_0_1_1_0_1_1.wav
    ├── 1_1_0_1_1_1_1_0.wav
    ├── 1_1_1_0_0_0_0_1.wav
    ├── 1_1_1_0_0_1_0_1.wav
    ├── 1_1_1_0_0_1_1_1.wav
    ├── 1_1_1_0_1_0_1_0.wav
    ├── 1_1_1_0_1_0_1_1.wav
    ├── 1_1_1_1_0_0_1_0.wav
    ├── 1_1_1_1_0_1_0_0.wav
    ├── 1_1_1_1_1_0_0_0.wav
    ├── 1_1_1_1_1_1_0_0.wav
    ├── 1_1_1_1_1_1_1_1.wav
    └── README

--------------------------------------------------------------------------------
/conf/mfcc.conf:
--------------------------------------------------------------------------------
--use-energy=false   # only non-default option.
--sample-frequency=8000 # Switchboard is sampled at 8kHz

--------------------------------------------------------------------------------
/conf/topo_orig.proto:
--------------------------------------------------------------------------------
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
<State> 5 </State>
</TopologyEntry>
</Topology>

--------------------------------------------------------------------------------
/data_prep.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python

import os
import os.path
import sys

zeroes = []
ones = []
for fn in os.listdir('waves_yesno'):
    if fn.startswith('0'):
        zeroes.append(fn)  # => training set
    elif fn.startswith('1'):
        ones.append(fn)    # => test set


def text(filenames):
    # "text": one line per utterance, "<utterance-id> <transcript>",
    # where the transcript is recovered from the filename digits.
    results = []
    for filename in filenames:
        basename = filename.split('.')[0]
        transcript = basename.replace('1', 'YES').replace('0', 'NO').replace('_', ' ')
        results.append("{} {}".format(basename, transcript))
    return '\n'.join(sorted(results))

with open('data/train_yesno/text', 'w') as train_text, open('data/test_yesno/text', 'w') as test_text:
    train_text.write(text(zeroes))
    test_text.write(text(ones))


def wav_scp(filenames):
    # "wav.scp": "<utterance-id> <path-to-wav-file>"
    results = []
    for filename in filenames:
        results.append("{} waves_yesno/{}".format(filename.split('.')[0], filename))
    return '\n'.join(sorted(results))

with open('data/train_yesno/wav.scp', 'w') as train_scp, open('data/test_yesno/wav.scp', 'w') as test_scp:
    train_scp.write(wav_scp(zeroes))
    test_scp.write(wav_scp(ones))


def utt2spk(filenames):
    # "utt2spk": "<utterance-id> <speaker-id>"; the yesno corpus has a single
    # anonymous speaker, so every utterance maps to one dummy speaker id.
    results = []
    for filename in filenames:
        results.append("{} global".format(filename.split('.')[0]))
    return '\n'.join(sorted(results))

with open('data/train_yesno/utt2spk', 'w') as train_u2s, open('data/test_yesno/utt2spk', 'w') as test_u2s:
    train_u2s.write(utt2spk(zeroes))
    test_u2s.write(utt2spk(ones))

# note that spk2utt (the inverse mapping of utt2spk) can be generated by using
# a Kaldi util, once you have the utt2spk file.
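As the closing comment notes, spk2utt is just utt2spk inverted, and the stock
utilities shipped in utils/ can generate and sanity-check it. A minimal sketch
(directory names follow data_prep.py above; run after the script has finished):

  for d in data/train_yesno data/test_yesno; do
    utils/utt2spk_to_spk2utt.pl $d/utt2spk > $d/spk2utt
    utils/validate_data_dir.sh --no-feats $d   # checks text, wav.scp, utt2spk/spk2utt
  done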
--------------------------------------------------------------------------------
/lm/prepare_lm.sh:
--------------------------------------------------------------------------------
#!/bin/bash

. path.sh

echo Preparing language models for test

for lm_suffix in tg; do
  test=data/lang_test_${lm_suffix}

  rm -rf data/lang_test_${lm_suffix}
  cp -r data/lang data/lang_test_${lm_suffix}

  arpa2fst --disambig-symbol=#0 --read-symbol-table=$test/words.txt lm/task.arpabo $test/G.fst

  fstisstochastic $test/G.fst

  # The output is like:
  # 9.14233e-05 -0.259833
  # we do expect the first of these 2 numbers to be close to zero (the second is
  # nonzero because the backoff weights make the states sum to >1).
  # Because of the <s> fiasco for these particular LMs, the first number is not
  # as close to zero as it could be.

  # Everything below is only for diagnostics.
  # Checking that G has no cycles with empty words on them (e.g. <s>, </s>);
  # this might cause determinization failure of CLG.
  # #0 is treated as an empty word.
  mkdir -p tmpdir.g
  awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} END{print "0 0 #0 #0"; print "0";}' \
    < dict/lexicon.txt > tmpdir.g/select_empty.fst.txt
  fstcompile --isymbols=$test/words.txt --osymbols=$test/words.txt tmpdir.g/select_empty.fst.txt | \
    fstarcsort --sort_type=olabel | fstcompose - $test/G.fst > tmpdir.g/empty_words.fst
  fstinfo tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' &&
    echo "Language model has cycles with empty words" && exit 1
  rm -r tmpdir.g
done

echo "Succeeded in formatting data."

--------------------------------------------------------------------------------
/lm/yesno-unigram.arpabo:
--------------------------------------------------------------------------------
\data\
ngram 1=4

\1-grams:
-1 NO
-1 YES
-99 <s>
-1 </s>

\end\
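The first column of the ARPA file holds base-10 log probabilities, so "-1 NO"
means P(NO) = 10^-1; <s> gets -99 (effectively zero) because a sentence may
start with it but it is never predicted as a next word. Once prepare_lm.sh has
compiled the LM, the resulting grammar FST can be eyeballed with OpenFst's
text printer, e.g. (a sketch, assuming path.sh has been sourced):

  fstprint --isymbols=data/lang_test_tg/words.txt \
           --osymbols=data/lang_test_tg/words.txt data/lang_test_tg/G.fst | head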
-f $f ] && echo "score.sh: no such file $f" && exit 1; 38 | done 39 | 40 | mkdir -p $dir/scoring/log 41 | 42 | cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt 43 | 44 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.log \ 45 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 46 | lattice-add-penalty --word-ins-penalty=$word_ins_penalty ark:- ark:- \| \ 47 | lattice-best-path --word-symbol-table=$symtab \ 48 | ark:- ark,t:$dir/scoring/LMWT.tra || exit 1; 49 | 50 | # Note: the double level of quoting for the sed command 51 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \ 52 | cat $dir/scoring/LMWT.tra \| \ 53 | utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ 54 | compute-wer --text --mode=present \ 55 | ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT || exit 1; 56 | 57 | exit 0; 58 | -------------------------------------------------------------------------------- /path.sh: -------------------------------------------------------------------------------- 1 | export KALDI_ROOT=YOUR_KALDI_PATH 2 | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh 3 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH 4 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 5 | . $KALDI_ROOT/tools/config/common_path.sh 6 | export LC_ALL=C 7 | 8 | 9 | -------------------------------------------------------------------------------- /steps/append_feats.sh: -------------------------------------------------------------------------------- 1 | paste_feats.sh -------------------------------------------------------------------------------- /steps/cleanup/make_utterance_fsts.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | # makes unigram decoding-graph FSTs specific to each utterances, where the 5 | # supplied top-n-words list together with the supervision text of the utterance are 6 | # combined. 7 | 8 | if (@ARGV != 1) { 9 | print STDERR "** Warning: this script is deprecated and will be removed. See\n" . 10 | "** steps/cleanup/make_biased_lm_graphs.sh.\n" . 11 | "Usage: make_utterance_fsts.pl top-words-file.txt < text-archive > fsts-archive\n" . 12 | "e.g.: utils/sym2int.pl -f 2- data/lang/words.txt data/train/text | \\\n" . 13 | " make_utterance_fsts.pl exp/foo/top_words.int | compile-train-graphs-fsts ... 
\n"; 14 | exit(1); 15 | } 16 | 17 | ($top_words_file) = @ARGV; 18 | 19 | open(F, "<$top_words_file") || die "opening $top_words_file"; 20 | 21 | %top_word_probs = ( ); 22 | 23 | while() { 24 | @A = split; 25 | (@A == 2 && $A[0] > 0.0) || die "Bad line $_ in $top_words_file"; 26 | $A[1] =~ m/^[0-9]+$/ || die "Expecting numeric word-ids in $top_words_file: $_\n"; 27 | $top_word_probs{$A[1]} += $A[0]; 28 | } 29 | 30 | while () { 31 | @A = split; 32 | $utterance_id = shift @A; 33 | print "$utterance_id\n"; 34 | $num_words = @A + 0; # length of array @A 35 | %word_probs = %top_word_probs; 36 | foreach $w (@A) { 37 | $w =~ m/^[0-9]+$/ || die "Expecting numeric word-ids as stdin: $_"; 38 | $word_probs{$w} += 1.0 / $num_words; 39 | } 40 | foreach $w (keys %word_probs) { 41 | $prob = $word_probs{$w}; 42 | $prob > 0.0 || die "Word $w with bad probability $prob, utterance-id = $utterance_id\n"; 43 | $cost = -log($prob); 44 | print "0 0 $w $w $cost\n"; 45 | } 46 | $final_cost = -log(1.0 / $num_words); 47 | print "0 $final_cost\n"; 48 | print "\n"; # Empty line terminates the FST in the text-archive format. 49 | } 50 | -------------------------------------------------------------------------------- /steps/combine_ali_dirs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Xiaohui Zhang Apache 2.0. 3 | 4 | # This srcipt operates on alignment directories, such as exp/tri4a_ali 5 | # the output is a new ali dir which has alignments from all the input ali dirs 6 | 7 | # Begin configuration section. 8 | cmd=run.pl 9 | extra_files= 10 | num_jobs=4 11 | # End configuration section. 12 | echo "$0 $@" # Print the command line for logging 13 | 14 | if [ -f path.sh ]; then . ./path.sh; fi 15 | . parse_options.sh || exit 1; 16 | 17 | if [[ $# -lt 3 ]]; then 18 | echo "Usage: $0 [options] ..." 19 | echo "e.g.: $0 --num-jobs 32 data/train exp/tri3_ali_combined exp/tri3_ali_1 exp_tri3_ali_2" 20 | echo "Options:" 21 | echo " --extra-files # specify addtional files in 'src-ali-dir1' to copy" 22 | echo " --num-jobs # number of jobs used to split the data directory." 23 | echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones." 24 | echo " Other than alignments, only files from the first src ali dir are copied." 25 | exit 1; 26 | fi 27 | 28 | data=$1; 29 | shift; 30 | dest=$1; 31 | shift; 32 | first_src=$1; 33 | 34 | mkdir -p $dest; 35 | rm $dest/{ali.*.gz,num_jobs} 2>/dev/null 36 | 37 | cp $first_src/phones.txt $dest || exit 1; 38 | 39 | export LC_ALL=C 40 | 41 | for dir in $*; do 42 | if [ ! -f $dir/ali.1.gz ]; then 43 | echo "$0: check if alignments (ali.*.gz) are present in $dir." 44 | exit 1; 45 | fi 46 | done 47 | 48 | for dir in $*; do 49 | for f in tree; do 50 | diff $first_src/$f $dir/$f 1>/dev/null 2>&1 51 | if [ $? -ne 0 ]; then 52 | echo "$0: Cannot combine alignment directories with different $f files." 53 | fi 54 | done 55 | done 56 | 57 | for f in final.mdl tree cmvn_opts num_jobs $extra_files; do 58 | if [ ! -f $first_src/$f ]; then 59 | echo "combine_ali_dir.sh: no such file $first_src/$f" 60 | exit 1; 61 | fi 62 | cp $first_src/$f $dest/ 63 | done 64 | 65 | src_id=0 66 | temp_dir=$dest/temp 67 | [ -d $temp_dir ] && rm -r $temp_dir; 68 | mkdir -p $temp_dir 69 | echo "$0: dumping alignments in each source directory as single archive and index." 
--------------------------------------------------------------------------------
/steps/combine_ali_dirs.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2016  Xiaohui Zhang  Apache 2.0.

# This script operates on alignment directories, such as exp/tri4a_ali.
# The output is a new ali dir which has alignments from all the input ali dirs.

# Begin configuration section.
cmd=run.pl
extra_files=
num_jobs=4
# End configuration section.
echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [[ $# -lt 3 ]]; then
  echo "Usage: $0 [options] <data> <dest-ali-dir> <src-ali-dir1> <src-ali-dir2> ..."
  echo "e.g.: $0 --num-jobs 32 data/train exp/tri3_ali_combined exp/tri3_ali_1 exp_tri3_ali_2"
  echo "Options:"
  echo " --extra-files <file1 file2...>   # specify additional files in 'src-ali-dir1' to copy"
  echo " --num-jobs <nj>                  # number of jobs used to split the data directory."
  echo " Note, files that don't appear in the first source dir will not be added even if they appear in later ones."
  echo " Other than alignments, only files from the first src ali dir are copied."
  exit 1;
fi

data=$1;
shift;
dest=$1;
shift;
first_src=$1;

mkdir -p $dest;
rm $dest/{ali.*.gz,num_jobs} 2>/dev/null

cp $first_src/phones.txt $dest || exit 1;

export LC_ALL=C

for dir in $*; do
  if [ ! -f $dir/ali.1.gz ]; then
    echo "$0: check if alignments (ali.*.gz) are present in $dir."
    exit 1;
  fi
done

for dir in $*; do
  for f in tree; do
    diff $first_src/$f $dir/$f 1>/dev/null 2>&1
    if [ $? -ne 0 ]; then
      echo "$0: Cannot combine alignment directories with different $f files."
    fi
  done
done

for f in final.mdl tree cmvn_opts num_jobs $extra_files; do
  if [ ! -f $first_src/$f ]; then
    echo "combine_ali_dir.sh: no such file $first_src/$f"
    exit 1;
  fi
  cp $first_src/$f $dest/
done

src_id=0
temp_dir=$dest/temp
[ -d $temp_dir ] && rm -r $temp_dir;
mkdir -p $temp_dir
echo "$0: dumping alignments in each source directory as single archive and index."
for dir in $*; do
  src_id=$((src_id + 1))
  cur_num_jobs=$(cat $dir/num_jobs) || exit 1;
  alis=$(for n in $(seq $cur_num_jobs); do echo -n "$dir/ali.$n.gz "; done)
  $cmd $dir/log/copy_alignments.log \
    copy-int-vector "ark:gunzip -c $alis|" \
    ark,scp:$temp_dir/ali.$src_id.ark,$temp_dir/ali.$src_id.scp || exit 1;
done
sort -m $temp_dir/ali.*.scp > $temp_dir/ali.scp || exit 1;

echo "$0: splitting data to get reference utt2spk for individual ali.JOB.gz files."
utils/split_data.sh $data $num_jobs || exit 1;

echo "$0: splitting the alignments to appropriate chunks according to the reference utt2spk files."
utils/filter_scps.pl JOB=1:$num_jobs \
  $data/split$num_jobs/JOB/utt2spk $temp_dir/ali.scp $temp_dir/ali.JOB.scp

for i in `seq 1 $num_jobs`; do
  copy-int-vector scp:$temp_dir/ali.${i}.scp "ark:|gzip -c >$dest/ali.$i.gz" || exit 1;
done

echo $num_jobs > $dest/num_jobs || exit 1

echo "$0: checking the alignment files generated have at least 90% of the utterances."
for i in `seq 1 $num_jobs`; do
  num_lines=`cat $temp_dir/ali.$i.scp | wc -l` || exit 1;
  num_lines_tot=`cat $data/split$num_jobs/$i/utt2spk | wc -l` || exit 1;
  python -c "import sys;
percent = 100.0 * float($num_lines) / $num_lines_tot
if percent < 90 :
  print ('$dest/ali.$i.gz {0}% utterances missing.'.format(percent))" || exit 1;
done
rm -r $temp_dir 2>/dev/null

echo "Combined alignments and stored in $dest"
exit 0
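To spot-check the result, the combined alignments can be dumped back to text
with the same binary the script itself uses (a sketch; the directory name
follows the usage example above):

  gunzip -c exp/tri3_ali_combined/ali.1.gz | copy-int-vector ark:- ark,t:- | head -n 2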
--------------------------------------------------------------------------------
/steps/conf/append_eval_to_ctm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys,operator

# Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
# (i.e. the output of 'align-text' post-processed by 'wer_per_utt_details.pl')

# The tags in the appended column are:
# 'C' = correct
# 'S' = substitution
# 'I' = insertion
# 'U' = unknown (not part of scored segment)

if len(sys.argv) != 4:
  print 'Usage: %s <eval-in> <ctm-in> <ctm-eval-out>' % __file__
  sys.exit(1)
dummy, eval_in, ctm_in, ctm_eval_out = sys.argv

if ctm_eval_out == '-': ctm_eval_out = '/dev/stdout'

# Read the evaluation,
eval_vec = dict()
with open(eval_in, 'r') as f:
  while True:
    # Reading 4 lines encoding one utterance,
    ref = f.readline()
    hyp = f.readline()
    op = f.readline()
    csid = f.readline()
    if not ref: break
    # Parse the input,
    utt,tag,hyp_vec = hyp.split(' ',2)
    assert(tag == 'hyp')
    utt,tag,op_vec = op.split(' ',2)
    assert(tag == 'op')
    hyp_vec = hyp_vec.split()
    op_vec = op_vec.split()
    # Fill the eval vector with symbols 'C', 'S', 'I',
    assert(utt not in eval_vec)
    eval_vec[utt] = []
    for op,hyp in zip(op_vec, hyp_vec):
      if hyp != '<eps>': eval_vec[utt].append(op)

# Load the 'ctm' into dictionary,
ctm = dict()
with open(ctm_in) as f:
  for l in f:
    utt, ch, beg, dur, wrd, conf = l.split()
    if not utt in ctm: ctm[utt] = []
    ctm[utt].append((utt, ch, float(beg), float(dur), wrd, float(conf)))

# Build the 'ctm' with 'eval' column added,
ctm_eval = []
for utt,ctm_part in ctm.iteritems():
  ctm_part.sort(key = operator.itemgetter(2)) # Sort by 'beg' time,
  # extending the 'tuple' by '+':
  merged = [ tup + (evl,) for tup,evl in zip(ctm_part,eval_vec[utt]) ]
  ctm_eval.extend(merged)

# Sort again,
ctm_eval.sort(key = operator.itemgetter(0,1,2))

# Store,
with open(ctm_eval_out,'w') as f:
  for tup in ctm_eval:
    f.write('%s %s %f %f %s %f %s\n' % tup)
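For reference, an input CTM row is "utt chan beg dur word conf", and the script
appends one of C/S/I/U as a seventh column, e.g. (made-up values):

  utt_001 1 0.320000 0.170000 YES 0.970000 C
  utt_001 1 0.490000 0.300000 NO 0.540000 S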
--------------------------------------------------------------------------------
/steps/conf/append_prf_to_ctm.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys

# Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM':
# (parsed from the 'prf' output of 'sclite')

# The tags in the appended column are:
# 'C' = correct
# 'S' = substitution
# 'I' = insertion
# 'U' = unknown (not part of scored segment)

# Parse options,
if len(sys.argv) != 4:
  print "Usage: %s <prf> <ctm-in> <ctm-out>" % __file__
  sys.exit(1)
prf_file, ctm_file, ctm_out_file = sys.argv[1:]

if ctm_out_file == '-': ctm_out_file = '/dev/stdout'

# Load the prf file,
prf = []
with open(prf_file) as f:
  for l in f:
    # Store the data,
    if l[:5] == 'File:':
      file_id = l.split()[1]
    if l[:8] == 'Channel:':
      chan = l.split()[1]
    if l[:5] == 'H_T1:':
      h_t1 = l
    if l[:5] == 'Eval:':
      evl = l
      prf.append((file_id,chan,h_t1,evl))

# Parse the prf records into dictionary,
prf_dict = dict()
for (f,c,t,e) in prf:
  t_pos = 0 # position in the 't' string,
  while t_pos < len(t):
    t1 = t[t_pos:].split(' ',1)[0] # get 1st token at 't_pos'
    try:
      # get word evaluation letter 'C,S,I',
      evl = e[t_pos] if e[t_pos] != ' ' else 'C'
      # add to dictionary,
      key='%s,%s' % (f,c) # file,channel
      if key not in prf_dict: prf_dict[key] = dict()
      prf_dict[key][float(t1)] = evl
    except ValueError:
      pass
    t_pos += len(t1)+1 # advance position for parsing,

# Load the ctm file (with confidences),
with open(ctm_file) as f:
  ctm = [ l.split() for l in f ]

# Append the sclite alignment tags to ctm,
ctm_out = []
for f, chan, beg, dur, wrd, conf in ctm:
  # U = unknown, C = correct, S = substitution, I = insertion,
  sclite_tag = 'U'
  try:
    sclite_tag = prf_dict[('%s,%s'%(f,chan)).lower()][float(beg)]
  except KeyError:
    pass
  ctm_out.append([f,chan,beg,dur,wrd,conf,sclite_tag])

# Save the augmented ctm file,
with open(ctm_out_file, 'w') as f:
  f.writelines([' '.join(ctm_record)+'\n' for ctm_record in ctm_out])
-f $f ] && echo "$0: Missing file $f" && exit 1 37 | done 38 | [ -z "$cmd" ] && echo "$0: Missing --cmd '...'" && exit 1 39 | 40 | [ -d $dir/log ] || mkdir -p $dir/log 41 | nj=$(cat $latdir/num_jobs) 42 | lmwt=$(cat $caldir/lmwt) 43 | decode_mbr=$(cat $caldir/decode_mbr) 44 | 45 | # Store the setup, 46 | echo $lmwt >$dir/lmwt 47 | echo $decode_mbr >$dir/decode_mbr 48 | cp $calibration $dir/calibration.mdl 49 | cp $word_feats $dir/word_feats 50 | cp $word_categories $dir/word_categories 51 | 52 | # Create the ctm with raw confidences, 53 | # - we keep the timing relative to the utterance, 54 | if [ $stage -le 0 ]; then 55 | $cmd JOB=1:$nj $dir/log/get_ctm.JOB.log \ 56 | lattice-scale --inv-acoustic-scale=$lmwt "ark:gunzip -c $latdir/lat.JOB.gz|" ark:- \| \ 57 | lattice-limit-depth ark:- ark:- \| \ 58 | lattice-push --push-strings=false ark:- ark:- \| \ 59 | lattice-align-words-lexicon --max-expand=10.0 \ 60 | $lang/phones/align_lexicon.int $model ark:- ark:- \| \ 61 | lattice-to-ctm-conf --decode-mbr=$decode_mbr ark:- - \| \ 62 | utils/int2sym.pl -f 5 $lang/words.txt \ 63 | '>' $dir/JOB.ctm 64 | # Merge and clean, 65 | for ((n=1; n<=nj; n++)); do cat $dir/${n}.ctm; done > $dir/ctm 66 | rm $dir/*.ctm 67 | cat $dir/ctm | utils/sym2int.pl -f 5 $lang/words.txt >$dir/ctm_int 68 | fi 69 | 70 | # Compute lattice-depth, 71 | latdepth=$dir/lattice_frame_depth.ark 72 | if [ $stage -le 1 ]; then 73 | [ -e $latdepth ] || steps/conf/lattice_depth_per_frame.sh --cmd "$cmd" $latdir $dir 74 | fi 75 | 76 | # Create the forwarding data for logistic regression, 77 | if [ $stage -le 2 ]; then 78 | steps/conf/prepare_calibration_data.py --conf-feats $dir/forward_feats.ark \ 79 | --lattice-depth $latdepth $dir/ctm_int $word_feats $word_categories 80 | fi 81 | 82 | # Apply calibration model to dev, 83 | if [ $stage -le 3 ]; then 84 | logistic-regression-eval --apply-log=false $calibration \ 85 | ark:$dir/forward_feats.ark ark,t:- | \ 86 | awk '{ key=$1; p_corr=$4; sub(/,.*/,"",key); gsub(/\^/," ",key); print key,p_corr }' | \ 87 | utils/int2sym.pl -f 5 $lang/words.txt \ 88 | >$dir/ctm_calibrated 89 | fi 90 | 91 | exit 0 92 | -------------------------------------------------------------------------------- /steps/conf/convert_ctm_to_tra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | import sys, operator 7 | 8 | # This scripts loads a 'ctm' file and converts it into the 'tra' format: 9 | # "utt-key word1 word2 word3 ... wordN" 10 | # The 'utt-key' is the 1st column in the CTM. 11 | 12 | # Typically the CTM contains: 13 | # - utterance-relative timimng (i.e. 
--------------------------------------------------------------------------------
/steps/conf/convert_ctm_to_tra.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys, operator

# This script loads a 'ctm' file and converts it into the 'tra' format:
# "utt-key word1 word2 word3 ... wordN"
# The 'utt-key' is the 1st column in the CTM.

# Typically the CTM contains:
# - utterance-relative timing (i.e. prepared without 'utils/convert_ctm.pl')
# - confidences

if len(sys.argv) != 3:
  print 'Usage: %s <ctm-in> <tra-out>' % __file__
  sys.exit(1)
dummy, ctm_in, tra_out = sys.argv

if ctm_in == '-': ctm_in = '/dev/stdin'
if tra_out == '-': tra_out = '/dev/stdout'

# Load the 'ctm' into dictionary,
tra = dict()
with open(ctm_in) as f:
  for l in f:
    utt, ch, beg, dur, wrd, conf = l.split()
    if not utt in tra: tra[utt] = []
    tra[utt].append((float(beg),wrd))

# Store the data in 'tra' format,
with open(tra_out,'w') as f:
  for utt,tuples in tra.iteritems():
    tuples.sort(key = operator.itemgetter(0)) # Sort by 'beg' time,
    f.write('%s %s\n' % (utt,' '.join([t[1] for t in tuples])))
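For reference, the conversion just groups CTM rows by utterance and orders the
words by begin time, e.g. (illustrative values):

  ctm in:   utt_001 1 0.00 0.66 YES 0.98
            utt_001 1 0.70 0.51 NO 0.93
  tra out:  utt_001 YES NO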
--------------------------------------------------------------------------------
/steps/conf/lattice_depth_per_frame.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2015  Brno University of Technology (Author: Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License")

# Extract lattice-depth for each frame.

# Begin configuration
cmd=run.pl
# End configuration

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;

if [ $# != 2 ]; then
  echo "usage: $0 [opts] <decode-dir> <output-dir>"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>    # config containing options"
  echo "  --cmd"
  exit 1;
fi

set -euo pipefail

latdir=$1
dir=$2

[ ! -f $latdir/lat.1.gz ] && echo "Missing $latdir/lat.1.gz" && exit 1
nj=$(cat $latdir/num_jobs)

# Get the per-frame lattice-depth vectors,
$cmd JOB=1:$nj $dir/log/lattice_depth_per_frame.JOB.log \
  lattice-depth-per-frame "ark:gunzip -c $latdir/lat.JOB.gz |" ark,t:$dir/lattice_frame_depth.JOB.ark
# Merge,
for ((n=1; n<=nj; n++)); do cat $dir/lattice_frame_depth.${n}.ark; done >$dir/lattice_frame_depth.ark
rm $dir/lattice_frame_depth.*.ark

# Done!

--------------------------------------------------------------------------------
/steps/conf/parse_arpa_unigrams.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2015  Brno University of Technology (author: Karel Vesely)
# Apache 2.0

import sys, gzip, re

# Parse options,
if len(sys.argv) != 4:
  print "Usage: %s <words.txt> <arpa-gz> <unigrams-out>" % __file__
  sys.exit(0)
words_txt, arpa_gz, unigrams_out = sys.argv[1:]

if arpa_gz == '-': arpa_gz = '/dev/stdin'
if unigrams_out == '-': unigrams_out = '/dev/stdout'

# Load the words.txt,
words = [ l.split() for l in open(words_txt) ]

# Load the unigram log10 probabilities from the ARPA file,
wrd_log10 = dict()
with gzip.open(arpa_gz,'r') as f:
  read = False
  for l in f:
    if l.strip() == '\\1-grams:': read = True
    if l.strip() == '\\2-grams:': break
    if read and len(l.split())>=2:
      log10_p_unigram, wrd = re.split('[\t ]+',l.strip(),2)[:2]
      wrd_log10[wrd] = float(log10_p_unigram)

# Create list, 'wrd id log_p_unigram',
words_unigram = [[wrd, id, (wrd_log10[wrd] if wrd in wrd_log10 else -99)] for wrd,id in words ]

print >>sys.stderr, words_unigram[0]
# Store,
with open(unigrams_out,'w') as f:
  f.writelines(['%s %s %g\n' % (w,i,p) for (w,i,p) in words_unigram])
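Applied to a gzipped copy of the unigram LM above (the script reads the ARPA
file through gzip.open), the output format is "word id log10-probability",
with -99 substituted for words absent from the ARPA file. Assuming words.txt
assigns these ids, the result would look like:

  <eps> 0 -99
  NO 1 -1
  YES 2 -1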
[default %default]", type='int', default=20) 16 | (o, args) = parser.parse_args() 17 | 18 | if len(args) != 3: 19 | parser.print_help() 20 | sys.exit(1) 21 | words_file, text_file, category_mapping_file = args 22 | 23 | if text_file == '-': text_file = '/dev/stdin' 24 | if category_mapping_file == '-': category_mapping_file = '/dev/stdout' 25 | 26 | # Read the words from the 'tra' file, 27 | with open(text_file) as f: 28 | text_words = [ l.split()[1:] for l in f ] 29 | 30 | # Flatten the array of arrays of words, 31 | import itertools 32 | text_words = list(itertools.chain.from_iterable(text_words)) 33 | 34 | # Count the words (regardless if correct or incorrect), 35 | word_counts = dict() 36 | for w in text_words: 37 | if w not in word_counts: word_counts[w] = 0 38 | word_counts[w] += 1 39 | 40 | # Read the words.txt, 41 | with open(words_file) as f: 42 | word_id = [ l.split() for l in f ] 43 | 44 | # Append the categories, 45 | n=1 46 | word_id_cat=[] 47 | for word, idx in word_id: 48 | cat = 0 49 | if word in word_counts: 50 | if word_counts[word] > o.min_count: 51 | cat = n; n += 1 52 | word_id_cat.append([word, idx, str(cat)]) 53 | 54 | # Store the mapping, 55 | with open(category_mapping_file,'w') as f: 56 | f.writelines([' '.join(record)+'\n' for record in word_id_cat]) 57 | -------------------------------------------------------------------------------- /steps/data/data_dir_manipulation_lib.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def RunKaldiCommand(command, wait = True): 4 | """ Runs commands frequently seen in Kaldi scripts. These are usually a 5 | sequence of commands connected by pipes, so we use shell=True """ 6 | #logger.info("Running the command\n{0}".format(command)) 7 | p = subprocess.Popen(command, shell = True, 8 | stdout = subprocess.PIPE, 9 | stderr = subprocess.PIPE) 10 | 11 | if wait: 12 | [stdout, stderr] = p.communicate() 13 | if p.returncode is not 0: 14 | raise Exception("There was an error while running the command {0}\n".format(command)+"-"*10+"\n"+stderr) 15 | return stdout, stderr 16 | else: 17 | return p 18 | 19 | -------------------------------------------------------------------------------- /steps/decode_combine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # Combine two decoding directories by composing the lattices (we 6 | # apply a weight to each of the original weights, by default 0.5 each). 7 | # Note, this is not the only combination method, or the most normal combination 8 | # method. See also egs/wsj/s5/local/score_combine.sh. 9 | 10 | # Begin configuration section. 11 | weight1=0.5 # Weight on 1st set of lattices. 12 | cmd=run.pl 13 | skip_scoring=false 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | [ -f ./path.sh ] && . ./path.sh; # source the path. 19 | . 
--------------------------------------------------------------------------------
/steps/decode_combine.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.

# Combine two decoding directories by composing the lattices (we
# apply a weight to each of the original weights, by default 0.5 each).
# Note, this is not the only combination method, or the most normal combination
# method.  See also egs/wsj/s5/local/score_combine.sh.

# Begin configuration section.
weight1=0.5 # Weight on 1st set of lattices.
cmd=run.pl
skip_scoring=false
# End configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# -ne 5 ]; then
  echo "Usage: steps/decode_combine.sh [options] <data> <lang-dir|graph-dir> <decode-dir1> <decode-dir2> <decode-dir-out>"
  echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>    # config containing options"
  echo "  --cmd <cmd>               # Command to run in parallel with"
  echo "  --weight1 <float>         # Weight on 1st set of lattices (default 0.5)"
  exit 1;
fi

data=$1
lang_or_graphdir=$2
srcdir1=$3
srcdir2=$4
dir=$5

for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

nj1=`cat $srcdir1/num_jobs` || exit 1;
nj2=`cat $srcdir2/num_jobs` || exit 1;
[ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1;
nj=$nj1

mkdir -p $dir/log
echo $nj > $dir/num_jobs

# The lattice-interp command does the score interpolation (with composition),
# and the lattice-copy-backoff replaces the result with the 1st lattice, in
# cases where the composed result was empty.
$cmd JOB=1:$nj $dir/log/interp.JOB.log \
  lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \
    "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \
  lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \
    "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;

cp $srcdir1/final.mdl $dir/final.mdl

if ! $skip_scoring ; then
  [ ! -x local/score.sh ] && \
    echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
  local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir ||
    { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; }
fi

exit 0;

--------------------------------------------------------------------------------
/steps/decode_nnet.sh:
--------------------------------------------------------------------------------
nnet/decode.sh

--------------------------------------------------------------------------------
/steps/decode_si.sh:
--------------------------------------------------------------------------------
decode.sh
-f $f ] && echo "$0: expecting file $f to exist" && exit 1; 35 | done 36 | 37 | num_jobs=$(cat $dir/num_jobs) || exit 1 38 | 39 | mkdir -p $dir/log 40 | 41 | rm $dir/phone_stats.*.gz 2>/dev/null || true 42 | 43 | $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \ 44 | set -o pipefail '&&' ali-to-phones --write-lengths=true "$model" \ 45 | "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \ 46 | sed -E 's/^[^ ]+ //' \| \ 47 | awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ 48 | sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1 49 | 50 | if ! $cmd $dir/log/analyze_alignments.log \ 51 | gunzip -c "$dir/phone_stats.*.gz" \| \ 52 | steps/diagnostic/analyze_phone_length_stats.py $lang; then 53 | echo "$0: analyze_phone_length_stats.py failed, but ignoring the error (it's just for diagnostics)" 54 | fi 55 | 56 | grep WARNING $dir/log/analyze_alignments.log 57 | echo "$0: see stats in $dir/log/analyze_alignments.log" 58 | 59 | rm $dir/phone_stats.*.gz 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /steps/diagnostic/analyze_lats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2016. Apache 2.0. 4 | 5 | # This script does the same type of diagnostics as analyze_alignments.sh, except 6 | # it starts from lattices (so it has to convert the lattices to alignments 7 | # first). 8 | 9 | # begin configuration section. 10 | iter=final 11 | cmd=run.pl 12 | acwt=0.1 13 | #end configuration section. 14 | 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | [ -f ./path.sh ] && . ./path.sh 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 [options] (|) " 22 | echo " Options:" 23 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 24 | echo " --acwt # Acoustic scale for getting best-path (default: 0.1)" 25 | echo "e.g.:" 26 | echo "$0 data/lang exp/tri4b/decode_dev" 27 | echo "This script writes some diagnostics to /log/alignments.log" 28 | exit 1; 29 | fi 30 | 31 | lang=$1 32 | dir=$2 33 | 34 | model=$dir/../${iter}.mdl 35 | 36 | for f in $lang/words.txt $model $dir/lat.1.gz $dir/num_jobs; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | num_jobs=$(cat $dir/num_jobs) || exit 1 41 | 42 | mkdir -p $dir/log 43 | 44 | rm $dir/phone_stats.*.gz 2>/dev/null || true 45 | 46 | # this writes two archives of depth_tmp and ali_tmp of (depth per frame, alignment per frame). 
--------------------------------------------------------------------------------
/steps/diagnostic/analyze_lats.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright  Johns Hopkins University (Author: Daniel Povey) 2016.  Apache 2.0.

# This script does the same type of diagnostics as analyze_alignments.sh, except
# it starts from lattices (so it has to convert the lattices to alignments
# first).

# begin configuration section.
iter=final
cmd=run.pl
acwt=0.1
#end configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 2 ]; then
  echo "Usage: $0 [options] (<lang-dir>|<graph-dir>) <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --acwt <acoustic-scale>         # Acoustic scale for getting best-path (default: 0.1)"
  echo "e.g.:"
  echo "$0 data/lang exp/tri4b/decode_dev"
  echo "This script writes some diagnostics to <decode-dir>/log/alignments.log"
  exit 1;
fi

lang=$1
dir=$2

model=$dir/../${iter}.mdl

for f in $lang/words.txt $model $dir/lat.1.gz $dir/num_jobs; do
  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done

num_jobs=$(cat $dir/num_jobs) || exit 1

mkdir -p $dir/log

rm $dir/phone_stats.*.gz 2>/dev/null || true

# this writes two sets of archives, depth_tmp and ali_tmp (lattice depth per
# frame, best-path alignment per frame).
$cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
  lattice-depth-per-frame "ark:gunzip -c $dir/lat.JOB.gz|" "ark,t:|gzip -c > $dir/depth_tmp.JOB.gz" ark:- \| \
  lattice-best-path --acoustic-scale=$acwt ark:- ark:/dev/null "ark,t:|gzip -c >$dir/ali_tmp.JOB.gz" || exit 1

$cmd JOB=1:$num_jobs $dir/log/get_lattice_stats.JOB.log \
  ali-to-phones --write-lengths=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
  sed -E 's/^[^ ]+ //' \| \
  awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \
  sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1


$cmd $dir/log/analyze_alignments.log \
  gunzip -c "$dir/phone_stats.*.gz" \| \
  steps/diagnostic/analyze_phone_length_stats.py $lang || exit 1

grep WARNING $dir/log/analyze_alignments.log
echo "$0: see stats in $dir/log/analyze_alignments.log"


# note: below, some things that would be interpreted by the shell have to be
# escaped since it needs to be passed to $cmd.
# the 'paste' command will paste together the phone-indexes and the depths
# so that one line will be like utt-id1 phone1 phone2 phone3 .. utt-id1 depth1 depth2 depth3 ...
# the awk command computes counts of pairs (phone, lattice-depth) and outputs lines
# containing 3 integers representing:
# phone lattice_depth, count[phone,lattice_depth]
$cmd JOB=1:$num_jobs $dir/log/lattice_best_path.JOB.log \
  ali-to-phones --per-frame=true "$model" "ark:gunzip -c $dir/ali_tmp.JOB.gz|" ark,t:- \| \
  paste /dev/stdin '<(' gunzip -c $dir/depth_tmp.JOB.gz ')' \| \
  awk '{ half=NF/2; for (n=2; n<=half; n++) { m=n+half; count[$n " " $m]++;}} END{for(k in count) print k, count[k]; }' \| \
  gzip -c '>' $dir/depth_stats_tmp.JOB.gz


$cmd $dir/log/analyze_lattice_depth_stats.log \
  gunzip -c "$dir/depth_stats_tmp.*.gz" \| \
  steps/diagnostic/analyze_lattice_depth_stats.py $lang || exit 1

grep Overall $dir/log/analyze_lattice_depth_stats.log
echo "$0: see stats in $dir/log/analyze_lattice_depth_stats.log"


rm $dir/phone_stats.*.gz
rm $dir/depth_tmp.*.gz
rm $dir/depth_stats_tmp.*.gz
rm $dir/ali_tmp.*.gz

exit 0
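So each line handed to analyze_lattice_depth_stats.py is "phone-id depth
count": for instance, a line "5 3 1024" would mean that frames aligned to
phone 5 had a lattice depth of 3 on 1024 frames (values illustrative).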
--------------------------------------------------------------------------------
/steps/get_ctm.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright  Johns Hopkins University (Author: Daniel Povey) 2012.  Apache 2.0.

# This script produces CTM files from a decoding directory that has lattices
# present.


# begin configuration section.
cmd=run.pl
stage=0
frame_shift=0.01
min_lmwt=5
max_lmwt=20
use_segments=true # if we have a segments file, use it to convert
                  # the segments to be relative to the original files.
print_silence=false
#end configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --stage (0|1|2)                 # start scoring script from part-way through."
  echo "    --use-segments (true|false)     # use segments and reco2file_and_channel files "
  echo "                                    # to produce a ctm relative to the original audio"
  echo "                                    # files, with channel information (typically needed"
  echo "                                    # for NIST scoring)."
  echo "    --frame-shift (default=0.01)    # specify this if your lattices have a frame-shift"
  echo "                                    # not equal to 0.01 seconds"
  echo "e.g.:"
  echo "$0 data/train data/lang exp/tri4a/decode/"
  echo "See also: steps/get_train_ctm.sh"
  exit 1;
fi

data=$1
lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
dir=$3

model=$dir/../final.mdl # assume model one level up from decoding dir.


for f in $lang/words.txt $model $dir/lat.1.gz; do
  [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
done

name=`basename $data`; # e.g. eval2000

mkdir -p $dir/scoring/log

if [ $stage -le 0 ]; then
  if [ -f $data/segments ] && $use_segments; then
    f=$data/reco2file_and_channel
    [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
    filter_cmd="utils/convert_ctm.pl $data/segments $data/reco2file_and_channel"
  else
    filter_cmd=cat
  fi

  nj=$(cat $dir/num_jobs)
  lats=$(for n in $(seq $nj); do echo -n "$dir/lat.$n.gz "; done)
  if [ -f $lang/phones/word_boundary.int ]; then
    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
      set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
      lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
      lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
      nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
      utils/int2sym.pl -f 5 $lang/words.txt \| \
      $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
  elif [ -f $lang/phones/align_lexicon.int ]; then
    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.log \
      set -o pipefail '&&' mkdir -p $dir/score_LMWT/ '&&' \
      lattice-1best --lm-scale=LMWT "ark:gunzip -c $lats|" ark:- \| \
      lattice-align-words-lexicon $lang/phones/align_lexicon.int $model ark:- ark:- \| \
      nbest-to-ctm --frame-shift=$frame_shift --print-silence=$print_silence ark:- - \| \
      utils/int2sym.pl -f 5 $lang/words.txt \| \
      $filter_cmd '>' $dir/score_LMWT/$name.ctm || exit 1;
  else
    echo "$0: neither $lang/phones/word_boundary.int nor $lang/phones/align_lexicon.int exists: cannot align."
    exit 1;
  fi
fi
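A sketch of typical use (paths illustrative): one CTM is written per LM weight
under the decode directory, named after the data directory:

  steps/get_ctm.sh --min-lmwt 7 --max-lmwt 11 \
    data/test_yesno data/lang_test_tg exp/mono0a/decode_test_yesno
  ls exp/mono0a/decode_test_yesno/score_*/test_yesno.ctm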
--------------------------------------------------------------------------------
/steps/lmrescore_const_arpa.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014  Guoguo Chen
# Apache 2.0

# This script rescores lattices with the ConstArpaLm format language model.

# Begin configuration section.
cmd=run.pl
skip_scoring=false
stage=1
scoring_opts=
# End configuration section.

echo "$0 $@"  # Print the command line for logging

. ./utils/parse_options.sh

if [ $# != 5 ]; then
  echo "Does language model rescoring of lattices (remove old LM, add new LM)"
  echo "Usage: $0 [options] <old-lang-dir> <new-lang-dir> \\"
  echo "                    <data-dir> <input-decode-dir> <output-decode-dir>"
  echo "options: [--cmd (run.pl|queue.pl [queue opts])]"
  exit 1;
fi

[ -f path.sh ] && . ./path.sh;

oldlang=$1
newlang=$2
data=$3
indir=$4
outdir=$5

oldlm=$oldlang/G.fst
newlm=$newlang/G.carpa
! cmp $oldlang/words.txt $newlang/words.txt &&\
  echo "$0: Warning: vocabularies may be incompatible."
[ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
[ ! -f $newlm ] && echo "$0: Missing file $newlm" && exit 1;
! ls $indir/lat.*.gz >/dev/null &&\
  echo "$0: No lattices input directory $indir" && exit 1;

if ! cmp -s $oldlang/words.txt $newlang/words.txt; then
  echo "$0: $oldlang/words.txt and $newlang/words.txt differ: make sure you know what you are doing.";
fi

oldlmcommand="fstproject --project_output=true $oldlm |"

mkdir -p $outdir/log
nj=`cat $indir/num_jobs` || exit 1;
cp $indir/num_jobs $outdir

if [ $stage -le 1 ]; then
  $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
    lattice-lmrescore --lm-scale=-1.0 \
      "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \
    lattice-lmrescore-const-arpa --lm-scale=1.0 \
      ark:- "$newlm" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
fi

if ! $skip_scoring && [ $stage -le 2 ]; then
  err_msg="Not scoring because local/score.sh does not exist or not executable."
  [ ! -x local/score.sh ] && echo $err_msg && exit 1;
  local/score.sh --cmd "$cmd" $scoring_opts $data $newlang $outdir
else
  echo "Not scoring because requested so..."
fi

exit 0;
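The G.carpa input is built once from an ARPA-format LM with the companion
utility shipped in utils/; a sketch (the 4-gram ARPA file and the experiment
directories are assumptions):

  utils/build_const_arpa_lm.sh lm/fg.arpa.gz data/lang data/lang_test_fg
  steps/lmrescore_const_arpa.sh data/lang_test_tg data/lang_test_fg \
    data/test exp/tri3/decode_test_tg exp/tri3/decode_test_fg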
ls $indir/lat.*.gz >/dev/null &&\ 57 | echo "$0: No lattices input directory $indir" && exit 1; 58 | awk -v n=$0 -v w=$weight 'BEGIN {if (w < 0 || w > 1) { 59 | print n": Interpolation weight should be in the range of [0, 1]"; exit 1;}}' \ 60 | || exit 1; 61 | 62 | oldlm_command="fstproject --project_output=true $oldlm |" 63 | 64 | acwt=`perl -e "print (1.0/$inv_acwt);"` 65 | 66 | mkdir -p $outdir/log 67 | nj=`cat $indir/num_jobs` || exit 1; 68 | cp $indir/num_jobs $outdir 69 | 70 | oldlm_weight=`perl -e "print -1.0 * $weight;"` 71 | if [ "$oldlm" == "$oldlang/G.fst" ]; then 72 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ 73 | lattice-lmrescore --lm-scale=$oldlm_weight \ 74 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \ 75 | lattice-lmrescore-rnnlm --lm-scale=$weight \ 76 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \ 77 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \ 78 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; 79 | else 80 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ 81 | lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \ 82 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:- \| \ 83 | lattice-lmrescore-rnnlm --lm-scale=$weight \ 84 | --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \ 85 | $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \ 86 | "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; 87 | fi 88 | 89 | if ! $skip_scoring ; then 90 | err_msg="Not scoring because local/score.sh does not exist or not executable." 91 | [ ! -x local/score.sh ] && echo $err_msg && exit 1; 92 | local/score.sh --cmd "$cmd" $data $oldlang $outdir 93 | else 94 | echo "Not scoring because requested so..." 95 | fi 96 | 97 | exit 0; 98 | -------------------------------------------------------------------------------- /steps/nnet2/adjust_priors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | # Copyright (c) 2015, Johns Hopkins University (Yenda Trmal ) 5 | # License: Apache 2.0 6 | 7 | # Begin configuration section. 8 | cmd=run.pl 9 | iter=final 10 | # End configuration section 11 | 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | 15 | if [ -f path.sh ]; then . ./path.sh; fi 16 | . parse_options.sh || exit 1; 17 | 18 | if [ $# != 2 ]; then 19 | echo "Usage: $0 [opts] " 20 | echo " e.g.: $0 exp/tri4_mpe_degs exp/tri4_mpe" 21 | echo "" 22 | echo "Performs priors adjustment either on the final iteration" 23 | echo "or iteration of choice of the training. The adjusted model" 24 | echo "filename will be suffixed by \"adj\", i.e. for the final" 25 | echo "iteration final.mdl will become final.adj.mdl" 26 | echo "" 27 | echo "Main options (for others, see top of script file)" 28 | echo " --config # config file containing options" 29 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 30 | echo " --iter # which iteration to be adjusted" 31 | exit 1; 32 | fi 33 | 34 | degs_dir=$1 35 | dir=$2 36 | 37 | src_model=$dir/${iter}.mdl 38 | 39 | if [ ! -f $src_model ]; then 40 | echo "$0: Expecting $src_model to exist." 41 | exit 1 42 | fi 43 | 44 | if [ ! -f $degs_dir/priors_egs.1.ark ]; then 45 | echo "$0: Expecting $degs_dir/priors_egs.1.ark to exist." 
46 | exit 1 47 | fi 48 | 49 | num_archives_priors=`cat $degs_dir/info/num_archives_priors` || { 50 | echo "Could not find $degs_dir/info/num_archives_priors."; 51 | exit 1; 52 | } 53 | 54 | $cmd JOB=1:$num_archives_priors $dir/log/get_post.${iter}.JOB.log \ 55 | nnet-compute-from-egs "nnet-to-raw-nnet $src_model -|" \ 56 | ark:$degs_dir/priors_egs.JOB.ark ark:- \| \ 57 | matrix-sum-rows ark:- ark:- \| \ 58 | vector-sum ark:- $dir/post.${iter}.JOB.vec || { 59 | echo "Error in getting posteriors for adjusting priors." 60 | echo "See $dir/log/get_post.${iter}.*.log"; 61 | exit 1; 62 | } 63 | 64 | 65 | $cmd $dir/log/sum_post.${iter}.log \ 66 | vector-sum $dir/post.${iter}.*.vec $dir/post.${iter}.vec || { 67 | echo "Error in summing posteriors. See $dir/log/sum_post.${iter}.log"; 68 | exit 1; 69 | } 70 | 71 | rm -f $dir/post.${iter}.*.vec 72 | 73 | echo "Re-adjusting priors based on computed posteriors for iter $iter" 74 | $cmd $dir/log/adjust_priors.${iter}.log \ 75 | nnet-adjust-priors $src_model $dir/post.${iter}.vec $dir/${iter}.adj.mdl || { 76 | echo "Error in adjusting priors. See $dir/log/adjust_priors.${iter}.log"; 77 | exit 1; 78 | } 79 | 80 | echo "Done adjusting priors (on $src_model)" 81 | -------------------------------------------------------------------------------- /steps/nnet2/convert_nnet1_to_nnet2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script converts nnet1 into nnet2 models. 7 | # Note, it doesn't support all possible types of nnet1 models. 8 | 9 | # Begin configuration section 10 | cleanup=true 11 | cmd=run.pl 12 | # End configuration section. 13 | 14 | echo "$0 $@" # Print the command line for logging 15 | 16 | [ -f ./path.sh ] && . ./path.sh; # source the path. 17 | . parse_options.sh || exit 1; 18 | 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 [options] " 22 | echo "e.g.: $0 exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2" 23 | exit 1; 24 | fi 25 | 26 | src=$1 27 | dir=$2 28 | 29 | mkdir -p $dir/log || exit 1; 30 | 31 | for f in $src/final.mdl $src/final.feature_transform $src/ali_train_pdf.counts; do 32 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 33 | done 34 | 35 | cp $src/phones.txt $dir/phones.txt || exit 1; 36 | $cmd $dir/log/convert_feature_transform.log \ 37 | nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1; 38 | 39 | 40 | if [ -f $src/final.nnet ]; then 41 | echo "$0: $src/final.nnet exists, using it as input." 42 | $cmd $dir/log/convert_model.log \ 43 | nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1; 44 | elif [ -f $src/final.dbn ]; then 45 | echo "$0: $src/final.dbn exists, using it as input." 46 | num_leaves=$(am-info $src/final.mdl | grep -w pdfs | awk '{print $NF}') || exit 1; 47 | dbn_output_dim=$(nnet-info exp/dnn4b_pretrain-dbn/6.dbn | grep component | tail -n 1 | sed s:,::g | awk '{print $NF}') || exit 1; 48 | [ -z "$dbn_output_dim" ] && exit 1; 49 | 50 | cat > $dir/final_layer.conf < " 22 | echo "where will typically be a normal neural net from another corpus," 23 | echo "and will usually be a single-layer neural net trained on top of it by" 24 | echo "dumping the activations (e.g. using steps/online/nnet2/dump_nnet_activations.sh, I" 25 | echo "think no such script exists for non-online), and then training using" 26 | echo "steps/nnet2/retrain_fast.sh." 
27 | echo "e.g.: $0 ../../swbd/s5b/exp/nnet2_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online_combined" 28 | fi 29 | 30 | 31 | src1=$1 32 | src2=$2 33 | dir=$3 34 | 35 | for f in $src1/final.mdl $src2/tree $src2/final.mdl; do 36 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1; 37 | done 38 | 39 | 40 | mkdir -p $dir/log 41 | 42 | info=$dir/nnet_info 43 | nnet-am-info $src1/final.mdl >$info 44 | nc=$(grep num-components $info | awk '{print $2}'); 45 | if grep SumGroupComponent $info >/dev/null; then 46 | nc_truncate=$[$nc-3] # we did mix-up: remove AffineComponent, 47 | # SumGroupComponent, SoftmaxComponent 48 | else 49 | # we didn't mix-up: 50 | nc_truncate=$[$nc-2] # remove AffineComponent, SoftmaxComponent 51 | fi 52 | 53 | $cmd $dir/log/get_raw_nnet.log \ 54 | nnet-to-raw-nnet --truncate=$nc_truncate $src1/final.mdl $dir/first_nnet.raw || exit 1; 55 | 56 | $cmd $dir/log/append_nnet.log \ 57 | nnet-insert --randomize-next-component=false --insert-at=0 \ 58 | $src2/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1; 59 | 60 | $cleanup && rm $dir/first_nnet.raw 61 | 62 | # Copy the tree etc., 63 | 64 | cp $src2/tree $dir || exit 1; 65 | 66 | # Copy feature-related things from src1 where we built the initial model. 67 | # Note: if you've done anything like mess with the feature-extraction configs, 68 | # or changed the feature type, you have to keep track of that yourself. 69 | for f in final.mat cmvn_opts splice_opts; do 70 | if [ -f $src1/$f ]; then 71 | cp $src1/$f $dir || exit 1; 72 | fi 73 | done 74 | 75 | echo "$0: created appended model in $dir" 76 | -------------------------------------------------------------------------------- /steps/nnet2/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | ../../utils/data/get_num_frames.sh -------------------------------------------------------------------------------- /steps/nnet2/relabel_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vimal Manohar. Apache 2.0. 4 | # This script, which will generally be called during the neural-net training 5 | # relabels existing examples with better labels obtained by realigning the data 6 | # with the current nnet model 7 | 8 | # Begin configuration section 9 | cmd=run.pl 10 | stage=0 11 | extra_egs= # Names of additional egs files that need to relabelled 12 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs, 13 | # valid_diagnostic.egs 14 | iter=final 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | if [ -f path.sh ]; then . ./path.sh; fi 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# != 3 ]; then 21 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] " 22 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5" 23 | echo "" 24 | echo "Main options (for others, see top of script file)" 25 | echo " --config # config file containing options" 26 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs." 27 | 28 | exit 1; 29 | fi 30 | 31 | alidir=$1 32 | egs_in_dir=$2 33 | dir=$3 34 | 35 | model=$alidir/$iter.mdl 36 | 37 | # Check some files. 38 | 39 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.0.ark $egs_in_dir/combine.egs \ 40 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \ 41 | $egs_in_dir/num_jobs_nnet $egs_in_dir/iters_per_epoch $egs_in_dir/samples_per_iter; do 42 | [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; 43 | done 44 | 45 | num_jobs_nnet=`cat $egs_in_dir/num_jobs_nnet` 46 | iters_per_epoch=`cat $egs_in_dir/iters_per_epoch` 47 | samples_per_iter_real=`cat $egs_in_dir/samples_per_iter` 48 | num_jobs_align=`cat $alidir/num_jobs` 49 | 50 | mkdir -p $dir/log 51 | 52 | echo $num_jobs_nnet > $dir/num_jobs_nnet 53 | echo $iters_per_epoch > $dir/iters_per_epoch 54 | echo $samples_per_iter_real > $dir/samples_per_iter 55 | 56 | alignments=$(for n in $(seq $num_jobs_align); do echo -n "$alidir/ali.$n.gz "; done) 57 | 58 | if [ $stage -le 0 ]; then 59 | egs_in= 60 | egs_out= 61 | for x in `seq 1 $num_jobs_nnet`; do 62 | for y in `seq 0 $[$iters_per_epoch-1]`; do 63 | utils/create_data_link.pl $dir/egs.$x.$y.ark 64 | if [ $x -eq 1 ]; then 65 | egs_in="$egs_in ark:$egs_in_dir/egs.JOB.$y.ark " 66 | egs_out="$egs_out ark:$dir/egs.JOB.$y.ark " 67 | fi 68 | done 69 | done 70 | 71 | $cmd JOB=1:$num_jobs_nnet $dir/log/relabel_egs.JOB.log \ 72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 73 | $egs_in $egs_out || exit 1 74 | fi 75 | 76 | if [ $stage -le 1 ]; then 77 | egs_in= 78 | egs_out= 79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do 80 | utils/create_data_link.pl $dir/$x 81 | egs_in="$egs_in ark:$egs_in_dir/$x" 82 | egs_out="$egs_out ark:$dir/$x" 83 | done 84 | 85 | $cmd $dir/log/relabel_egs_extra.log \ 86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 87 | $egs_in $egs_out || exit 1 88 | fi 89 | 90 | echo "$0: Finished relabeling training examples" 91 | -------------------------------------------------------------------------------- /steps/nnet2/relabel_egs2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vimal Manohar. 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | # This script, which will generally be called during the neural-net training 8 | # relabels existing examples with better labels obtained by realigning the data 9 | # with the current nnet model. 10 | # This script is as relabel_egs.sh, but is adapted to work with the newer 11 | # egs format that is written by get_egs2.sh 12 | 13 | # Begin configuration section 14 | cmd=run.pl 15 | stage=0 16 | extra_egs= # Names of additional egs files that need to relabelled 17 | # other than egs.*.*.ark, combine.egs, train_diagnostic.egs, 18 | # valid_diagnostic.egs 19 | iter=final 20 | parallel_opts= 21 | echo "$0 $@" # Print the command line for logging 22 | 23 | if [ -f path.sh ]; then . ./path.sh; fi 24 | . parse_options.sh || exit 1; 25 | 26 | if [ $# != 3 ]; then 27 | echo "Usage: steps/nnet2/relabel_egs.sh [opts] " 28 | echo " e.g: steps/nnet2/relabel_egs.sh exp/tri6_nnet/ali_1.5 exp/tri6_nnet/egs exp/tri6_nnet/egs_1.5" 29 | echo "" 30 | echo "Main options (for others, see top of script file)" 31 | echo " --config # config file containing options" 32 | echo " --cmd (utils/run.pl;utils/queue.pl ) # how to run jobs." 33 | 34 | exit 1; 35 | fi 36 | 37 | alidir=$1 38 | egs_in_dir=$2 39 | dir=$3 40 | 41 | model=$alidir/$iter.mdl 42 | 43 | # Check some files. 
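# A sketch of what the new-format egs directory is expected to contain, per the
# checks just below (the archive count is illustrative):
#   $egs_in_dir/egs.1.ark ... egs.N.ark, combine.egs,
#   valid_diagnostic.egs, train_diagnostic.egs, and info/num_archives
#   (a single integer, e.g. "64").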
44 | 45 | [ -f $egs_in_dir/iters_per_epoch ] && \ 46 | echo "$0: this script does not work with the old egs directory format" && exit 1; 47 | 48 | for f in $alidir/ali.1.gz $model $egs_in_dir/egs.1.ark $egs_in_dir/combine.egs \ 49 | $egs_in_dir/valid_diagnostic.egs $egs_in_dir/train_diagnostic.egs \ 50 | $egs_in_dir/info/num_archives; do 51 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 52 | done 53 | 54 | num_archives=$(cat $egs_in_dir/info/num_archives) || exit 1; 55 | num_jobs_align=$(cat $alidir/num_jobs) || exit 1; 56 | 57 | mkdir -p $dir/log 58 | 59 | mkdir -p $dir/info 60 | cp -r $egs_in_dir/info/* $dir/info 61 | 62 | alignments=$(for n in $(seq $num_jobs_align); do echo $alidir/ali.$n.gz; done) 63 | 64 | if [ $stage -le 0 ]; then 65 | for x in $(seq $num_archives); do 66 | # if $dir/storage exists, make the soft links that we'll 67 | # use to distribute the data across machines 68 | utils/create_data_link.pl $dir/egs.$x.ark 69 | done 70 | 71 | $cmd $parallel_opts JOB=1:$num_archives $dir/log/relabel_egs.JOB.log \ 72 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 73 | ark:$egs_in_dir/egs.JOB.ark ark:$dir/egs.JOB.ark || exit 1 74 | fi 75 | 76 | if [ $stage -le 1 ]; then 77 | egs_in= 78 | egs_out= 79 | for x in combine.egs valid_diagnostic.egs train_diagnostic.egs $extra_egs; do 80 | utils/create_data_link.pl $dir/$x 81 | egs_in="$egs_in ark:$egs_in_dir/$x" 82 | egs_out="$egs_out ark:$dir/$x" 83 | done 84 | 85 | $cmd $dir/log/relabel_egs_extra.log \ 86 | nnet-relabel-egs "ark:gunzip -c $alignments | ali-to-pdf $model ark:- ark:- |" \ 87 | $egs_in $egs_out || exit 1 88 | fi 89 | 90 | echo "$0: Finished relabeling training examples" 91 | -------------------------------------------------------------------------------- /steps/nnet2/remove_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script removes the examples in an egs/ directory, e.g. 7 | # steps/nnet2/remove_egs.sh exp/nnet4b/egs/ 8 | # We give it its own script because we need to be careful about 9 | # things that are soft links to something in storage/ (i.e. remove the 10 | # data that's linked to as well as the soft link), and we want to not 11 | # delete the examples if someone has done "touch $dir/egs/.nodelete". 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo "e.g.: $0 data/nnet4b/egs/" 17 | echo "e.g.: $0 data/nnet4b_mpe/degs/" 18 | echo "This script is usually equivalent to 'rm /egs.* /degs.*' but it follows" 19 | echo "soft links to /storage/; and it avoids deleting anything in the directory if" 20 | echo "someone did 'touch /.nodelete" 21 | exit 1; 22 | fi 23 | 24 | egs=$1 25 | 26 | if [ ! -d $egs ]; then 27 | echo "$0: expected directory $egs to exist" 28 | exit 1; 29 | fi 30 | 31 | if [ -f $egs/.nodelete ]; then 32 | echo "$0: not deleting egs in $egs since $egs/.nodelete exists" 33 | exit 0; 34 | fi 35 | 36 | 37 | 38 | for f in $egs/egs.*.ark $egs/degs.*.ark $egs/cegs.*.ark; do 39 | if [ -L $f ]; then 40 | rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails. 
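# Illustrative example (hypothetical paths): if egs.3.ark is a soft link like
#   egs/egs.3.ark -> storage/2/egs.3.ark
# the rm above deletes storage/2/egs.3.ark (the link target), and the rm below
# then deletes the link itself.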
41 | fi
42 | rm $f 2>/dev/null
43 | done
44 | 
45 | 
46 | echo "$0: Finished deleting examples in $egs"
47 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | if (@ARGV != 2) {
14 | print STDERR "Usage: utils/gen_topo.pl <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n";
15 | print STDERR "e.g.: utils/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n";
16 | exit (1);
17 | }
18 | 
19 | ($nonsil_phones, $sil_phones) = @ARGV;
20 | 
21 | $nonsil_phones =~ s/:/ /g;
22 | $sil_phones =~ s/:/ /g;
23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n";
24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n";
25 | 
26 | print "<Topology>\n";
27 | print "<TopologyEntry>\n";
28 | print "<ForPhones>\n";
29 | print "$nonsil_phones $sil_phones\n";
30 | print "</ForPhones>\n";
31 | # The next two lines may look like a bug, but they are as intended. State 0 has
32 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
33 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
34 | # following state 0.
35 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
36 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
37 | # code, so they are never used.
38 | print "<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>\n";
39 | print "<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>\n";
40 | print "<State> 2 </State>\n";
41 | print "</TopologyEntry>\n";
42 | print "</Topology>\n";
43 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # The next two lines may look like a bug, but they are as intended. State 0 has
38 | # no self-loop, it happens exactly once. And it can go either to state 1 (with
39 | # a self-loop) or to state 2, so we can have zero or more instances of state 1
40 | # following state 0.
41 | # We make the transition-probs 0.5 so they normalize, to keep the code happy.
42 | # In fact, we always set the transition probability scale to 0.0 in the 'chain'
43 | # code, so they are never used.
44 | print("<State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
45 | print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
46 | print("<State> 2 </State>")
47 | print("</TopologyEntry>")
48 | print("</Topology>")
49 | 
50 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo2.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo2.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | 
38 | # the pdf-classes are as follows:
39 | # pdf-class 0 is in a 1-frame sequence, the initial and final state.
40 | # pdf-class 1 is in a sequence with >=3 frames, the 'middle' states. (important that
41 | # it be numbered 1, which is the default list of pdf-classes used in 'cluster-phones').
42 | # pdf-class 2 is the initial-state in a sequence with >= 2 frames.
43 | # pdf-class 3 is the final-state in a sequence with >= 2 frames.
44 | # state 0 is nonemitting in this topology.
45 | 
46 | print("<State> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>") # initial nonemitting state.
47 | print("<State> 1 <PdfClass> 0 <Transition> 5 1.0 </State>") # 1-frame sequence.
48 | print("<State> 2 <PdfClass> 2 <Transition> 3 0.5 <Transition> 4 0.5 </State>") # 2 or more frames
49 | print("<State> 3 <PdfClass> 1 <Transition> 3 0.5 <Transition> 4 0.5 </State>") # 3 or more frames
50 | print("<State> 4 <PdfClass> 3 <Transition> 5 1.0 </State>") # 2 or more frames.
51 | print("<State> 5 </State>") # final nonemitting state
52 | 
53 | print("</TopologyEntry>")
54 | print("</Topology>")
55 | 
56 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo3.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo3.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | print("<State> 0 <PdfClass> 0 <Transition> 0 0.5 <Transition> 1 0.5 </State>")
38 | print("<State> 1 </State>")
39 | print("</TopologyEntry>")
40 | print("</Topology>")
41 | 
42 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo4.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo4.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo4.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # state 0 is obligatory (occurs once)
38 | print("<State> 0 <PdfClass> 0 <Transition> 1 0.3333 <Transition> 2 0.3333 <Transition> 3 0.3333 </State>")
39 | # state 1 is used only when >2 frames
40 | print("<State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
41 | # state 2 is used only when >=2 frames (and occurs once)
42 | print("<State> 2 <PdfClass> 2 <Transition> 3 1.0 </State>")
43 | print("<State> 3 </State>") # final nonemitting state
44 | print("</TopologyEntry>")
45 | print("</Topology>")
46 | 
47 | 
--------------------------------------------------------------------------------
/steps/nnet3/chain/gen_topo5.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey)
4 | 
5 | # Generate a topology file. This allows control of the number of states in the
6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of
7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we
8 | # believe should be useful in the 'chain' model. Note: right now it doesn't
9 | # have any real options, and it treats silence and nonsilence the same. The
10 | # intention is that you write different versions of this script, or add options,
11 | # if you experiment with it.
12 | 
13 | from __future__ import print_function
14 | import argparse
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo5.py "
18 | "<colon-separated-nonsilence-phones> <colon-separated-silence-phones> "
19 | "e.g.: steps/nnet3/chain/gen_topo5.py 4:5:6:7:8:9:10 1:2:3\n",
20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage.");
21 | parser.add_argument("nonsilence_phones", type=str,
22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9");
23 | parser.add_argument("silence_phones", type=str,
24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3");
25 | 
26 | args = parser.parse_args()
27 | 
28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ]
29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ]
30 | all_phones = silence_phones + nonsilence_phones
31 | 
32 | print("<Topology>")
33 | print("<TopologyEntry>")
34 | print("<ForPhones>")
35 | print(" ".join([str(x) for x in all_phones]))
36 | print("</ForPhones>")
37 | # state 0 is nonemitting
38 | print("<State> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>")
39 | # state 1 is for when we traverse the phone in 1 frame
40 | print("<State> 1 <PdfClass> 0 <Transition> 4 1.0 </State>")
41 | # state 2 is for when we traverse the phone in >1 frame, for the first frame.
42 | print("<State> 2 <PdfClass> 2 <Transition> 3 1.0 </State>")
43 | # state 3 is for the self-loop. Use pdf-class 1 here so that the default
44 | # phone-class clustering (which uses only pdf-class 1 by default) gets only
45 | # stats from longer phones.
46 | print("<State> 3 <PdfClass> 1 <Transition> 3 0.5 <Transition> 4 0.5 </State>")
47 | print("<State> 4 </State>")
48 | print("</TopologyEntry>")
49 | print("</Topology>")
50 | 
51 | 
--------------------------------------------------------------------------------
/steps/nnet3/get_successful_models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from __future__ import print_function
4 | import re
5 | import argparse
6 | import sys
7 | 
8 | 
9 | if __name__ == "__main__":
10 |     # we add compulsory arguments as named arguments for readability
11 |     parser = argparse.ArgumentParser(description="Create a list of models suitable for averaging "
12 |                                      "based on their train objf values.",
13 |                                      epilog="See steps/nnet3/lstm/train.sh for example.")
14 | 
15 |     parser.add_argument("--difference-threshold", type=float,
16 |                         help="The threshold for discarding models: "
17 |                         "when the objf of a model differs by more than this value from the best model, "
18 |                         "it is discarded.",
19 |                         default=1.0)
20 | 
21 |     parser.add_argument("num_models", type=int,
22 |                         help="Number of models.")
23 | 
24 |     parser.add_argument("logfile_pattern", type=str,
25 |                         help="Pattern for identifying the log-file names. "
26 |                         "It specifies the entire log file name, except for the job number, "
27 |                         "which is replaced with '%'. e.g. exp/nnet3/tdnn_sp/log/train.4.%.log")
28 | 
29 |     args = parser.parse_args()
30 | 
31 |     assert(args.num_models > 0)
32 | 
33 |     parse_regex = re.compile(r"LOG .* Overall average objective function for 'output' is ([0-9e.\-+]+) over ([0-9e.\-+]+) frames")
34 |     loss = []
35 |     for i in range(args.num_models):
36 |         model_num = i + 1
37 |         logfile = re.sub('%', str(model_num), args.logfile_pattern)
38 |         lines = open(logfile, 'r').readlines()
39 |         this_loss = -100000
40 |         for line_num in range(1, len(lines) + 1):
41 |             # we search from the end, as this results in fewer regex
42 |             # searches; Python regex is slow!
43 |             mat_obj = parse_regex.search(lines[-1*line_num])
44 |             if mat_obj is not None:
45 |                 this_loss = float(mat_obj.groups()[0])
46 |                 break
47 |         loss.append(this_loss)
48 |     max_index = loss.index(max(loss))
49 |     accepted_models = []
50 |     for i in range(args.num_models):
51 |         if (loss[max_index] - loss[i]) <= args.difference_threshold:
52 |             accepted_models.append(i+1)
53 | 
54 |     model_list = " ".join(map(lambda x: str(x), accepted_models))
55 |     print(model_list)
56 | 
57 |     if len(accepted_models) != args.num_models:
58 |         print("WARNING: Only {0}/{1} of the models have been accepted for averaging, based on log files {2}.".format(len(accepted_models), args.num_models, args.logfile_pattern), file=sys.stderr)
59 |         print(" Using models {0}".format(model_list), file=sys.stderr)
60 | 
--------------------------------------------------------------------------------
/steps/nnet3/nnet3_to_dot.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # script showing use of nnet3_to_dot.py
4 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti).
5 | 
6 | # Begin configuration section.
7 | component_attributes="name,type"
8 | node_prefixes=""
9 | info_bin=nnet3-am-info
10 | echo "$0 $@" # Print the command line for logging
11 | 
12 | [ -f ./path.sh ] && . ./path.sh; # source the path.
13 | . 
parse_options.sh || exit 1; 14 | 15 | if [ $# != 3 ]; then 16 | echo "Usage: $0 [opts] " 17 | echo " e.g.: $0 exp/sdm1/nnet3/lstm_sp/0.mdl lstm.dot lstm.png" 18 | echo "" 19 | echo "Main options (for others, see top of script file)" 20 | echo " --component-attributes # attributes to be printed in nnet3 components" 21 | echo " --node-prefixes # list of prefixes. Nnet3 components/component-nodes with the same prefix" 22 | echo " # will be clustered together in the dot-graph" 23 | 24 | 25 | exit 1; 26 | fi 27 | 28 | model=$1 29 | dot_file=$2 30 | output_file=$3 31 | 32 | attr=${node_prefixes:+ --node-prefixes "$node_prefixes"} 33 | $info_bin $model | \ 34 | steps/nnet3/dot/nnet3_to_dot.py \ 35 | --component-attributes "$component_attributes" \ 36 | $attr $dot_file 37 | 38 | command -v dot >/dev/null 2>&1 || { echo >&2 "This script requires dot but it's not installed. Please compile $dot_file with dot"; exit 1; } 39 | dot -Tpdf $dot_file -o $output_file 40 | -------------------------------------------------------------------------------- /steps/online/nnet2/align.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Brno University of Technology (Author: Karel Vesely) 3 | # 2013-2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # Computes training alignments using DNN. This takes as input a directory 7 | # prepared as for online-nnet2 decoding (e.g. by 8 | # steps/online/nnet2/prepare_online_decoding.sh), and it computes the features 9 | # directly from the wav.scp instead of relying on features dumped on disk; 10 | # this avoids the hassle of having to dump suitably matched features. 11 | 12 | 13 | # Begin configuration section. 14 | nj=4 15 | cmd=run.pl 16 | # Begin configuration. 17 | scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" 18 | beam=10 19 | retry_beam=40 20 | iter=final 21 | use_gpu=no 22 | 23 | echo "$0 $@" # Print the command line for logging 24 | 25 | [ -f path.sh ] && . ./path.sh # source the path. 26 | . parse_options.sh || exit 1; 27 | 28 | if [ $# != 4 ]; then 29 | echo "Usage: $0 " 30 | echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali" 31 | echo "main options (for others, see top of script file)" 32 | echo " --config # config containing options" 33 | echo " --nj # number of parallel jobs" 34 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 35 | exit 1; 36 | fi 37 | 38 | data=$1 39 | lang=$2 40 | srcdir=$3 41 | dir=$4 42 | 43 | oov=`cat $lang/oov.int` || exit 1; 44 | mkdir -p $dir/log 45 | echo $nj > $dir/num_jobs 46 | sdata=$data/split$nj 47 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 48 | 49 | 50 | for f in $srcdir/tree $srcdir/${iter}.mdl $data/wav.scp $lang/L.fst \ 51 | $srcdir/conf/online_nnet2_decoding.conf; do 52 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 53 | done 54 | 55 | utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1; 56 | cp $srcdir/phones.txt $dir || exit 1; 57 | cp $srcdir/{tree,${iter}.mdl} $dir || exit 1; 58 | 59 | grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1; 60 | 61 | 62 | if [ -f $data/segments ]; then 63 | # note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the 64 | # previous utterances within a speaker, we do the filtering after extracting the features. 65 | echo "$0 [info]: segments file exists: using that." 
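# In the rspecifiers built below, "ark,s,cs:" marks the stream as sorted and
# called-sequentially, which lets Kaldi process it without caching utterances;
# the segments file is applied by extract-segments before the online feature
# dump.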
66 | feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |" 67 | else 68 | echo "$0 [info]: no segments file exists, using wav.scp." 69 | feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |" 70 | fi 71 | 72 | echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir" 73 | 74 | tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; 75 | 76 | $cmd JOB=1:$nj $dir/log/align.JOB.log \ 77 | compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \ 78 | nnet-align-compiled $scale_opts --use-gpu=$use_gpu --beam=$beam --retry-beam=$retry_beam \ 79 | $srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1; 80 | 81 | echo "$0: done aligning data." 82 | 83 | -------------------------------------------------------------------------------- /steps/online/nnet2/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013-2014 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # Warning, this script is deprecated, please use utils/data/modify_speaker_info.sh 7 | 8 | # This script is as utils/copy_data_dir.sh in that it copies a data-dir, 9 | # but it supports the --utts-per-spk-max option. If nonzero, it modifies 10 | # the utt2spk and spk2utt files by splitting each speaker into multiple 11 | # versions, so that each speaker has no more than --utts-per-spk-max 12 | # utterances. 13 | 14 | # begin configuration section 15 | utts_per_spk_max=-1 16 | # end configuration section 17 | 18 | . utils/parse_options.sh 19 | 20 | if [ $# != 2 ]; then 21 | echo "Usage: " 22 | echo " $0 [options] " 23 | echo "e.g.:" 24 | echo " $0 --utts-per-spk-max 2 data/train data/train-max2" 25 | echo "Options" 26 | echo " --utts-per-spk-max # number of utterances per speaker maximum," 27 | echo " # default -1 (meaning no maximum). E.g. 2." 28 | exit 1; 29 | fi 30 | 31 | 32 | echo "$0: this script is deprecated, please use utils/data/modify_speaker_info.sh." 33 | 34 | export LC_ALL=C 35 | 36 | srcdir=$1 37 | destdir=$2 38 | 39 | if [ ! -f $srcdir/utt2spk ]; then 40 | echo "$0: no such file $srcdir/utt2spk" 41 | exit 1; 42 | fi 43 | 44 | set -e; 45 | set -o pipefail 46 | 47 | mkdir -p $destdir 48 | 49 | 50 | if [ "$utts_per_spk_max" != -1 ]; then 51 | # create spk2utt file with reduced number of utterances per speaker. 52 | awk -v max=$utts_per_spk_max '{ n=2; count=0; 53 | while(n<=NF) { 54 | int_max=int(max)+ (rand() < (max-int(max))?1:0); 55 | nmax=n+int_max; count++; printf("%s-%06x", $1, count); 56 | for (;n$destdir/spk2utt 58 | utils/spk2utt_to_utt2spk.pl <$destdir/spk2utt >$destdir/utt2spk 59 | 60 | if [ -f $srcdir/cmvn.scp ]; then 61 | # below, the first apply_map command outputs a cmvn.scp indexed by utt; 62 | # the second one outputs a cmvn.scp indexed by new speaker-id. 63 | utils/apply_map.pl -f 2 $srcdir/cmvn.scp <$srcdir/utt2spk | \ 64 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq > $destdir/cmvn.scp 65 | echo "$0: mapping cmvn.scp, but you may want to recompute it if it's needed," 66 | echo " as it would probably change." 
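# Worked example of the speaker split above (hypothetical ids): with
# --utts-per-spk-max 2, a spk2utt line "spk1 uttA uttB uttC" becomes
#   spk1-000001 uttA uttB
#   spk1-000002 uttC
# where the suffix is the hex counter from the awk script's printf("%s-%06x").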
67 | fi 68 | if [ -f $srcdir/spk2gender ]; then 69 | utils/apply_map.pl -f 2 $srcdir/spk2gender <$srcdir/utt2spk | \ 70 | utils/apply_map.pl -f 1 $destdir/utt2spk | sort | uniq >$destdir/spk2gender 71 | fi 72 | else 73 | cp $srcdir/spk2utt $srcdir/utt2spk $destdir/ 74 | [ -f $srcdir/spk2gender ] && cp $srcdir/spk2gender $destdir/ 75 | [ -f $srcdir/cmvn.scp ] && cp $srcdir/cmvn.scp $destdir/ 76 | fi 77 | 78 | 79 | for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do 80 | [ -f $srcdir/$f ] && cp $srcdir/$f $destdir/ 81 | done 82 | 83 | echo "$0: copied data from $srcdir to $destdir, with --utts-per-spk-max $utts_per_spk_max" 84 | opts= 85 | [ ! -f $srcdir/feats.scp ] && opts="--no-feats" 86 | [ ! -f $srcdir/text ] && opts="$opts --no-text" 87 | [ ! -f $srcdir/wav.scp ] && opts="$opts --no-wav" 88 | 89 | utils/validate_data_dir.sh $opts $destdir 90 | -------------------------------------------------------------------------------- /steps/online/nnet2/prepare_online_decoding_transfer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This is as prepare_online_decoding.sh, but for transfer learning-- the case where 7 | # you have an existing online-decoding directory where you have all the feature 8 | # stuff, that you don't want to change, but 9 | 10 | # Begin configuration. 11 | stage=0 # This allows restarting after partway, when something when wrong. 12 | cmd=run.pl 13 | iter=final 14 | # End configuration. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | [ -f path.sh ] && . ./path.sh; 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -ne 4 ]; then 22 | echo "Usage: $0 [options] " 23 | echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online" 24 | echo "main options (for others, see top of script file)" 25 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 26 | echo " --config # config containing options" 27 | echo " --stage # stage to do partial re-run from." 28 | exit 1; 29 | fi 30 | 31 | online_src=$1 32 | lang=$2 33 | nnet_src=$3 34 | dir=$4 35 | 36 | for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $lang/words.txt; do 37 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 38 | done 39 | 40 | 41 | dir_as_given=$dir 42 | dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the 43 | # configuration files we write will contain absolute 44 | # pathnames. 45 | mkdir -p $dir/conf $dir/log 46 | 47 | utils/lang/check_phones_compatible.sh $lang/phones.txt $nnet_src/phones.txt || exit 1; 48 | cp $nnet_src/phones.txt $dir || exit 1; 49 | 50 | cp $nnet_src/tree $dir/ || exit 1; 51 | 52 | cp $nnet_src/$iter.mdl $dir/ || exit 1; 53 | 54 | 55 | # There are a bunch of files that we will need to copy from $online_src, because 56 | # we're aiming to have one self-contained directory that has everything in it. 57 | mkdir -p $dir/ivector_extractor 58 | cp -r $online_src/ivector_extractor/* $dir/ivector_extractor 59 | 60 | [ ! -d $online_src/conf ] && \ 61 | echo "Expected directory $online_src/conf to exist" && exit 1; 62 | 63 | for x in $online_src/conf/*conf; do 64 | # Replace directory name starting $online_src with those starting with $dir. 
65 | # We actually replace any directory names ending in /ivector_extractor/ or /conf/ 66 | # with $dir/ivector_extractor/ or $dir/conf/ 67 | cat $x | perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" > $dir/conf/$(basename $x) 68 | done 69 | 70 | 71 | # modify the silence-phones in the config; these are only used for the 72 | # endpointing code. 73 | cp $dir/conf/online_nnet2_decoding.conf{,.tmp} 74 | silphones=$(cat $lang/phones/silence.csl) || exit 1; 75 | cat $dir/conf/online_nnet2_decoding.conf.tmp | \ 76 | sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf 77 | rm $dir/conf/online_nnet2_decoding.conf.tmp 78 | 79 | echo "$0: formatted neural net for online decoding in $dir_as_given" 80 | -------------------------------------------------------------------------------- /steps/paste_feats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Brno University of Technology (Author: Karel Vesely) 4 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 5 | # Apache 2.0 6 | # This script appends the features in two or more data directories. 7 | 8 | # To be run from .. (one directory up from here) 9 | # see ../run.sh for example 10 | 11 | # Begin configuration section. 12 | cmd=run.pl 13 | nj=4 14 | length_tolerance=10 # length tolerance in frames (trim to shortest) 15 | compress=true 16 | # End configuration section. 17 | 18 | echo "$0 $@" # Print the command line for logging 19 | 20 | if [ -f path.sh ]; then . ./path.sh; fi 21 | . parse_options.sh || exit 1; 22 | 23 | if [ $# -lt 5 ]; then 24 | echo "usage: $0 [options] [] "; 25 | echo "e.g.: $0 data/train_mfcc data/train_bottleneck data/train_combined exp/append_mfcc_plp mfcc" 26 | echo "options: " 27 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 28 | exit 1; 29 | fi 30 | 31 | data_src_arr=(${@:1:$(($#-3))}) #array of source data-dirs 32 | data=${@: -3: 1} 33 | logdir=${@: -2: 1} 34 | ark_dir=${@: -1: 1} #last arg. 35 | 36 | data_src_first=${data_src_arr[0]} # get 1st src dir 37 | 38 | # make $ark_dir an absolute pathname. 39 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}` 40 | 41 | for data_src in ${data_src_arr[@]}; do 42 | utils/split_data.sh $data_src $nj || exit 1; 43 | done 44 | 45 | mkdir -p $ark_dir $logdir 46 | 47 | mkdir -p $data 48 | cp $data_src_first/* $data/ 2>/dev/null # so we get the other files, such as utt2spk. 49 | rm $data/cmvn.scp 2>/dev/null 50 | rm $data/feats.scp 2>/dev/null 51 | 52 | # use "name" as part of name of the archive. 53 | name=`basename $data` 54 | 55 | # get list of source scp's for pasting 56 | data_src_args= 57 | for data_src in ${data_src_arr[@]}; do 58 | data_src_args="$data_src_args scp:$data_src/split$nj/JOB/feats.scp" 59 | done 60 | 61 | for n in $(seq $nj); do 62 | # the next command does nothing unless $ark_dir/storage/ exists, see 63 | # utils/create_data_link.pl for more info. 64 | utils/create_data_link.pl $ark_dir/pasted_$name.$n.ark 65 | done 66 | 67 | $cmd JOB=1:$nj $logdir/append.JOB.log \ 68 | paste-feats --length-tolerance=$length_tolerance $data_src_args ark:- \| \ 69 | copy-feats --compress=$compress ark:- \ 70 | ark,scp:$ark_dir/pasted_$name.JOB.ark,$ark_dir/pasted_$name.JOB.scp || exit 1; 71 | 72 | # concatenate the .scp files together. 
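# Each line of the concatenated feats.scp points into one of the pasted
# archives, in Kaldi's "key path:offset" scp format, e.g. (hypothetical
# utterance-id and path):
#   utt001 /abs/path/mfcc/pasted_train.1.ark:23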
73 | for ((n=1; n<=nj; n++)); do
74 | cat $ark_dir/pasted_$name.$n.scp || exit 1;
75 | done > $data/feats.scp || exit 1;
76 | 
77 | 
78 | nf=`cat $data/feats.scp | wc -l`
79 | nu=`cat $data/utt2spk | wc -l`
80 | if [ $nf -ne $nu ]; then
81 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
82 | echo "consider using utils/fix_data_dir.sh $data"
83 | fi
84 | 
85 | echo "Succeeded pasting features for $name into $data"
86 | 
--------------------------------------------------------------------------------
/steps/score_kaldi_compare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2016 Nicolas Serrano
3 | # Apache 2.0
4 | 
5 | [ -f ./path.sh ] && . ./path.sh
6 | 
7 | # begin configuration section.
8 | cmd=run.pl
9 | replications=10000
10 | #end configuration section.
11 | 
12 | echo "$0 $@" # Print the command line for logging
13 | [ -f ./path.sh ] && . ./path.sh
14 | . parse_options.sh || exit 1;
15 | 
16 | if [ $# -ne 3 ]; then
17 | echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <decode-dir1> <decode-dir2> <compare-dir>"
18 | echo " Options:"
19 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
20 | echo " --replications <int> # number of bootstrap evaluations used to compute confidence."
21 | exit 1;
22 | fi
23 | 
24 | dir1=$1
25 | dir2=$2
26 | dir_compare=$3
27 | 
28 | mkdir -p $dir_compare/log
29 | 
30 | for d in $dir1 $dir2; do
31 | for f in test_filt.txt best_wer; do
32 | [ ! -f $d/$f ] && echo "$0: no such file $d/$f" && exit 1;
33 | done
34 | done
35 | 
36 | 
37 | best_wer_file1=$(awk '{print $NF}' $dir1/best_wer)
38 | best_transcript_file1=$(echo $best_wer_file1 | sed -e 's=.*/wer_==' | \
39 | awk -v FS='_' -v dir=$dir1 '{print dir"/penalty_"$2"/"$1".txt"}')
40 | 
41 | best_wer_file2=$(awk '{print $NF}' $dir2/best_wer)
42 | best_transcript_file2=$(echo $best_wer_file2 | sed -e 's=.*/wer_==' | \
43 | awk -v FS='_' -v dir=$dir2 '{print dir"/penalty_"$2"/"$1".txt"}')
44 | 
45 | $cmd $dir_compare/log/score_compare.log \
46 | compute-wer-bootci --replications=$replications \
47 | ark:$dir1/test_filt.txt ark:$best_transcript_file1 ark:$best_transcript_file2 \
48 | '>' $dir_compare/wer_bootci_comparison || exit 1;
49 | 
50 | exit 0;
51 | 
--------------------------------------------------------------------------------
/steps/search_index.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen)
4 | # Apache 2.0
5 | 
6 | # Begin configuration section.
7 | cmd=run.pl
8 | nbest=-1
9 | strict=true
10 | indices_dir=
11 | # End configuration section.
12 | 
13 | echo "$0 $@" # Print the command line for logging
14 | 
15 | [ -f ./path.sh ] && . ./path.sh; # source the path.
16 | . parse_options.sh || exit 1;
17 | 
18 | if [ $# != 2 ]; then
19 | echo "Usage: steps/search_index.sh [options] <kws-data-dir> <kws-dir>"
20 | echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/"
21 | echo ""
22 | echo "main options (for others, see top of script file)"
23 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
24 | echo " --nbest <int> # return n best results. 
(-1 means all)" 25 | echo " --indices-dir # where the indices should be stored, by default it will be in " 26 | exit 1; 27 | fi 28 | 29 | 30 | kwsdatadir=$1; 31 | kwsdir=$2; 32 | 33 | if [ -z $indices_dir ] ; then 34 | indices_dir=$kwsdir 35 | fi 36 | 37 | mkdir -p $kwsdir/log; 38 | nj=`cat $indices_dir/num_jobs` || exit 1; 39 | keywords=$kwsdatadir/keywords.fsts; 40 | 41 | for f in $indices_dir/index.1.gz $keywords; do 42 | [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; 43 | done 44 | 45 | $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ 46 | kws-search --strict=$strict --negative-tolerance=-1 \ 47 | "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ 48 | "ark,t:|int2sym.pl -f 2 $kwsdatadir/utter_id > $kwsdir/result.JOB" || exit 1; 49 | 50 | exit 0; 51 | -------------------------------------------------------------------------------- /steps/select_feats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0 5 | # This script selects some specified dimensions of the features in the 6 | # input data directory. 7 | 8 | # To be run from .. (one directory up from here) 9 | # see ../run.sh for example 10 | 11 | # Begin configuration section. 12 | cmd=run.pl 13 | nj=4 14 | compress=true 15 | # End configuration section. 16 | 17 | echo "$0 $@" # Print the command line for logging 18 | 19 | if [ -f path.sh ]; then . ./path.sh; fi 20 | . parse_options.sh || exit 1; 21 | 22 | if [ $# -ne 5 ]; then 23 | echo "usage: $0 [options] "; 24 | echo "e.g.: $0 0-12 data/train_mfcc_pitch data/train_mfcconly exp/select_pitch_train mfcc" 25 | echo "options: " 26 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 27 | exit 1; 28 | fi 29 | 30 | selector="$1" 31 | data_in=$2 32 | data=$3 33 | logdir=$4 34 | ark_dir=$5 35 | 36 | # make $ark_dir an absolute pathname. 37 | ark_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $ark_dir ${PWD}` 38 | 39 | 40 | utils/split_data.sh $data_in $nj || exit 1; 41 | 42 | mkdir -p $ark_dir $logdir 43 | mkdir -p $data 44 | 45 | cp $data_in/* $data/ 2>/dev/null # so we get the other files, such as utt2spk. 46 | rm $data/cmvn.scp 2>/dev/null 47 | rm $data/feats.scp 2>/dev/null 48 | 49 | # use "name" as part of name of the archive. 50 | name=`basename $data` 51 | 52 | for j in $(seq $nj); do 53 | # the next command does nothing unless $mfccdir/storage/ exists, see 54 | # utils/create_data_link.pl for more info. 55 | utils/create_data_link.pl $ark_dir/selected_$name.$j.ark 56 | done 57 | 58 | $cmd JOB=1:$nj $logdir/append.JOB.log \ 59 | select-feats "$selector" scp:$data_in/split$nj/JOB/feats.scp ark:- \| \ 60 | copy-feats --compress=$compress ark:- \ 61 | ark,scp:$ark_dir/selected_$name.JOB.ark,$ark_dir/selected_$name.JOB.scp || exit 1; 62 | 63 | # concatenate the .scp files together. 
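# The selector string is passed straight to select-feats: a comma-separated
# list of dimensions and ranges, e.g. "0-12" keeps the first 13 dimensions
# (so 16-dim MFCC+pitch features would drop the 3 pitch dimensions).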
64 | for ((n=1; n<=nj; n++)); do
65 | cat $ark_dir/selected_$name.$n.scp || exit 1;
66 | done > $data/feats.scp || exit 1;
67 | 
68 | 
69 | nf=`cat $data/feats.scp | wc -l`
70 | nu=`cat $data/utt2spk | wc -l`
71 | if [ $nf -ne $nu ]; then
72 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
73 | exit 1;
74 | fi
75 | 
76 | echo "Succeeded selecting features for $name into $data"
77 | 
--------------------------------------------------------------------------------
/steps/shift_feats.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright 2016 Vimal Manohar
4 | # Apache 2.0
5 | 
6 | # This script shifts the feats in the input data directory and creates a
7 | # new directory <data-dir>_fs<num-frames-shift> with the shifted feats.
8 | # If the shift is negative, the initial frames get truncated.
9 | # If the shift is positive, the first frame is repeated.
10 | # Usually applicable for sequence training.
11 | 
12 | # To be run from .. (one directory up from here)
13 | # see ../run.sh for example
14 | 
15 | # Begin configuration section.
16 | cmd=run.pl
17 | nj=4
18 | compress=true
19 | # End configuration section.
20 | 
21 | echo "$0 $@" # Print the command line for logging
22 | 
23 | if [ -f path.sh ]; then . ./path.sh; fi
24 | . parse_options.sh || exit 1;
25 | 
26 | if [ $# -ne 4 ]; then
27 | echo "usage: $0 [options] <num-frames-shift> <src-data-dir> <log-dir> <feat-dir>";
28 | echo "e.g.: $0 -1 data/train exp/shift-1_train mfcc"
29 | echo "options: "
30 | echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
31 | exit 1;
32 | fi
33 | 
34 | num_frames_shift=$1
35 | data_in=$2
36 | logdir=$3
37 | featdir=$4
38 | 
39 | utt_prefix="fs$num_frames_shift-"
40 | spk_prefix="fs$num_frames_shift-"
41 | 
42 | # make $featdir an absolute pathname.
43 | featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}`
44 | 
45 | utils/split_data.sh $data_in $nj || exit 1;
46 | 
47 | data=${data_in}_fs$num_frames_shift
48 | 
49 | mkdir -p $featdir $logdir
50 | mkdir -p $data
51 | 
52 | utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \
53 | $data_in $data
54 | 
55 | rm $data/feats.scp 2>/dev/null
56 | 
57 | # use "name" as part of name of the archive.
58 | name=`basename $data`
59 | 
60 | for j in $(seq $nj); do
61 | # the next command does nothing unless $featdir/storage/ exists, see
62 | # utils/create_data_link.pl for more info.
63 | utils/create_data_link.pl $featdir/raw_feats_$name.$j.ark
64 | done
65 | 
66 | $cmd JOB=1:$nj $logdir/shift.JOB.log \
67 | shift-feats --shift=$num_frames_shift \
68 | scp:$data_in/split$nj/JOB/feats.scp ark:- \| \
69 | copy-feats --compress=$compress ark:- \
70 | ark,scp:$featdir/raw_feats_$name.JOB.ark,$featdir/raw_feats_$name.JOB.scp || exit 1;
71 | 
72 | # concatenate the .scp files together. 
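# Illustrative effect (assuming --shift=-1): utterance "utt001" is copied as
# "fs-1-utt001" (see utt_prefix above) and its output features start from the
# original second frame, per the header comment on negative shifts.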
73 | for ((n=1; n<=nj; n++)); do 74 | cat $featdir/raw_feats_$name.$n.scp 75 | done | awk -v nfs=$num_frames_shift '{print "fs"nfs"-"$0}'>$data/feats.scp || exit 1; 76 | 77 | nf=`cat $data/feats.scp | wc -l` 78 | nu=`cat $data/utt2spk | wc -l` 79 | if [ $nf -ne $nu ]; then 80 | echo "It seems not all of the feature files were successfully processed ($nf != $nu);" 81 | exit 1; 82 | fi 83 | 84 | echo "Succeeded shifting features for $name into $data" 85 | 86 | -------------------------------------------------------------------------------- /steps/tandem/decode_si.sh: -------------------------------------------------------------------------------- 1 | decode.sh -------------------------------------------------------------------------------- /steps/train_nnet.sh: -------------------------------------------------------------------------------- 1 | nnet/train.sh -------------------------------------------------------------------------------- /steps/word_align_lattices.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2012 4 | # Apache 2.0. 5 | 6 | # Begin configuration section. 7 | silence_label=0 8 | cmd=run.pl 9 | # End configuration section. 10 | 11 | echo "$0 $@" # Print the command line for logging 12 | 13 | for x in `seq 2`; do 14 | [ "$1" == "--silence-label" ] && silence_label=$2 && shift 2; 15 | [ "$1" == "--cmd" ] && cmd="$2" && shift 2; 16 | done 17 | 18 | if [ $# != 3 ]; then 19 | echo "Word-align lattices (make the arcs sync up with words)" 20 | echo "" 21 | echo "Usage: $0 [options] " 22 | echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label ]" 23 | exit 1; 24 | fi 25 | 26 | . ./path.sh || exit 1; 27 | 28 | lang=$1 29 | indir=$2 30 | outdir=$3 31 | 32 | mdl=`dirname $indir`/final.mdl 33 | wbfile=$lang/phones/word_boundary.int 34 | 35 | for f in $mdl $wbfile $indir/num_jobs; do 36 | [ ! -f $f ] && echo "word_align_lattices.sh: no such file $f" && exit 1; 37 | done 38 | 39 | mkdir -p $outdir/log 40 | 41 | 42 | cp $indir/num_jobs $outdir; 43 | nj=`cat $indir/num_jobs` 44 | 45 | $cmd JOB=1:$nj $outdir/log/align.JOB.log \ 46 | lattice-align-words --silence-label=$silence_label --test=true \ 47 | $wbfile $mdl "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1; 48 | 49 | -------------------------------------------------------------------------------- /utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Adds some specified number of disambig symbols to a symbol table. 19 | # Adds these as #1, #2, etc. 20 | # If the --include-zero option is specified, includes an extra one 21 | # #0. 
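# Worked example (hypothetical symbol table whose last entry is "b 2"):
#   add_disambig.pl --include-zero symtab.txt 2 > symtab_out.txt
# appends "#0 3", then "#1 4" and "#2 5" after the copied table.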
22 | 23 | $include_zero = 0; 24 | if($ARGV[0] eq "--include-zero") { 25 | $include_zero = 1; 26 | shift @ARGV; 27 | } 28 | 29 | if(@ARGV != 2) { 30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "; 31 | } 32 | 33 | 34 | $input = $ARGV[0]; 35 | $nsyms = $ARGV[1]; 36 | 37 | open(F, "<$input") || die "Opening file $input"; 38 | 39 | while(<F>) { 40 | @A = split(" ", $_); 41 | @A == 2 || die "Bad line $_"; 42 | $lastsym = $A[1]; 43 | print; 44 | } 45 | 46 | if(!defined($lastsym)){ 47 | die "Empty symbol file?"; 48 | } 49 | 50 | if($include_zero) { 51 | $lastsym++; 52 | print "#0 $lastsym\n"; 53 | } 54 | 55 | for($n = 1; $n <= $nsyms; $n++) { 56 | $y = $n + $lastsym; 57 | print "#$n $y\n"; 58 | } 59 | -------------------------------------------------------------------------------- /utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 18 | 19 | $dur = $total = 0; 20 | $maxDur = 0; 21 | $minDur = 9999999999; 22 | $n = 0; 23 | while(<>){ 24 | chomp; 25 | @t = split(/\s+/); 26 | $dur = $t[3] - $t[2]; 27 | $total += $dur; 28 | if ($dur > $maxDur) { 29 | $maxSegId = $t[0]; 30 | $maxDur = $dur; 31 | } 32 | if ($dur < $minDur) { 33 | $minSegId = $t[0]; 34 | $minDur = $dur; 35 | } 36 | $n++; 37 | } 38 | $avg=$total/$n; 39 | $hrs = $total/3600; 40 | print "Total $hrs hours of data\n"; 41 | print "Average segment length $avg seconds\n"; 42 | print "Segment $maxSegId has length of $maxDur seconds\n"; 43 | print "Segment $minSegId has length of $minDur seconds\n"; -------------------------------------------------------------------------------- /utils/apply_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | # This program is a bit like ./sym2int.pl in that it applies a map 7 | # to things in a file, but it's a bit more general in that it doesn't 8 | # assume the things being mapped to are single tokens, they could 9 | # be sequences of tokens. See the usage message. 10 | 11 | 12 | if (@ARGV > 0 && $ARGV[0] eq "-f") { 13 | shift @ARGV; 14 | $field_spec = shift @ARGV; 15 | if ($field_spec =~ m/^\d+$/) { 16 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 17 | } 18 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 19 | if ($1 ne "") { 20 | $field_begin = $1 - 1; # Change to zero-based indexing. 21 | } 22 | if ($2 ne "") { 23 | $field_end = $2 - 1; # Change to zero-based indexing.
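# Illustrative: "-f 2" gives $field_begin == $field_end == 1; "-f 2-3" gives
# $field_begin == 1 and $field_end == 2; "-f 2-" leaves $field_end undefined,
# i.e. the range is open-ended to the right.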
24 | } 25 | } 26 | if (!defined $field_begin && !defined $field_end) { 27 | die "Bad argument to -f option: $field_spec"; 28 | } 29 | } 30 | 31 | # Mapping is obligatory 32 | $permissive = 0; 33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') { 34 | shift @ARGV; 35 | # Mapping is optional (missing key is printed to output) 36 | $permissive = 1; 37 | } 38 | 39 | if(@ARGV != 1) { 40 | print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; 41 | print STDERR "Usage: apply_map.pl [options] map <input >output\n" . 42 | "options: [-f <field-range>]\n" . 43 | "Applies the map 'map' to all input text, where each line of the map\n" . 44 | "is interpreted as a map from the first field to the list of the other fields\n" . 45 | "Note: <field-range> can look like 4-5, or 4-, or -5, or 1, it means the field\n" . 46 | "range in the input to apply the map to.\n" . 47 | "e.g.: echo A B | apply_map.pl a.txt\n" . 48 | "where a.txt is:\n" . 49 | "A a1 a2\n" . 50 | "B b\n" . 51 | "will produce:\n" . 52 | "a1 a2 b\n"; 53 | exit(1); 54 | } 55 | 56 | ($map) = @ARGV; 57 | open(M, "<$map") || die "Error opening map file $map: $!"; 58 | 59 | while (<M>) { 60 | @A = split(" ", $_); 61 | @A >= 1 || die "apply_map.pl: empty line."; 62 | $i = shift @A; 63 | $o = join(" ", @A); 64 | $map{$i} = $o; 65 | } 66 | 67 | while(<STDIN>) { 68 | @A = split(" ", $_); 69 | for ($x = 0; $x < @A; $x++) { 70 | if ( (!defined $field_begin || $x >= $field_begin) 71 | && (!defined $field_end || $x <= $field_end)) { 72 | $a = $A[$x]; 73 | if (!defined $map{$a}) { 74 | if (!$permissive) { 75 | die "apply_map.pl: undefined key $a\n"; 76 | } else { 77 | print STDERR "apply_map.pl: warning! missing key $a\n"; 78 | } 79 | } else { 80 | $A[$x] = $map{$a}; 81 | } 82 | } 83 | } 84 | print join(" ", @A) . "\n"; 85 | } 86 | -------------------------------------------------------------------------------- /utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite.
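# Illustrative line formats matched above (the numbers are made up):
#   kaldi compute-wer:  %WER 10.53 [ 210 / 1994, 25 ins, 58 del, 127 sub ]
#   sclite:             the WER is read from the Sum/Avg row of its table.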
25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>" 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | 34 | cp -r $old_lang/* $new_lang 35 | 36 | unk=`cat $new_lang/oov.int` 37 | bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'` 38 | eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'` 39 | if [[ -z $bos || -z $eos ]]; then 40 | echo "$0: <s> and </s> symbols are not in $new_lang/words.txt" 41 | exit 1 42 | fi 43 | 44 | 45 | arpa-to-const-arpa --bos-symbol=$bos \ 46 | --eos-symbol=$eos --unk-symbol=$unk \ 47 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 48 | 49 | exit 0; 50 | -------------------------------------------------------------------------------- /utils/convert_ctm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # This takes as standard input a ctm file that's "relative to the utterance", 6 | # i.e.
times are measured relative to the beginning of the segments, and it 7 | # uses a "segments" file (format: 8 | # utterance-id recording-id start-time end-time 9 | # ) and a "reco2file_and_channel" file (format: 10 | # recording-id basename-of-file channel 11 | # ) to write out a ctm with times relative to the whole recording. 12 | $skip_unknown=undef; 13 | if ( $ARGV[0] eq "--skip-unknown" ) { 14 | $skip_unknown=1; 15 | shift @ARGV; 16 | } 17 | 18 | if (@ARGV < 2 || @ARGV > 3) { 19 | print STDERR "Usage: convert_ctm.pl [--skip-unknown] <segments> <reco2file_and_channel> [<utterance-ctm>] > real-ctm\n"; 20 | exit(1); 21 | } 22 | 23 | $segments = shift @ARGV; 24 | $reco2file_and_channel = shift @ARGV; 25 | 26 | open(S, "<$segments") || die "opening segments file $segments"; 27 | while(<S>) { 28 | @A = split(" ", $_); 29 | @A == 4 || die "Bad line in segments file: $_"; 30 | ($utt, $recording_id, $begin_time, $end_time) = @A; 31 | $utt2reco{$utt} = $recording_id; 32 | $begin{$utt} = $begin_time; 33 | $end{$utt} = $end_time; 34 | } 35 | close(S); 36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel"; 37 | while(<R>) { 38 | @A = split(" ", $_); 39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_"; 40 | ($recording_id, $file, $channel) = @A; 41 | $reco2file{$recording_id} = $file; 42 | $reco2channel{$recording_id} = $channel; 43 | } 44 | 45 | 46 | # Now process the ctm file, which is either the standard input or the third 47 | # command-line argument. 48 | $num_done = 0; 49 | while(<>) { 50 | @A= split(" ", $_); 51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_"; 52 | # lines look like: 53 | # <utterance-id> 1 <begin-time> <duration> <word> [ <confidence> ] 54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A; 55 | $reco = $utt2reco{$utt}; 56 | if (!defined $reco) { 57 | next if defined $skip_unknown; 58 | die "Utterance-id $utt not defined in segments file $segments"; 59 | } 60 | $file = $reco2file{$reco}; 61 | $channel = $reco2channel{$reco}; 62 | if (!defined $file || !defined $channel) { 63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 64 | } 65 | $b = $begin{$utt}; 66 | $e = $end{$utt}; 67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording. 68 | $wbegin_r = sprintf("%.2f", $wbegin_r); 69 | $wlen = sprintf("%.2f", $wlen); 70 | if (defined $conf) { 71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 72 | } else { 73 | $line = "$file $channel $wbegin_r $wlen $w\n"; 74 | } 75 | if ($wbegin_r + $wlen > $e + 0.01) { 76 | print STDERR "Warning: word appears to be past end of recording; line is $line"; 77 | } 78 | print $line; # goes to stdout. 79 | $num_done++; 80 | } 81 | 82 | if ($num_done == 0) { exit 1; } else { exit 0; } 83 | 84 | __END__ 85 | 86 | # Test example [also test it without the 0.5's] 87 | echo utt reco 10.0 20.0 > segments 88 | echo reco file A > reco2file_and_channel 89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in 90 | echo file A 18.00 1.00 word 0.5 > ctm_out 91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error 92 | rm segments reco2file_and_channel ctm_in ctm_out 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /utils/convert_slf_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0. 3 | 4 | # This script converts lattices to HTK format compatible with other toolkits. 5 | # We can choose to put words on nodes or on arcs, as both are valid in the SLF format. 6 | 7 | # begin configuration section.
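# (Note on the defaults below: "-tc 50" is a GridEngine-style option that
# caps the number of concurrently running array-job tasks, which keeps many
# simultaneous lattice conversions from stressing the disk.)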
8 | cmd=run.pl 9 | dirname=lats-in-htk-slf 10 | parallel_opts="-tc 50" # We should limit disk stress 11 | word_to_node=false # Words in arcs or nodes? [default:arcs] 12 | #end configuration section. 13 | 14 | echo "$0 $@" 15 | 16 | [ -f ./path.sh ] && . ./path.sh 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# -ne 3 ]; then 20 | echo "Usage: $0 [options] <data-dir> <lang-dir|graph-dir> <decode-dir>" 21 | echo " Options:" 22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 23 | echo " --word-to-node (true|false) # put word symbols on nodes instead of links." 24 | echo " --parallel-opts STR # parallelization options (def.: '-tc 50')." 25 | echo "e.g.:" 26 | echo "$0 data/dev data/lang exp/tri4a/decode_dev" 27 | exit 1; 28 | fi 29 | 30 | data=$1 31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. 32 | dir=$3 33 | 34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir. 35 | 36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | [ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname/log 41 | 42 | echo "$0: Converting lattices into '$dir/$dirname'" 43 | 44 | # Words in arcs or nodes? [default:arcs] 45 | word_to_node_arg= 46 | $word_to_node && word_to_node_arg="--word-to-node" 47 | 48 | nj=$(cat $dir/num_jobs) 49 | 50 | # convert the lattices (individually, gzipped) 51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \ 52 | mkdir -p $dir/$dirname/JOB/ '&&' \ 53 | lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true \ 54 | $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \ 55 | utils/int2sym.pl -f 3 $lang/words.txt \| \ 56 | utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1 57 | 58 | # make list of lattices 59 | find -L $PWD/$dir/$dirname -name '*.lat.gz' > $dir/$dirname/lat_htk.scp || exit 1 60 | 61 | # check number of lattices: 62 | nseg=$(cat $data/segments | wc -l) 63 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l) 64 | echo "segments $nseg, saved-lattices $nlat_out" 65 | # 66 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \ 67 | && exit 1 68 | 69 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp" 70 | exit 0 71 | 72 | -------------------------------------------------------------------------------- /utils/create_split_dir.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # Apache 2.0. 5 | # 6 | # This script creates storage directories on different file systems, and creates 7 | # symbolic links to those directories. For example, a command 8 | # 9 | # utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 10 | # 11 | # will mkdir -p all of those directories, and will create links 12 | # 13 | # egs/storage/1 -> /export/gpu-03/egs/storage 14 | # egs/storage/2 -> /export/gpu-04/egs/storage 15 | # ...
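# A typical follow-up (illustrative; the archive name is hypothetical):
#   utils/create_data_link.pl egs/egs.1.ark
# then makes egs/egs.1.ark a symlink through one of the numbered storage
# links, so archives written there are spread across the file systems.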
16 | # 17 | use strict; 18 | use warnings; 19 | use File::Spec; 20 | use Getopt::Long; 21 | 22 | my $Usage = <<EOU; 23 | This script creates storage directories on different file systems, and creates 24 | symbolic links to those directories. 25 | 26 | 27 | Usage: utils/create_split_dir.pl [options] <actual-storage-dirs> <pseudo-storage-dir> 28 | e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 29 | 30 | Allowed options: 31 | --suffix <suffix> : Common suffix to <actual-storage-dirs> (string, default = "") 32 | 33 | See also create_data_link.pl, which is intended to work with the resulting 34 | directory structure, and remove_data_links.sh 35 | EOU 36 | 37 | my $suffix=""; 38 | GetOptions('suffix=s' => \$suffix); 39 | 40 | if (@ARGV < 2) { 41 | die $Usage; 42 | } 43 | 44 | my $ans = 1; 45 | 46 | my $dir = pop(@ARGV); 47 | system("mkdir -p $dir 2>/dev/null"); 48 | 49 | my @all_actual_storage = (); 50 | foreach my $file (@ARGV) { 51 | push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix); 52 | } 53 | 54 | my $index = 1; 55 | foreach my $actual_storage (@all_actual_storage) { 56 | my $pseudo_storage = "$dir/$index"; 57 | 58 | # If the symbolic link already exists, skip it. 59 | if (-l $pseudo_storage) { 60 | print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n"; 61 | $index++; 62 | next; 63 | } 64 | 65 | # Create the destination directory and make the link. 66 | system("mkdir -p $actual_storage 2>/dev/null"); 67 | if ($? != 0) { 68 | print STDERR "$0: error creating directory $actual_storage\n"; 69 | exit(1); 70 | } 71 | { # create a README file for easier deletion. 72 | open(R, ">$actual_storage/README.txt"); 73 | my $storage_dir = File::Spec->rel2abs($dir); 74 | print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n"; 75 | print R "# The full list of directories where this data resides is:\n"; 76 | foreach my $d (@all_actual_storage) { 77 | print R "$d\n"; 78 | } 79 | close(R); 80 | } 81 | my $ret = symlink($actual_storage, $pseudo_storage); 82 | 83 | # Process the returned values 84 | $ans = $ans && $ret; 85 | if (! $ret) { 86 | print STDERR "Error linking $actual_storage to $pseudo_storage\n"; 87 | } 88 | 89 | $index++; 90 | } 91 | 92 | exit($ans == 1 ? 0 : 1); 93 | -------------------------------------------------------------------------------- /utils/data/combine_data.sh: -------------------------------------------------------------------------------- 1 | ../combine_data.sh -------------------------------------------------------------------------------- /utils/data/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../copy_data_dir.sh -------------------------------------------------------------------------------- /utils/data/fix_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../fix_data_dir.sh -------------------------------------------------------------------------------- /utils/data/get_frame_shift.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script takes as input a data directory, such as data/train/, preferably 7 | # with utt2dur file already existing (or the utt2dur file will be created if 8 | # not), and it attempts to work out the approximate frame shift by comparing the 9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out. 10 | # If the shift is very close to, but above, 0.01 (the normal frame shift) it 11 | # rounds it down. 12 | 13 | . utils/parse_options.sh 14 | . 
./path.sh 15 | 16 | if [ $# != 1 ]; then 17 | echo "Usage: $0 <data-dir>" 18 | echo "e.g.:" 19 | echo " $0 data/train" 20 | echo "This script prints the frame-shift (e.g. 0.01) to the standard out." 21 | echo "If <data-dir> does not contain utt2dur, this script may call utils/data/get_utt2dur.sh," 22 | echo "which will require write permission to <data-dir>" 23 | exit 1 24 | fi 25 | 26 | export LC_ALL=C 27 | 28 | dir=$1 29 | 30 | 31 | if [ ! -s $dir/utt2dur ]; then 32 | if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then 33 | echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2 34 | echo 0.01 35 | exit 0 36 | fi 37 | echo "$0: $dir/utt2dur does not exist: creating it" 1>&2 38 | utils/data/get_utt2dur.sh $dir 1>&2 39 | fi 40 | 41 | if [ ! -f $dir/feats.scp ]; then 42 | echo "$0: $dir/feats.scp does not exist" 1>&2 43 | exit 1 44 | fi 45 | 46 | temp=$(mktemp /tmp/tmp.XXXX) 47 | 48 | feat-to-len scp:$dir/feats.scp ark,t:- | head -n 10 > $temp 49 | 50 | if [ ! -s $temp ]; then 51 | echo "$0: error running feat-to-len" 1>&2 52 | exit 1 53 | fi 54 | 55 | head -n 10 $dir/utt2dur | paste - $temp | \ 56 | awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1; 57 | 58 | rm $temp 59 | 60 | exit 0 61 | -------------------------------------------------------------------------------- /utils/data/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts. 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 <data-dir>" 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | exit 1 16 | fi 17 | 18 | data=$1 19 | 20 | if [ ! -f $data/utt2dur ]; then 21 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 22 | fi 23 | 24 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 25 | 26 | awk -v s=$frame_shift '{n += $2} END{print int(n / s)}' <$data/utt2dur 27 | -------------------------------------------------------------------------------- /utils/data/get_segments_for_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script operates on a data directory, such as in data/train/, 4 | # and writes new segments to stdout. The file 'segments' maps from 5 | # utterance to time offsets into a recording, with the format: 6 | # <utterance-id> <recording-id> <segment-begin> <segment-end> 7 | # This script assumes utterance and recording ids are the same (i.e., that 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 9 | # created if necessary by get_utt2dur.sh. 10 | 11 | . ./path.sh 12 | 13 | if [ $# != 1 ]; then 14 | echo "Usage: $0 [options] <data-dir>" 15 | echo "e.g.:" 16 | echo " $0 data/train > data/train/segments" 17 | exit 1 18 | fi 19 | 20 | data=$1 21 | 22 | if [ !
-f $data/utt2dur ]; then 23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1; 24 | fi 25 | 26 | # 0 27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed.sh: -------------------------------------------------------------------------------- 1 | ../perturb_data_dir_speed.sh -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed_3way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Apache 2.0 6 | 7 | # This script does the standard 3-way speed perturbing of 8 | # a data directory (it operates on the wav.scp). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 2 ]; then 13 | echo "Usage: perturb_data_dir_speed_3way.sh " 14 | echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." 15 | echo "e.g.:" 16 | echo " $0 data/train data/train_sp" 17 | echo "Note: if /feats.scp already exists, this will refuse to run." 18 | exit 1 19 | fi 20 | 21 | srcdir=$1 22 | destdir=$2 23 | 24 | if [ ! -f $srcdir/wav.scp ]; then 25 | echo "$0: expected $srcdir/wav.scp to exist" 26 | exit 1 27 | fi 28 | 29 | if [ -f $destdir/feats.scp ]; then 30 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" 31 | exit 1 32 | fi 33 | 34 | echo "$0: making sure the utt2dur file is present in ${srcdir}, because " 35 | echo "... obtaining it after speed-perturbing would be very slow, and" 36 | echo "... you might need it." 37 | utils/data/get_utt2dur.sh ${srcdir} 38 | 39 | utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1 40 | utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1 41 | utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1 42 | 43 | rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 44 | 45 | echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" 46 | utils/validate_data_dir.sh --no-feats $destdir 47 | 48 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a data directory, such as in data/train/, and modifies 7 | # the wav.scp to perturb the volume (typically useful for training data when 8 | # using systems that don't have cepstral mean normalization). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 1 ]; then 13 | echo "Usage: $0 " 14 | echo "e.g.:" 15 | echo " $0 data/train" 16 | exit 1 17 | fi 18 | 19 | export LC_ALL=C 20 | 21 | data=$1 22 | 23 | if [ ! -f $data/wav.scp ]; then 24 | echo "$0: Expected $data/wav.scp to exist" 25 | exit 1 26 | fi 27 | 28 | if grep -q "sox --vol" $data/wav.scp; then 29 | echo "$0: It looks like the data was already volume perturbed. Not doing anything." 
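  # Illustrative: a previously perturbed wav.scp entry ends in something like
  #   ... sox --vol 1.6 -t wav - -t wav - |
  # which is what the grep above keys on.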
30 | exit 0 31 | fi 32 | 33 | cat $data/wav.scp | python -c " 34 | import sys, os, subprocess, re, random 35 | random.seed(0) 36 | scale_low = 1.0/8 37 | scale_high = 2.0 38 | for line in sys.stdin.readlines(): 39 | if len(line.strip()) == 0: 40 | continue 41 | # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' 42 | if line.strip()[-1] == '|': 43 | print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) 44 | elif re.search(':[0-9]+$', line.strip()) is not None: 45 | parts = line.split() 46 | print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 47 | else: 48 | parts = line.split() 49 | print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 50 | " > $data/wav.scp_scaled || exit 1; 51 | 52 | len1=$(cat $data/wav.scp | wc -l) 53 | len2=$(cat $data/wav.scp_scaled | wc -l) 54 | if [ "$len1" != "$len2" ]; then 55 | echo "$0: error detected: number of lines changed $len1 vs $len2"; 56 | exit 1 57 | fi 58 | 59 | mv $data/wav.scp_scaled $data/wav.scp 60 | 61 | if [ -f $data/feats.scp ]; then 62 | echo "$0: $data/feats.scp exists; moving it to $data/.backup/ as it wouldn't be valid any more." 63 | mkdir -p $data/.backup/ 64 | mv $data/feats.scp $data/.backup/ 65 | fi 66 | 67 | echo "$0: added volume perturbation to the data in $data" 68 | exit 0 69 | 70 | -------------------------------------------------------------------------------- /utils/data/remove_dup_utts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove excess utterances once they appear more than a specified 4 | # number of times with the same transcription, in a data set. 5 | # E.g. useful for removing excess "uh-huh" from training. 6 | 7 | if [ $# != 3 ]; then 8 | echo "Usage: remove_dup_utts.sh max-count " 9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup" 10 | echo "This script is used to filter out utterances that have from over-represented" 11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of" 12 | echo "any given word-sequence to a specified value. It's often used to get" 13 | echo "subsets for early stages of training." 14 | exit 1; 15 | fi 16 | 17 | maxcount=$1 18 | srcdir=$2 19 | destdir=$3 20 | mkdir -p $destdir 21 | 22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1; 23 | 24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1; 25 | 26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1; 27 | 28 | cp $srcdir/* $destdir 29 | cat $srcdir/text | \ 30 | perl -e ' 31 | $maxcount = shift @ARGV; 32 | @all = (); 33 | $p1 = 103349; $p2 = 71147; $k = 0; 34 | sub random { # our own random number generator: predictable. 
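# Added note: this is a small deterministic recurrence, k = (k + 103349) mod 71147,
# scaled to [0,1); being predictable means repeated runs keep the same utterances.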
35 | $k = ($k + $p1) % $p2; 36 | return ($k / $p2); 37 | } 38 | while(<>) { 39 | push @all, $_; 40 | @A = split(" ", $_); 41 | shift @A; 42 | $text = join(" ", @A); 43 | $count{$text} ++; 44 | } 45 | foreach $line (@all) { 46 | @A = split(" ", $line); 47 | shift @A; 48 | $text = join(" ", @A); 49 | $n = $count{$text}; 50 | if ($n < $maxcount || random() < ($maxcount / $n)) { 51 | print $line; 52 | } 53 | }' $maxcount >$destdir/text 54 | 55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`" 56 | 57 | echo "Using fix_data_dir.sh to reconcile the other files." 58 | utils/fix_data_dir.sh $destdir 59 | rm -r $destdir/.backup 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /utils/data/split_data.sh: -------------------------------------------------------------------------------- 1 | ../split_data.sh -------------------------------------------------------------------------------- /utils/data/subset_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../subset_data_dir.sh -------------------------------------------------------------------------------- /utils/data/validate_data_dir.sh: -------------------------------------------------------------------------------- 1 | ../validate_data_dir.sh -------------------------------------------------------------------------------- /utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 
24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | 5 | import sys 6 | 7 | vocab=set() 8 | with open(sys.argv[1]) as vocabfile: 9 | for line in vocabfile: 10 | vocab.add(line.strip()) 11 | 12 | with open(sys.argv[2]) as textfile: 13 | for line in textfile: 14 | print " ".join(map(lambda word: word if word in vocab else '', line.strip().split())) 15 | -------------------------------------------------------------------------------- /utils/filter_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script takes a list of utterance-ids or any file whose first field 20 | # of each line is an utterance-id, and filters an scp 21 | # file (or any file whose "n-th" field is an utterance id), printing 22 | # out only those lines whose "n-th" field is in id_list. The index of 23 | # the "n-th" field is 1, by default, but can be changed by using 24 | # the -f switch 25 | 26 | $exclude = 0; 27 | $field = 1; 28 | $shifted = 0; 29 | 30 | do { 31 | $shifted=0; 32 | if ($ARGV[0] eq "--exclude") { 33 | $exclude = 1; 34 | shift @ARGV; 35 | $shifted=1; 36 | } 37 | if ($ARGV[0] eq "-f") { 38 | $field = $ARGV[1]; 39 | shift @ARGV; shift @ARGV; 40 | $shifted=1 41 | } 42 | } while ($shifted); 43 | 44 | if(@ARGV < 1 || @ARGV > 2) { 45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . 46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . 47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . 48 | "only the lines that were *not* in id_list.\n" . 49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . 50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" . 51 | "-f option, add 1 to the argument.\n" . 52 | "See also: utils/filter_scp.pl .\n"; 53 | } 54 | 55 | 56 | $idlist = shift @ARGV; 57 | open(F, "<$idlist") || die "Could not open id-list file $idlist"; 58 | while() { 59 | @A = split; 60 | @A>=1 || die "Invalid id-list file line $_"; 61 | $seen{$A[0]} = 1; 62 | } 63 | 64 | if ($field == 1) { # Treat this as special case, since it is common. 65 | while(<>) { 66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 67 | # $1 is what we filter on. 
68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { 69 | print $_; 70 | } 71 | } 72 | } else { 73 | while(<>) { 74 | @A = split; 75 | @A > 0 || die "Invalid scp file line $_"; 76 | @A >= $field || die "Invalid scp file line $_"; 77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { 78 | print $_; 79 | } 80 | } 81 | } 82 | 83 | # tests: 84 | # the following should print "foo 1" 85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) 86 | # the following should print "bar 2". 87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) 88 | -------------------------------------------------------------------------------- /utils/find_arpa_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | if (@ARGV < 1 || @ARGV > 2) { 19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n"; 20 | # This program finds words in the arpa file that are not symbols 21 | # in the OpenFst-format symbol table words.txt. It prints them 22 | # on the standard output, one per line. 23 | } 24 | 25 | $symtab = shift @ARGV; 26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n"; 27 | while(<S>){ 28 | @A = split(" ", $_); 29 | @A == 2 || die "Bad line in symbol table file: $_"; 30 | $seen{$A[0]} = 1; 31 | } 32 | 33 | $found_data=0; 34 | $curgram=0; 35 | while(<>) { # Find the \data\ marker. 36 | if(m:^\\data\\\s*$:) { $found_data=1; last; } 37 | } 38 | 39 | if ($found_data==0) { 40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n"; 41 | exit(1); 42 | } 43 | 44 | while(<>) { 45 | if(m/^\\(\d+)\-grams:\s*$/) { 46 | $curgram = $1; 47 | if($curgram > 1) { 48 | last; # This is an optimization as we can get the vocab from the 1-grams 49 | } 50 | } elsif($curgram > 0) { 51 | @A = split(" ", $_); 52 | if(@A > 1) { 53 | shift @A; 54 | for($n=0;$n<$curgram;$n++) { 55 | $word = $A[$n]; 56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; } 57 | $in_arpa{$word} = 1; 58 | } 59 | } else { 60 | if(@A > 0 && $A[0] !~ m:\\end\\:) { 61 | print STDERR "Unusual line $_ (line $.) in arpa file\n"; 62 | } 63 | } 64 | } 65 | } 66 | 67 | foreach $w (keys %in_arpa) { 68 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") { 69 | print "$w\n"; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/format_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -u 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | set -o errexit 20 | 21 | if [ $# -ne 4 ]; then 22 | printf "Usage: %s lang_dir LM lexicon out_dir\n" `basename $0` 23 | echo " Convert ARPA-format language models to FSTs."; 24 | exit 1; 25 | fi 26 | 27 | lang_dir=$1 28 | lm=$2 29 | lexicon=$3 30 | out_dir=$4 31 | mkdir -p $out_dir 32 | 33 | [ -f ./path.sh ] && . ./path.sh 34 | 35 | echo "Converting '$lm' to FST" 36 | 37 | for f in phones.txt words.txt L.fst L_disambig.fst phones/ oov.int oov.txt; do 38 | cp -r $lang_dir/$f $out_dir 39 | done 40 | 41 | lm_base=$(basename $lm '.gz') 42 | gunzip -c $lm \ 43 | | arpa2fst --disambig-symbol=#0 \ 44 | --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst 45 | set +e 46 | fstisstochastic $out_dir/G.fst 47 | set -e 48 | # The output is like: 49 | # 9.14233e-05 -0.259833 50 | # we do expect the first of these 2 numbers to be close to zero (the second is 51 | # nonzero because the backoff weights make the states sum to >1). 52 | 53 | # Everything below is only for diagnostics. 54 | # Checking that G has no cycles with empty words on them (e.g. <s>, </s>); 55 | # this might cause determinization failure of CLG. 56 | # #0 is treated as an empty word.
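# (Sketch of the check that follows:) the awk below builds an acceptor over
# words whose lexicon entry has no phones (NF==1), plus a #0 self-loop;
# composing it with G.fst and inspecting fstinfo output then reveals any
# cycle made purely of such empty words.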
57 | mkdir -p $out_dir/tmpdir.g 58 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 59 | END{print "0 0 #0 #0"; print "0";}' \ 60 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt 61 | 62 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 63 | $out_dir/tmpdir.g/select_empty.fst.txt \ 64 | | fstarcsort --sort_type=olabel \ 65 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst 66 | 67 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \ 68 | && echo "Language model has cycles with empty words" && exit 1 69 | 70 | rm -r $out_dir/tmpdir.g 71 | 72 | 73 | echo "Succeeded in formatting LM: '$lm'" 74 | -------------------------------------------------------------------------------- /utils/gen_topo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. 7 | 8 | if (@ARGV != 4) { 9 | print STDERR "Usage: utils/gen_topo.pl <num-nonsilence-states> <num-silence-states> <colon-separated-nonsilence-phones> <colon-separated-silence-phones>\n"; 10 | print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n"; 11 | exit (1); 12 | } 13 | 14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV; 15 | 16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) || 17 | die "Unexpected number of nonsilence-model states $num_nonsil_states\n"; 18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) || 19 | die "Unexpected number of silence-model states $num_sil_states\n"; 20 | 21 | $nonsil_phones =~ s/:/ /g; 22 | $sil_phones =~ s/:/ /g; 23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; 24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; 25 | 26 | print "<Topology>\n"; 27 | print "<TopologyEntry>\n"; 28 | print "<ForPhones>\n"; 29 | print "$nonsil_phones\n"; 30 | print "</ForPhones>\n"; 31 | for ($state = 0; $state < $num_nonsil_states; $state++) { 32 | $statep1 = $state+1; 33 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $statep1 0.25 </State>\n"; 34 | } 35 | print "<State> $num_nonsil_states </State>\n"; # non-emitting final state. 36 | print "</TopologyEntry>\n"; 37 | # Now silence phones. They have a different topology-- apart from the first and 38 | # last states, it's fully connected, as long as you have >= 3 states. 39 | 40 | if ($num_sil_states > 1) { 41 | $transp = 1.0 / ($num_sil_states-1); 42 | print "<TopologyEntry>\n"; 43 | print "<ForPhones>\n"; 44 | print "$sil_phones\n"; 45 | print "</ForPhones>\n"; 46 | print "<State> 0 <PdfClass> 0 "; 47 | for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last 48 | # emitting state. 49 | print "<Transition> $nextstate $transp "; 50 | } 51 | print "</State>\n"; 52 | for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to 53 | # themselves and to the last emitting state. 54 | print "<State> $state <PdfClass> $state "; 55 | for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) { 56 | print "<Transition> $nextstate $transp "; 57 | } 58 | print "</State>\n"; 59 | } 60 | # Final emitting state (non-skippable). 61 | $state = $num_sil_states-1; 62 | print "<State> $state <PdfClass> $state <Transition> $state 0.75 <Transition> $num_sil_states 0.25 </State>\n"; 63 | # Final nonemitting state: 64 | print "<State> $num_sil_states </State>\n"; 65 | print "</TopologyEntry>\n"; 66 | } else { 67 | print "<TopologyEntry>\n"; 68 | print "<ForPhones>\n"; 69 | print "$sil_phones\n"; 70 | print "</ForPhones>\n"; 71 | print "<State> 0 <PdfClass> 0 "; 72 | print "<Transition> 0 0.75 "; 73 | print "<Transition> 1 0.25 "; 74 | print "</State>\n"; 75 | print "<State> $num_sil_states </State>\n"; # non-emitting final state.
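# With a single silence state the entry printed above reduces to one emitting
# state with self-loop 0.75 / exit 0.25, i.e. the same shape as the
# non-silence topology earlier in this script.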
76 | print "</TopologyEntry>\n"; 77 | } 78 | 79 | print "</Topology>\n"; 80 | -------------------------------------------------------------------------------- /utils/int2sym.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0. 4 | 5 | undef $field_begin; 6 | undef $field_end; 7 | 8 | 9 | if ($ARGV[0] eq "-f") { 10 | shift @ARGV; 11 | $field_spec = shift @ARGV; 12 | if ($field_spec =~ m/^\d+$/) { 13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 14 | } 15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 16 | if ($1 ne "") { 17 | $field_begin = $1 - 1; # Change to zero-based indexing. 18 | } 19 | if ($2 ne "") { 20 | $field_end = $2 - 1; # Change to zero-based indexing. 21 | } 22 | } 23 | if (!defined $field_begin && !defined $field_end) { 24 | die "Bad argument to -f option: $field_spec"; 25 | } 26 | } 27 | $symtab = shift @ARGV; 28 | if(!defined $symtab) { 29 | print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" . 30 | "options: [-f (<field>|<field-start>-<field-end>)]\n" . 31 | "e.g.: -f 2, or -f 3-4\n"; 32 | exit(1); 33 | } 34 | 35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 36 | while(<F>) { 37 | @A = split(" ", $_); 38 | @A == 2 || die "bad line in symbol table file: $_"; 39 | $int2sym{$A[1]} = $A[0]; 40 | } 41 | 42 | sub int2sym { 43 | my $a = shift @_; 44 | my $pos = shift @_; 45 | if($a !~ m:^\d+$:) { # not all digits.. 46 | $pos1 = $pos+1; # make it one-based. 47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; 48 | } 49 | $s = $int2sym{$a}; 50 | if(!defined ($s)) { 51 | die "int2sym.pl: integer $a not in symbol table $symtab."; 52 | } 53 | return $s; 54 | } 55 | 56 | $error = 0; 57 | while (<>) { 58 | @A = split(" ", $_); 59 | for ($pos = 0; $pos <= $#A; $pos++) { 60 | $a = $A[$pos]; 61 | if ( (!defined $field_begin || $pos >= $field_begin) 62 | && (!defined $field_end || $pos <= $field_end)) { 63 | $a = int2sym($a, $pos); 64 | } 65 | print $a . " "; 66 | } 67 | print "\n"; 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/lang/add_lex_disambig.pl: -------------------------------------------------------------------------------- 1 | ../add_lex_disambig.pl -------------------------------------------------------------------------------- /utils/lang/check_g_properties.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use IPC::Open2; 4 | 5 | if (@ARGV != 1) { 6 | print "Usage: $0 [options] <lang-directory>\n"; 7 | print "e.g.: $0 data/lang\n"; 8 | exit(1); 9 | } 10 | 11 | $lang = shift @ARGV; 12 | 13 | # This script checks that G.fst in the lang directory is OK with respect 14 | # to certain expected properties, and returns nonzero exit status if a problem was 15 | # detected. It is called from validate_lang.pl. 16 | # This only checks the properties of G that relate to disambiguation symbols, 17 | # epsilons and the forbidden symbols <s> and </s>. 18 | 19 | if (!
-e "$lang/G.fst") { 20 | print "$0: error: $lang/G.fst does not exist\n"; 21 | exit(1); 22 | } 23 | 24 | open(W, "<$lang/words.txt") || die "opening $lang/words.txt"; 25 | $hash_zero = -1; 26 | while () { 27 | @A = split(" ", $_); 28 | ($sym, $int) = @A; 29 | if ($sym eq "" || $sym eq "") { $is_forbidden{$int} = 1; } 30 | if ($sym eq "#0") { $hash_zero = $int; } 31 | } 32 | 33 | if (-e "$lang/phones/wdisambig_words.int") { 34 | open(F, "<$lang/phones/wdisambig_words.int") || die "opening $lang/phones/wdisambig_words.int"; 35 | while () { 36 | chop; 37 | $is_disambig{$_} = 1; 38 | } 39 | } else { 40 | $is_disambig{$hash_zero} = 1; 41 | } 42 | 43 | $input_cmd = ". ./path.sh; fstprint $lang/G.fst|"; 44 | open(G, $input_cmd) || die "running command $input_cmd"; 45 | 46 | $info_cmd = ". ./path.sh; fstcompile | fstinfo "; 47 | open2(O, I, "$info_cmd") || die "running command $info_cmd"; 48 | 49 | $has_epsilons = 0; 50 | 51 | while () { 52 | @A = split(" ", $_); 53 | if (@A >= 4) { 54 | if ($is_forbidden{$A[2]} || $is_forbidden{$A[3]}) { 55 | chop; 56 | print "$0: validating $lang: error: line $_ in G.fst contains forbidden symbol or \n"; 57 | exit(1); 58 | } elsif ($is_disambig{$A[2]}) { 59 | print I $_; 60 | if ($A[3] != 0) { 61 | chop; 62 | print "$0: validating $lang: error: line $_ in G.fst has disambig on input but no epsilon on output\n"; 63 | exit(1); 64 | } 65 | } elsif ($A[2] == 0) { 66 | print I $_; 67 | $has_epsilons = 1; 68 | } elsif ($A[2] != $A[3]) { 69 | chop; 70 | print "$0: validating $lang: error: line $_ in G.fst has inputs and outputs different but input is not disambig symbol.\n"; 71 | exit(1); 72 | } 73 | } 74 | } 75 | 76 | close(I); # tell 'fstcompile | fstinfo' pipeline that its input is done. 77 | while () { 78 | if (m/cyclic\s+y/) { 79 | print "$0: validating $lang: error: G.fst has cycles containing only disambig symbols and epsilons. Would cause determinization failure\n"; 80 | exit(1); 81 | } 82 | } 83 | 84 | if ($has_epsilons) { 85 | print "$0: warning: validating $lang: G.fst has epsilon-input arcs. We don't expect these in most setups.\n"; 86 | } 87 | 88 | print "--> $0 successfully validated $lang/G.fst\n"; 89 | exit(0); 90 | -------------------------------------------------------------------------------- /utils/lang/check_phones_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Hang Lyu 3 | 4 | # Licensed udner the Apache License, Version 2.0 (the "Lincense"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OF IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script exits with status zero if the phone symbols tables are the same 18 | # except for possible differences in disambiguation symbols (meaning that all 19 | # symbols except those beginning with a # are mapped to the same values). 20 | # Otherwise it prints a warning and exits with status 1. 
21 | # For the sake of compatibility with other scripts that did not write the 22 | # phones.txt to model directories, this script exits silently with status 0 23 | # if one of the phone symbol tables does not exist. 24 | 25 | . utils/parse_options.sh || exit 1; 26 | 27 | if [ $# -ne 2 ]; then 28 | echo "Usage: utils/lang/check_phones_compatible.sh <phones-symbol-table1> <phones-symbol-table2>" 29 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt" 30 | exit 1; 31 | fi 32 | 33 | table_first=$1 34 | table_second=$2 35 | 36 | # check the files exist or not 37 | if [ ! -f $table_first ]; then 38 | if [ ! -f $table_second ]; then 39 | echo "$0: Error! Both of the two phones-symbol tables are absent." 40 | echo "Please check your command" 41 | exit 1; 42 | else 43 | # The phones-symbol-table1 is absent. The model directory may have been created by an older script. 44 | # For backward compatibility, this script exits silently with status 0. 45 | exit 0; 46 | fi 47 | elif [ ! -f $table_second ]; then 48 | # The phones-symbol-table2 is absent. The model directory may have been created by an older script. 49 | # For backward compatibility, this script exits silently with status 0. 50 | exit 0; 51 | fi 52 | 53 | # Check whether the two tables are the same (except for possible differences in disambiguation symbols). 54 | if ! cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then 55 | echo "$0: phone symbol tables $table_first and $table_second are not compatible." 56 | exit 1; 57 | fi 58 | 59 | exit 0; 60 | -------------------------------------------------------------------------------- /utils/lang/prepare_lang.sh: -------------------------------------------------------------------------------- 1 | ../prepare_lang.sh -------------------------------------------------------------------------------- /utils/lang/validate_lang.pl: -------------------------------------------------------------------------------- 1 | ../validate_lang.pl -------------------------------------------------------------------------------- /utils/ln.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use File::Spec; 3 | 4 | if ( @ARGV < 2 ) { 5 | print STDERR "usage: ln.pl input1 input2 dest-dir\n" . 6 | "This script does a soft link of input1, input2, etc. " . 7 | "to dest-dir, using relative links where possible\n" . 8 | "Note: input-n and dest-dir may both be absolute pathnames,\n" . 9 | "or relative pathnames, relative to the current directory.\n"; 10 | exit(1); 11 | } 12 | 13 | $dir = pop @ARGV; 14 | if ( ! -d $dir ) { 15 | print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n"; 16 | exit(1); 17 | } 18 | 19 | $ans = 1; # true. 20 | 21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path. 22 | defined $absdir || die "No such directory $dir"; 23 | foreach $file (@ARGV) { 24 | $absfile = File::Spec->rel2abs($file); # Get $file as abs path. 25 | defined $absfile || die "No such file or directory: $file"; 26 | @absdir_split = split("/", $absdir); 27 | @absfile_split = split("/", $absfile); 28 | 29 | $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this 30 | # as the destination in the link command.
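# Illustrative: linking file a/b/c.txt into directory a/d shares the leading
# "a" component, so the loop below yields the relative link
#   a/d/c.txt -> ../b/c.txt
# instead of an absolute path.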
31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /utils/make_unigram_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script is used in discriminative training. 18 | # This script makes a simple unigram-loop version of G.fst 19 | # using a unigram grammar estimated from some training transcripts. 20 | # This is for MMI training. 21 | # We don't have any silences in G.fst; these are supplied by the 22 | # optional silences in the lexicon. 23 | 24 | # Note: the symbols in the transcripts become the input and output 25 | # symbols of G.txt; these can be numeric or not. 26 | 27 | if(@ARGV != 0) { 28 | die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 43 | } 44 | 45 | foreach $a (keys %count) { 46 | $prob = $count{$a} / $totcount; 47 | $cost = -log($prob); # Negated natural-log probs. 48 | print "0\t0\t$a\t$a\t$cost\n"; 49 | } 50 | # Zero final-cost. 51 | $final_prob = $nl / $totcount; 52 | $final_cost = -log($final_prob); 53 | print "0\t$final_cost\n"; 54 | 55 | -------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
-------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_dct_mat.py 19 | # This script generates a sparse matrix with the DCT transform; it takes 20 | # into account that the data layout is along the frequency axis, 21 | # while the DCT is applied along the temporal axis. 22 | 23 | from math import * 24 | import sys 25 | 26 | 27 | from optparse import OptionParser 28 | 29 | parser = OptionParser() 30 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 31 | parser.add_option('--splice', dest='splice', help='applied splice value') 32 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis') 33 | (options, args) = parser.parse_args() 34 | 35 | if(options.dim == None or options.splice == None or options.dct_basis == None): 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | dim=int(options.dim) 40 | splice=int(options.splice) 41 | dct_basis=int(options.dct_basis) 42 | 43 | timeContext=2*splice+1 44 | 45 | 46 | # math constants 47 | M_PI = 3.1415926535897932384626433832795 48 | M_SQRT2 = 1.4142135623730950488016887 49 | 50 | 51 | # generate the sparse DCT matrix 52 | print '[' 53 | for k in range(dct_basis): 54 | for m in range(dim): 55 | for n in range(timeContext): 56 | if(n==0): 57 | print m*'0 ', 58 | else: 59 | print (dim-1)*'0 ', 60 | print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))), 61 | if(n==timeContext-1): 62 | print (dim-m-1)*'0 ', 63 | print 64 | print 65 | 66 | print ']' 67 |
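# Shape check (follows from the loops above): there are dct_basis*dim rows, and each row contains m + (timeContext-1)*(dim-1) + timeContext + (dim-m-1) = timeContext*dim entries, so the printed matrix is (dct_basis*dim) x ((2*splice+1)*dim), with one nonzero entry per time offset in each row.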
-------------------------------------------------------------------------------- /utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_hamm_mat.py 19 | # This script generates a diagonal matrix with Hamming-window values. 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='applied splice value') 30 | (options, args) = parser.parse_args() 31 | 32 | if(options.dim == None or options.splice == None): 33 | parser.print_help() 34 | sys.exit(1) 35 | 36 | dim=int(options.dim) 37 | splice=int(options.splice) 38 | 39 | 40 | # generate the diagonal matrix with Hamming-window values 41 | M_2PI = 6.283185307179586476925286766559005 42 | 43 | dim_mat=(2*splice+1)*dim 44 | timeContext=2*splice+1 45 | print '[' 46 | for row in range(dim_mat): 47 | for col in range(dim_mat): 48 | if col!=row: 49 | print '0', 50 | else: 51 | i=int(row/dim) 52 | print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))), 53 | print 54 | 55 | print ']' 56 | 57 | 58 |
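# The result is a square matrix of size (2*splice+1)*dim: row index "row" belongs to frame i = row/dim of the spliced window, and every coefficient of that frame is scaled by the Hamming weight 0.54 - 0.46*cos(2*pi*i/(timeContext-1)).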
-------------------------------------------------------------------------------- /utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_splice.py 19 | # generates the <splice> Component 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') 30 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' ) 31 | (options, args) = parser.parse_args() 32 | 33 | if(options.dim_in == None or options.splice == None): 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | dim_in=int(options.dim_in) 38 | splice=int(options.splice) 39 | splice_step=int(options.splice_step) 40 | 41 | dim_out=(2*splice+1)*dim_in 42 | 43 | print '<splice>', dim_out, dim_in 44 | print '[', 45 | 46 | splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step) 47 | for idx in range(len(splice_vec)): 48 | print splice_vec[idx], 49 | 50 | print ']' 51 | 52 | -------------------------------------------------------------------------------- /utils/prepare_online_nnet_dist_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti) 4 | # Guoguo Chen 5 | # Apache 2.0 6 | # Script to prepare the distribution from the online-nnet build 7 | 8 | other_files= # other files to be included in the build 9 | other_dirs= 10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf" 11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts" 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | [ -f path.sh ] && . ./path.sh; 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: $0 <lang-dir> <model-dir> <tgz-file>" 19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz" 20 | exit 1; 21 | fi 22 | 23 | lang=$1 24 | modeldir=$2 25 | tgzfile=$3 26 | 27 | for f in $lang/phones.txt $other_files; do 28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 29 | done 30 | 31 | build_files= 32 | for d in $modeldir/conf $modeldir/ivector_extractor; do 33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1; 34 | done 35 | 36 | for f in $ivec_extractor_files; do 37 | f=$modeldir/ivector_extractor/$f 38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 39 | build_files="$build_files $f" 40 | done 41 | 42 | # Makes a copy of the original config files, as we will change the absolute path 43 | # to relative. 44 | rm -rf $modeldir/conf_abs_path 45 | mkdir -p $modeldir/conf_abs_path 46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path 47 | 48 | for f in $conf_files; do 49 | [ ! -f $modeldir/conf/$f ] && \ 50 | echo "$0: no such file $modeldir/conf/$f" && exit 1;
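# Why the rewrite below: the tarball produced by this script is meant to be unpacked on another machine, where absolute paths baked into the configs at build time would dangle; rewriting them relative to the current directory keeps the unpacked model self-contained.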
51 | # Changes absolute path to relative path. The path entries in the config file 52 | # are generated by scripts and it is safe to assume that they have the structure: 53 | # variable=path 54 | cat $modeldir/conf_abs_path/$f | perl -e ' 55 | use File::Spec; 56 | while(<STDIN>) { 57 | chomp; 58 | @col = split("=", $_); 59 | if (@col == 2 && (-f $col[1])) { 60 | $col[1] = File::Spec->abs2rel($col[1]); 61 | print "$col[0]=$col[1]\n"; 62 | } else { 63 | print "$_\n"; 64 | } 65 | } 66 | ' > $modeldir/conf/$f 67 | build_files="$build_files $modeldir/conf/$f" 68 | done 69 | 70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \ 71 | $modeldir/final.mdl $modeldir/tree >/dev/null 72 | 73 | # Changes back to absolute path. 74 | rm -rf $modeldir/conf 75 | mv $modeldir/conf_abs_path $modeldir/conf 76 | -------------------------------------------------------------------------------- /utils/reduce_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # koried, 10/29/2012 4 | 5 | # Reduce a data set based on a list of turn-ids 6 | 7 | if [ $# != 3 ]; then 8 | echo "usage: $0 srcdir turnlist destdir" 9 | exit 1; 10 | fi 11 | 12 | srcdir=$1 13 | reclist=$2 14 | destdir=$3 15 | 16 | if [ ! -f $srcdir/utt2spk ]; then 17 | echo "$0: no such file $srcdir/utt2spk" 18 | exit 1; 19 | fi 20 | 21 | function do_filtering { 22 | # assumes the utt2spk and spk2utt files already exist. 23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp 24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp 25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text 26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender 27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp 28 | if [ -f $srcdir/segments ]; then 29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments 30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings. 31 | # When a segments file exists, wav.scp is indexed by recording-id rather than by utterance-id, so the next line redoes the wav.scp filtering from above (which would otherwise be incorrect).
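# Example (hypothetical data): if segments contains the line "utt1 rec1 0.0 2.5" and utt1 survives the filtering, then reco will contain "rec1", and wav.scp keeps exactly the recordings that some surviving utterance points to.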
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | 36 | # Filter the STM file for proper sclite scoring (this will also remove the comment lines) 37 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 38 | rm $destdir/reco 39 | fi 40 | srcutts=`cat $srcdir/utt2spk | wc -l` 41 | destutts=`cat $destdir/utt2spk | wc -l` 42 | echo "Reduced #utt from $srcutts to $destutts" 43 | } 44 | 45 | mkdir -p $destdir 46 | 47 | # filter the utt2spk based on the given list of utterance-ids 48 | utils/filter_scp.pl $reclist < $srcdir/utt2spk > $destdir/utt2spk 49 | 50 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 51 | do_filtering; 52 | 53 | -------------------------------------------------------------------------------- /utils/reduce_data_dir_by_reclist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # koried, 10/29/2012 4 | 5 | # Reduce a data set based on a list of recordings 6 | 7 | if [ $# != 3 ]; then 8 | echo "usage: $0 srcdir reclist destdir" 9 | exit 1; 10 | fi 11 | 12 | srcdir=$1 13 | reclist=$2 14 | destdir=$3 15 | 16 | if [ ! -f $srcdir/utt2spk ]; then 17 | echo "$0: no such file $srcdir/utt2spk" 18 | exit 1; 19 | fi 20 | 21 | function do_filtering { 22 | # assumes the utt2spk and spk2utt files already exist. 23 | [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp 24 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp 25 | [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text 26 | [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender 27 | [ -f $srcdir/cmvn.scp ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/cmvn.scp >$destdir/cmvn.scp 28 | if [ -f $srcdir/segments ]; then 29 | utils/filter_scp.pl $destdir/utt2spk <$srcdir/segments >$destdir/segments 30 | awk '{print $2;}' $destdir/segments | sort | uniq > $destdir/reco # recordings. 31 | # When a segments file exists, wav.scp is indexed by recording-id rather than by utterance-id, so the next line redoes the wav.scp filtering from above (which would otherwise be incorrect).
32 | [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/reco <$srcdir/wav.scp >$destdir/wav.scp 33 | [ -f $srcdir/reco2file_and_channel ] && \ 34 | utils/filter_scp.pl $destdir/reco <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel 35 | [ -f $srcdir/stm ] && utils/filter_scp.pl $destdir/reco < $srcdir/stm > $destdir/stm 36 | rm $destdir/reco 37 | fi 38 | srcutts=`cat $srcdir/utt2spk | wc -l` 39 | destutts=`cat $destdir/utt2spk | wc -l` 40 | echo "Reduced #utt from $srcutts to $destutts" 41 | } 42 | 43 | mkdir -p $destdir 44 | 45 | # filter the utt2spk based on the set of recordings 46 | rm -f $destdir/utt2spk 47 | for i in `cat $reclist`; do 48 | cat $srcdir/utt2spk | grep ^$i >> $destdir/utt2spk 49 | done 50 | 51 | utils/utt2spk_to_spk2utt.pl < $destdir/utt2spk > $destdir/spk2utt 52 | do_filtering; 53 | 54 | -------------------------------------------------------------------------------- /utils/remove_data_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This program searches within a directory for soft links that 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory, 5 | # and it removes both the soft links and the things they point to. 6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] <dir1> [<dir2>] ..." 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | exit 1 29 | fi 30 | 31 | for dir in $*; do 32 | if [ ! -d $dir ]; then 33 | echo "$0: not a directory: $dir" 34 | ret=1 35 | else 36 | for subdir in $(find $dir -type d); do 37 | if [ -d $subdir/storage ]; then 38 | for x in $(ls $subdir); do 39 | f=$subdir/$x 40 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 41 | target=$subdir/$(readlink $f) 42 | if $dry_run; then 43 | echo rm $f $target 44 | else 45 | rm $f $target 46 | fi 47 | fi 48 | done 49 | fi 50 | done 51 | fi 52 | done 53 | 54 | exit $ret 55 | -------------------------------------------------------------------------------- /utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License.
16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on them from FSTs (probably compiled from ARPAs with OOVs in them). 20 | 21 | if ( @ARGV < 1 || @ARGV > 2) { 22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; 23 | } 24 | 25 | $unklist = shift @ARGV; 26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; 27 | while(<S>){ 28 | @A = split(" ", $_); 29 | @A == 1 || die "Bad line in unknown-symbol list: $_"; 30 | $unk{$A[0]} = 1; 31 | } 32 | 33 | $num_removed = 0; 34 | while(<>){ 35 | @A = split(" ", $_); 36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) { 37 | $num_removed++; 38 | } else { 39 | print; 40 | } 41 | } 42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; 43 | 44 | -------------------------------------------------------------------------------- /utils/rnnlm_compute_scores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute scores from RNNLM. This script takes a directory 4 | # $dir (e.g. dir=local/rnnlm/rnnlm.voc30.hl30 ), 5 | # where it expects the files: 6 | # rnnlm wordlist.rnn unk.probs, 7 | # and also an input file location where it can get the sentences to score, and 8 | # an output file location to put the scores (negated logprobs) for each 9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and 10 | # output files will have a first field that corresponds to some kind of 11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the 12 | # N-best list. 13 | # 14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary, 15 | # that the RNN was trained on (note, it won't include <s> or </s>), 16 | # plus <RNN_UNK>, which is a kind of class where we put low-frequency 17 | # words; unk.probs gives the probs for words given this class, and it 18 | # has, on each line, "word prob". 19 | 20 | rnnlm_ver=rnnlm-0.3e 21 | ensure_normalized_probs=false # if true then we add the necessary options to 22 | # normalize the probabilities of the RNNLM, 23 | # e.g. when using faster-rnnlm in the nce mode 24 | 25 | . ./path.sh || exit 1; 26 | . utils/parse_options.sh 27 | 28 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm 29 | 30 | [ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1; 31 | 32 | if [ $# != 4 ]; then 33 | echo "Usage: rnnlm_compute_scores.sh <rnn-dir> <temp-dir> <input-text> <output-scores>" 34 | exit 1; 35 | fi 36 | 37 | dir=$1 38 | tempdir=$2 39 | text_in=$3 40 | scores_out=$4 41 | 42 | for x in rnnlm wordlist.rnn unk.probs; do 43 | if [ ! -f $dir/$x ]; then 44 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist." 45 | exit 1; 46 | fi 47 | done 48 | 49 | mkdir -p $tempdir 50 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text 51 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids. 52 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \ 53 | -v logprobs=$tempdir/loglikes.oov \ 54 | 'BEGIN{ while((getline<voc)>0) { invoc[$1]=1; } while ((getline<unk)>0){ unkprob[$1]=$2;} } 55 | { logprob=0; 56 | if (NF==0) { printf ""; logprob = log(1.0e-07); 57 | print "Warning: empty sequence." | "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf("<RNN_UNK> "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk
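# Illustration (hypothetical data): if wordlist.rnn contains "yes" and "no", the text line "yes maybe no" becomes "yes <RNN_UNK> no" in text.nounk, while log p(maybe | unk-class) from unk.probs (or log(1.0e-07) if the word is not listed there) is accumulated into loglikes.oov for that utterance.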
| "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf(" "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk 64 | 65 | # OK, now we compute the scores on the text with OOVs replaced 66 | # with 67 | 68 | if [ $rnnlm_ver == "faster-rnnlm" ]; then 69 | extra_options= 70 | if [ "$ensure_normalized_probs" = true ]; then 71 | extra_options="--nce-accurate-test 1" 72 | fi 73 | $rnnlm $extra_options -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \ 74 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 75 | else 76 | # add the utterance_id as required by Mikolove's rnnlm 77 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk 78 | 79 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \ 80 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 81 | fi 82 | 83 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \ 84 | echo "rnnlm rescoring failed" && exit 1; 85 | 86 | paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores 87 | 88 | # scores out, with utterance-ids. 89 | paste $tempdir/ids $tempdir/scores > $scores_out 90 | 91 | -------------------------------------------------------------------------------- /utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces and with (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } 24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } 25 | } 26 | print join("\t", @A) . "\n"; 27 | } 28 | -------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal ) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
-------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # This script reads a per-utterance table such as the one generated during scoring 19 | # and outputs the WER in a format similar to what the compute-wer utility 20 | # or utils/best_wer.pl produces, 21 | # i.e. from a table containing lines in this format 22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463 23 | # it produces output like this 24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 25 | # NB: if the STDIN stream contains more than one of the "SUM raw" entries, 26 | # the best one will be found and printed 27 | # 28 | # If the script is called with parameters, it uses them to provide 29 | # a description of the output, 30 | # i.e. 31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set 32 | # will produce the following output 33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set 34 | 35 | 36 | while (<STDIN>) { 37 | if ( m:SUM\s+raw:) { 38 | @F = split; 39 | if ((!defined $wer) || ($wer > $F[8])) { 40 | $corr=$F[4]; 41 | $sub=$F[5]; 42 | $ins=$F[6]; 43 | $del=$F[7]; 44 | $wer=$F[8]; 45 | $words=$F[3]; 46 | } 47 | } 48 | } 49 | 50 | if (defined $wer) { 51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words); 52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]"; 53 | print " " . join(" ", @ARGV) if @ARGV > 0; 54 | print "\n"; 55 | } 56 | -------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] <utterance-id> <lattice-archive> <word-symbol-table>" 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! [ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1;
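# Note: with the default --lm-scale=0.0 and --acoustic-scale=0.0, lattice-to-fst scales all costs away, so the drawing below shows the lattice topology and word labels only; pass nonzero scales to visualize meaningful arc weights.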
28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Cannot automatically open files on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open the ${format} file." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines = (); 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] <=> $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 | -------------------------------------------------------------------------------- /utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /utils/subset_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This program selects a subset of N elements in the scp. 19 | 20 | # By default, it selects them evenly from throughout the scp, in order to avoid 21 | # selecting too many from the same speaker.
It prints them on the standard 22 | # output. 23 | # With the option --first, it just selects the N first utterances. 24 | # With the option --last, it just selects the N last utterances. 25 | 26 | # Last modified by JHU & HKUST @2013 27 | 28 | 29 | $quiet = 0; 30 | $first = 0; 31 | $last = 0; 32 | 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") { 34 | shift; 35 | $quiet = 1; 36 | } 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") { 38 | shift; 39 | $first = 1; 40 | } 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") { 42 | shift; 43 | $last = 1; 44 | } 45 | 46 | if(@ARGV < 2 ) { 47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . 48 | " --quiet causes it to not die if N < num lines in scp.\n" . 49 | " --first and --last make it equivalent to head or tail.\n" . 50 | "See also: filter_scp.pl\n"; 51 | } 52 | 53 | $N = shift @ARGV; 54 | if($N == 0) { 55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; 56 | } 57 | $inscp = shift @ARGV; 58 | open(I, "<$inscp") || die "Opening input scp file $inscp"; 59 | 60 | @F = (); 61 | while() { 62 | push @F, $_; 63 | } 64 | $numlines = @F; 65 | if($N > $numlines) { 66 | if ($quiet) { 67 | $N = $numlines; 68 | } else { 69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines"; 70 | } 71 | } 72 | 73 | sub select_n { 74 | my ($start,$end,$num_needed) = @_; 75 | my $diff = $end - $start; 76 | if ($num_needed > $diff) { 77 | die "select_n: code error"; 78 | } 79 | if ($diff == 1 ) { 80 | if ($num_needed > 0) { 81 | print $F[$start]; 82 | } 83 | } else { 84 | my $halfdiff = int($diff/2); 85 | my $halfneeded = int($num_needed/2); 86 | select_n($start, $start+$halfdiff, $halfneeded); 87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded); 88 | } 89 | } 90 | 91 | if ( ! $first && ! $last) { 92 | if ($N > 0) { 93 | select_n(0, $numlines, $N); 94 | } 95 | } else { 96 | if ($first) { # --first option: same as head. 97 | for ($n = 0; $n < $N; $n++) { 98 | print $F[$n]; 99 | } 100 | } else { # --last option: same as tail. 101 | for ($n = @F - $N; $n < @F; $n++) { 102 | print $F[$n]; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /utils/summarize_logs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | sub split_hundreds { # split list of filenames into groups of 100. 
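# (Explanatory note) grep is run on at most 100 log files at a time: one invocation per file would be slow, while a single invocation over a huge directory could overflow the maximum command-line length.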
8 | my $names = shift @_; 9 | my @A = split(" ", $names); 10 | my @ans = (); 11 | while (@A > 0) { 12 | my $group = ""; 13 | for ($x = 0; $x < 100 && @A>0; $x++) { 14 | $fname = pop @A; 15 | $group .= "$fname "; 16 | } 17 | push @ans, $group; 18 | } 19 | return @ans; 20 | } 21 | 22 | sub parse_accounting_entry { 23 | $entry= shift @_; 24 | 25 | @elems = split " ", $entry; 26 | 27 | $time=undef; 28 | $threads=undef; 29 | foreach $elem (@elems) { 30 | if ( $elem=~ m/time=(\d+)/ ) { 31 | $elem =~ s/time=(\d+)/$1/; 32 | $time = $elem; 33 | } elsif ( $elem=~ m/threads=(\d+)/ ) { 34 | $elem =~ s/threads=(\d+)/$1/g; 35 | $threads = $elem; 36 | } else { 37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n"; 38 | } 39 | } 40 | 41 | if (defined($time) and defined($threads) ) { 42 | return ($time, $threads); 43 | } else { 44 | die "The accounting entry \"$entry\" did not contain all necessary attributes"; 45 | } 46 | } 47 | 48 | foreach $dir (@ARGV) { 49 | 50 | #$dir = $ARGV[0]; 51 | print "$dir\n"; 52 | 53 | ! -d $dir && print STDERR "summarize_logs.pl: no such directory $dir\n" ; 54 | 55 | $dir =~ s:/$::; # Remove trailing slash. 56 | 57 | 58 | # Group the files into categories where all have the same base-name. 59 | foreach $f (glob ("$dir/*.log")) { 60 | $f_category = $f; 61 | # do next expression twice; s///g doesn't work as they overlap. 62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:; 63 | #$f_category =~ s:\.\d+\.:.*.:; 64 | $fmap{$f_category} .= " $f"; 65 | } 66 | } 67 | 68 | foreach $c (sort (keys %fmap) ) { 69 | $n = 0; 70 | foreach $fgroup (split_hundreds($fmap{$c})) { 71 | $n += `grep -w WARNING $fgroup | wc -l`; 72 | } 73 | if ($n != 0) { 74 | print "$n warnings in $c\n" 75 | } 76 | } 77 | foreach $c (sort (keys %fmap)) { 78 | $n = 0; 79 | foreach $fgroup (split_hundreds($fmap{$c})) { 80 | $n += `grep -w ERROR $fgroup | wc -l`; 81 | } 82 | if ($n != 0) { 83 | print "$n errors in $c\n" 84 | } 85 | } 86 | 87 | $supertotal_cpu_time=0.0; 88 | $supertotal_clock_time=0.0; 89 | $supertotal_threads=0.0; 90 | 91 | foreach $c (sort (keys %fmap)) { 92 | $n = 0; 93 | 94 | $total_cpu_time=0.0; 95 | $total_clock_time=0.0; 96 | $total_threads=0.0; 97 | foreach $fgroup (split_hundreds($fmap{$c})) { 98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`; 99 | 100 | #print $lines ."\n"; 101 | 102 | @entries = split "\n", $lines; 103 | 104 | foreach $line (@entries) { 105 | ($time, $threads) = parse_accounting_entry($line); 106 | 107 | $total_cpu_time += $time * $threads; 108 | $total_threads += $threads; 109 | if ( $time > $total_clock_time ) { 110 | $total_clock_time = $time; 111 | } 112 | } 113 | } 114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n"; 115 | 116 | $supertotal_cpu_time += $total_cpu_time; 117 | $supertotal_clock_time += $total_clock_time; 118 | $supertotal_threads += $total_threads; 119 | } 120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n"; 121 |
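# Reading the output: total_cpu_time sums time*threads over all jobs in a group; clock_time is the largest single-job time within the group (roughly the wall-clock time if the group's jobs ran in parallel); the group=all line sums these per-group figures.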
-------------------------------------------------------------------------------- /utils/summarize_warnings.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl <log-dir>\n" && exit 1; 6 | 7 | $dir = $ARGV[0]; 8 | 9 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1; 10 | 11 | $dir =~ s:/$::; # Remove trailing slash. 12 | 13 | 14 | # Group the files into categories where all have the same base-name. 15 | foreach $f (glob ("$dir/*.log")) { 16 | $f_category = $f; 17 | # do next expression twice; s///g doesn't work as they overlap. 18 | $f_category =~ s:\.\d+\.:.*.:; 19 | $f_category =~ s:\.\d+\.:.*.:; 20 | $fmap{$f_category} .= " $f"; 21 | } 22 | 23 | sub split_hundreds { # split list of filenames into groups of 100. 24 | my $names = shift @_; 25 | my @A = split(" ", $names); 26 | my @ans = (); 27 | while (@A > 0) { 28 | my $group = ""; 29 | for ($x = 0; $x < 100 && @A>0; $x++) { 30 | $fname = pop @A; 31 | $group .= "$fname "; 32 | } 33 | push @ans, $group; 34 | } 35 | return @ans; 36 | } 37 | 38 | foreach $c (keys %fmap) { 39 | $n = 0; 40 | foreach $fgroup (split_hundreds($fmap{$c})) { 41 | $n += `grep -w WARNING $fgroup | wc -l`; 42 | } 43 | if ($n != 0) { 44 | print "$n warnings in $c\n" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /utils/sym2int.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | $ignore_oov = 0; 19 | 20 | for($x = 0; $x < 2; $x++) { 21 | if ($ARGV[0] eq "--map-oov") { 22 | shift @ARGV; 23 | $map_oov = shift @ARGV; 24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { 25 | # disallow '-f', the empty string and anything ending in words.txt as the 26 | # OOV symbol because these are likely command-line errors. 27 | die "the --map-oov option requires an argument"; 28 | } 29 | } 30 | if ($ARGV[0] eq "-f") { 31 | shift @ARGV; 32 | $field_spec = shift @ARGV; 33 | if ($field_spec =~ m/^\d+$/) { 34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 35 | } 36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 37 | if ($1 ne "") { 38 | $field_begin = $1 - 1; # Change to zero-based indexing. 39 | } 40 | if ($2 ne "") { 41 | $field_end = $2 - 1; # Change to zero-based indexing. 42 | } 43 | } 44 | if (!defined $field_begin && !defined $field_end) { 45 | die "Bad argument to -f option: $field_spec"; 46 | } 47 | } 48 | } 49 | 50 | $symtab = shift @ARGV; 51 | if (!defined $symtab) { 52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . 53 | "options: [--map-oov <oov-symbol>] [-f <field-range>]\n" . 54 | "note: <field-range> can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | exit(1); 56 | }
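# Typical invocation (hypothetical paths): utils/sym2int.pl -f 2- data/lang/words.txt < data/train/text, which leaves field 1 (the utterance-id) untouched and maps every remaining word through the symbol table.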
54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | } 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 57 | while() { 58 | @A = split(" ", $_); 59 | @A == 2 || die "bad line in symbol table file: $_"; 60 | $sym2int{$A[0]} = $A[1] + 0; 61 | } 62 | 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up 64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } 65 | $map_oov = $sym2int{$map_oov}; 66 | } 67 | 68 | $num_warning = 0; 69 | $max_warning = 20; 70 | 71 | while (<>) { 72 | @A = split(" ", $_); 73 | @B = (); 74 | for ($n = 0; $n < @A; $n++) { 75 | $a = $A[$n]; 76 | if ( (!defined $field_begin || $n >= $field_begin) 77 | && (!defined $field_end || $n <= $field_end)) { 78 | $i = $sym2int{$a}; 79 | if (!defined ($i)) { 80 | if (defined $map_oov) { 81 | if ($num_warning++ < $max_warning) { 82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n"; 83 | if ($num_warning == $max_warning) { 84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n"; 85 | } 86 | } 87 | $i = $map_oov; 88 | } else { 89 | $pos = $n+1; 90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n"; 91 | } 92 | } 93 | $a = $i; 94 | } 95 | push @B, $a; 96 | } 97 | print join(" ", @B); 98 | print "\n"; 99 | } 100 | if ($num_warning > 0) { 101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 102 | } 103 | 104 | exit(0); 105 | -------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 
20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /waves_yesno/0_0_0_0_1_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_0_1_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_0_1_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_0_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_0_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_0_1_0_1_1.wav -------------------------------------------------------------------------------- 
/waves_yesno/0_0_1_1_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_0_1_1_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_0_1_1_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_1_0_1_0.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_0_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_0_1_1_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_0_1_1_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_0_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_0_1_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_0_1_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/0_1_1_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_0_1_0.wav -------------------------------------------------------------------------------- 
/waves_yesno/0_1_1_1_1_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/0_1_1_1_1_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_0_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_0_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_0_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_0_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_0_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_0_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_0_1_1_1_1_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_0_1_1_1_1_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_0_0_0_1.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_0_1_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_0_1_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_0_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_0_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_0_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_0_1_0_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_0_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_0_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_0_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_0_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_0_1_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_0_1_1.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_0_1_1_1_1_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_0_1_1_1_1_0.wav -------------------------------------------------------------------------------- /waves_yesno/1_1_1_0_0_0_0_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_0_0_1.wav -------------------------------------------------------------------------------- 
/waves_yesno/1_1_1_0_0_1_0_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_0_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_0_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_0_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_0_1_0_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_0_1_0_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_0_1_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_0_1_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_0_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_0_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_0_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_0_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_0_0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_0_0.wav
--------------------------------------------------------------------------------
/waves_yesno/1_1_1_1_1_1_1_1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keighrim/kaldi-yesno-tutorial/f4e2910de0d3b58f5f3b5cd10a8e57712c60b34a/waves_yesno/1_1_1_1_1_1_1_1.wav
--------------------------------------------------------------------------------
/waves_yesno/README:
--------------------------------------------------------------------------------
1 | This dataset can be found at http://openslr.org/resources/1/waves_yesno.tar.gz
2 | 
3 | This dataset was created for the Kaldi project (see kaldi.sf.net),
4 | by a contributor who prefers to remain anonymous. The main point of the dataset is
5 | to provide a way to test out the Kaldi scripts for free.
6 | 
7 | The archive "waves_yesno.tar.gz" contains 60 .wav files, sampled at 8 kHz. All were recorded
8 | by the same male speaker, in English (although the individual is not a native speaker).
9 | In each file, the individual says 8 words; each word is either "yes" or "no", so each
10 | file is a random sequence of 8 yes-es or noes. There is no separate transcription provided; the
11 | sequence is encoded in the filename, with 1 for yes and 0 for no, for instance:
12 | 
13 | # tar -xvzf waves_yesno.tar.gz
14 | waves_yesno/1_0_1_1_1_0_1_0.wav
15 | waves_yesno/0_1_1_0_0_1_1_0.wav
16 | ...
17 | 
--------------------------------------------------------------------------------
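Note: the filename convention described in the README above is enough to reconstruct a reference transcript for every recording, which is the kind of mapping a data-preparation script can rely on. As a minimal sketch (not a file from this repository; the helper name transcript_from_filename and the uppercase YES/NO spelling are illustrative assumptions), the decoding takes only a few lines of Python:

    import os

    def transcript_from_filename(wav_path):
        # Strip directory and extension: 'waves_yesno/1_0_1_1_1_0_1_0.wav' -> '1_0_1_1_1_0_1_0'
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        # Map each digit to a word, per the README: 1 = yes, 0 = no
        words = {"1": "YES", "0": "NO"}
        return " ".join(words[d] for d in stem.split("_"))

    print(transcript_from_filename("waves_yesno/1_0_1_1_1_0_1_0.wav"))
    # -> YES NO YES YES YES NO YES NO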