├── README.md ├── chime_eval_result.log ├── cmd.sh ├── conf ├── fbank.conf ├── fbank_16k.conf ├── fbank_16k_64ms_16ms.conf ├── fbank_8k.conf ├── mfcc.conf ├── mfcc_hires.conf ├── mfcc_hires_16k.conf ├── mfcc_hires_8k.conf ├── mfcc_sad.conf ├── online_cmvn.conf └── spectrogram_16k_64ms_16ms.conf ├── data ├── chime7_eval_all_CH │ ├── cmn_slide_fbank_htk.list │ ├── f1.rttm │ └── oracle.rttm └── dipco_dev_all_CH │ ├── cmn_slide_fbank_htk.list │ ├── f1.rttm │ └── oracle.rttm ├── dipco_dev_result.log ├── doc ├── CHiME_2023_DASR_wang.pdf ├── ICASS2024.pdf ├── NN_v3.jpg └── results.jpg ├── embedding_raw └── voxceleb │ ├── cluster_center_128.npy │ ├── cluster_center_256.npy │ ├── cluster_center_64.npy │ ├── cluster_center_64.txt │ ├── cluster_label_128.txt │ ├── cluster_label_256.txt │ ├── cluster_label_64.txt │ ├── speakers.txt │ ├── xvector_cluster_center_128.npy │ ├── xvector_cluster_center_256.npy │ └── xvector_cluster_center_64.npy ├── exp ├── S2S │ └── Batchsize20_4speakers_Segment800s_Mixup0.5_CHiME6MAMSELabel_SimuCHiME6_Mixer6MAMSELabel_SimuMixer6_SimuDipcoDevNoise_all_data_512_all0Dropout_6layers_weight_input_DIM │ │ ├── MULTI_MAM_SE_S2S_model.model6_chime7_eval_all_CH_f1_fusion │ │ ├── rttm_th0.35 │ │ ├── rttm_th0.35_pp │ │ ├── rttm_th0.40 │ │ ├── rttm_th0.40_pp │ │ ├── rttm_th0.45 │ │ ├── rttm_th0.45_pp │ │ ├── rttm_th0.50 │ │ ├── rttm_th0.50_pp │ │ ├── rttm_th0.55 │ │ ├── rttm_th0.55_pp │ │ ├── rttm_th0.60 │ │ ├── rttm_th0.60_pp │ │ ├── rttm_th0.65 │ │ └── rttm_th0.65_pp │ │ └── MULTI_MAM_SE_S2S_model.model6_dipco_dev_all_CH_f1_fusion │ │ ├── rttm_th0.35 │ │ ├── rttm_th0.35_pp │ │ ├── rttm_th0.40 │ │ ├── rttm_th0.40_pp │ │ ├── rttm_th0.45 │ │ ├── rttm_th0.45_pp │ │ ├── rttm_th0.50 │ │ ├── rttm_th0.50_pp │ │ ├── rttm_th0.55 │ │ ├── rttm_th0.55_pp │ │ ├── rttm_th0.60 │ │ ├── rttm_th0.60_pp │ │ ├── rttm_th0.65 │ │ └── rttm_th0.65_pp └── nnet3_recipe_ivector │ ├── extractor │ ├── 10.ie │ ├── final.dubm │ ├── final.ie │ ├── final.ie.id │ ├── final.mat │ ├── global_cmvn.stats │ ├── num_jobs │ ├── online_cmvn.conf │ └── splice_opts │ ├── ivectors_chime7_eval_all_CH_f1 │ └── ivectors_spk.txt │ ├── ivectors_chime7_train_array_Oracle │ └── ivectors_spk.txt │ └── ivectors_dipco_dev_all_CH_f1 │ └── ivectors_spk.txt ├── local ├── HTK.py ├── __pycache__ │ ├── HTK.cpython-39.pyc │ ├── config.cpython-310.pyc │ ├── config.cpython-39.pyc │ ├── conformer2.cpython-310.pyc │ ├── conformer2.cpython-39.pyc │ ├── model_S2S_weight_input_DIM.cpython-39.pyc │ ├── reader_s2s.cpython-39.pyc │ ├── utils.cpython-39.pyc │ └── utils_s2s.cpython-39.pyc ├── analysis_diarization.sh ├── config.py ├── conformer2.py ├── decode_MULTI_SE_MA_MSE_S2S_CH_fusion.py ├── decode_MULTI_SE_MA_MSE_S2S_CH_fusion_models_fusion.py ├── decode_S2S_model.sh ├── decode_S2S_models_fusion.sh ├── extract_feature.sh ├── extract_ivector_session_level.sh ├── loss_function.py ├── md-eval-22.pl ├── model_S2S_weight_input_DIM.py ├── postprocessing_s2s.py ├── prepare_ivector_extractor_dir_with_rttm.py ├── reader_s2s.py ├── reader_sc_s2s.py ├── rttm_filter_with_vad.py ├── run_MAMSE_S2S_chime7_ws_input_DIM.py ├── split_long_segment_s2s.py ├── train_Pretrain_DDP_S2S.py ├── utils.py └── utils_s2s.py ├── path.sh ├── requirements.txt ├── run_decode.sh ├── steps ├── align_basis_fmllr.sh ├── align_basis_fmllr_lats.sh ├── align_fmllr.sh ├── align_fmllr_lats.sh ├── align_lvtln.sh ├── align_raw_fmllr.sh ├── align_sgmm2.sh ├── align_si.sh ├── append_feats.sh ├── best_path_weights.sh ├── cleanup │ ├── clean_and_segment_data.sh │ ├── clean_and_segment_data_nnet3.sh │ 
├── combine_short_segments.py │ ├── create_segments_from_ctm.pl │ ├── debug_lexicon.sh │ ├── decode_fmllr_segmentation.sh │ ├── decode_segmentation.sh │ ├── decode_segmentation_nnet3.sh │ ├── find_bad_utts.sh │ ├── find_bad_utts_nnet.sh │ ├── internal │ │ ├── align_ctm_ref.py │ │ ├── compute_tf_idf.py │ │ ├── ctm_to_text.pl │ │ ├── get_ctm_edits.py │ │ ├── get_non_scored_words.py │ │ ├── get_pron_stats.py │ │ ├── make_one_biased_lm.py │ │ ├── modify_ctm_edits.py │ │ ├── resolve_ctm_edits_overlaps.py │ │ ├── retrieve_similar_docs.py │ │ ├── segment_ctm_edits.py │ │ ├── segment_ctm_edits_mild.py │ │ ├── split_text_into_docs.pl │ │ ├── stitch_documents.py │ │ ├── taint_ctm_edits.py │ │ └── tf_idf.py │ ├── lattice_oracle_align.sh │ ├── make_biased_lm_graphs.sh │ ├── make_biased_lms.py │ ├── make_segmentation_data_dir.sh │ ├── make_segmentation_graph.sh │ ├── make_utterance_fsts.pl │ ├── make_utterance_graph.sh │ ├── segment_long_utterances.sh │ ├── segment_long_utterances_nnet3.sh │ └── split_long_utterance.sh ├── combine_ali_dirs.sh ├── combine_lat_dirs.sh ├── combine_trans_dirs.sh ├── compare_alignments.sh ├── compute_cmvn_stats.sh ├── compute_vad_decision.sh ├── conf │ ├── append_eval_to_ctm.py │ ├── append_prf_to_ctm.py │ ├── apply_calibration.sh │ ├── convert_ctm_to_tra.py │ ├── get_ctm_conf.sh │ ├── lattice_depth_per_frame.sh │ ├── parse_arpa_unigrams.py │ ├── prepare_calibration_data.py │ ├── prepare_word_categories.py │ └── train_calibration.sh ├── copy_ali_dir.sh ├── copy_lat_dir.sh ├── copy_trans_dir.sh ├── data │ ├── augment_data_dir.py │ ├── data_dir_manipulation_lib.py │ ├── make_musan.py │ ├── make_musan.sh │ └── reverberate_data_dir.py ├── decode.sh ├── decode_basis_fmllr.sh ├── decode_biglm.sh ├── decode_combine.sh ├── decode_fmllr.sh ├── decode_fmllr_extra.sh ├── decode_fmmi.sh ├── decode_fromlats.sh ├── decode_lvtln.sh ├── decode_nnet.sh ├── decode_nolats.sh ├── decode_raw_fmllr.sh ├── decode_sgmm2.sh ├── decode_sgmm2_fromlats.sh ├── decode_sgmm2_rescore.sh ├── decode_sgmm2_rescore_project.sh ├── decode_si.sh ├── decode_with_map.sh ├── diagnostic │ ├── analyze_alignments.sh │ ├── analyze_lats.sh │ ├── analyze_lattice_depth_stats.py │ └── analyze_phone_length_stats.py ├── dict │ ├── apply_g2p.sh │ ├── apply_g2p_phonetisaurus.sh │ ├── apply_lexicon_edits.py │ ├── get_pron_stats.py │ ├── internal │ │ ├── get_subsegments.py │ │ ├── prune_pron_candidates.py │ │ └── sum_arc_info.py │ ├── learn_lexicon_bayesian.sh │ ├── learn_lexicon_greedy.sh │ ├── merge_learned_lexicons.py │ ├── prons_to_lexicon.py │ ├── prune_pron_candidates.py │ ├── select_prons_bayesian.py │ ├── select_prons_greedy.py │ ├── train_g2p.sh │ └── train_g2p_phonetisaurus.sh ├── get_ctm.sh ├── get_ctm_conf.sh ├── get_ctm_conf_fast.sh ├── get_ctm_fast.sh ├── get_fmllr_basis.sh ├── get_lexicon_probs.sh ├── get_prons.sh ├── get_train_ctm.sh ├── info │ ├── chain_dir_info.pl │ ├── gmm_dir_info.pl │ ├── nnet2_dir_info.pl │ ├── nnet3_dir_info.pl │ └── nnet3_disc_dir_info.pl ├── libs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── common.cpython-38.pyc │ ├── common.py │ └── nnet3 │ │ ├── __init__.py │ │ ├── report │ │ ├── __init__.py │ │ └── log_parse.py │ │ ├── train │ │ ├── __init__.py │ │ ├── chain_objf │ │ │ ├── __init__.py │ │ │ └── acoustic_model.py │ │ ├── common.py │ │ ├── dropout_schedule.py │ │ └── frame_level_objf │ │ │ ├── __init__.py │ │ │ ├── acoustic_model.py │ │ │ ├── common.py │ │ │ └── raw_model.py │ │ └── xconfig │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── basic_layers.py │ 
│ ├── composite_layers.py │ │ ├── convolution.py │ │ ├── gru.py │ │ ├── layers.py │ │ ├── lstm.py │ │ ├── parser.py │ │ ├── stats_layer.py │ │ ├── trivial_layers.py │ │ └── utils.py ├── lmrescore.sh ├── lmrescore_const_arpa.sh ├── lmrescore_const_arpa_undeterminized.sh ├── lmrescore_rnnlm_lat.sh ├── make_denlats.sh ├── make_denlats_sgmm2.sh ├── make_fbank.sh ├── make_fbank_pitch.sh ├── make_index.sh ├── make_mfcc.sh ├── make_mfcc_pitch.sh ├── make_mfcc_pitch_online.sh ├── make_phone_graph.sh ├── make_plp.sh ├── make_plp_pitch.sh ├── nnet │ ├── align.sh │ ├── decode.sh │ ├── ivector │ │ ├── extract_ivectors.sh │ │ ├── train_diag_ubm.sh │ │ └── train_ivector_extractor.sh │ ├── make_bn_feats.sh │ ├── make_denlats.sh │ ├── make_fmllr_feats.sh │ ├── make_fmmi_feats.sh │ ├── make_priors.sh │ ├── pretrain_dbn.sh │ ├── train.sh │ ├── train_mmi.sh │ ├── train_mpe.sh │ └── train_scheduler.sh ├── nnet2 │ ├── adjust_priors.sh │ ├── align.sh │ ├── check_ivectors_compatible.sh │ ├── convert_lda_to_raw.sh │ ├── convert_nnet1_to_nnet2.sh │ ├── create_appended_model.sh │ ├── decode.sh │ ├── dump_bottleneck_features.sh │ ├── get_egs.sh │ ├── get_egs2.sh │ ├── get_egs_discriminative2.sh │ ├── get_ivector_id.sh │ ├── get_lda.sh │ ├── get_lda_block.sh │ ├── get_num_frames.sh │ ├── get_perturbed_feats.sh │ ├── make_denlats.sh │ ├── make_multisplice_configs.py │ ├── relabel_egs.sh │ ├── relabel_egs2.sh │ ├── remove_egs.sh │ ├── retrain_fast.sh │ ├── retrain_simple2.sh │ ├── retrain_tanh.sh │ ├── train_block.sh │ ├── train_convnet_accel2.sh │ ├── train_discriminative.sh │ ├── train_discriminative2.sh │ ├── train_discriminative_multilang2.sh │ ├── train_more.sh │ ├── train_more2.sh │ ├── train_multilang2.sh │ ├── train_multisplice_accel2.sh │ ├── train_multisplice_ensemble.sh │ ├── train_pnorm.sh │ ├── train_pnorm_accel2.sh │ ├── train_pnorm_bottleneck_fast.sh │ ├── train_pnorm_ensemble.sh │ ├── train_pnorm_fast.sh │ ├── train_pnorm_multisplice.sh │ ├── train_pnorm_multisplice2.sh │ ├── train_pnorm_simple.sh │ ├── train_pnorm_simple2.sh │ ├── train_tanh.sh │ ├── train_tanh_bottleneck.sh │ ├── train_tanh_fast.sh │ └── update_nnet.sh ├── nnet3 │ ├── adjust_priors.sh │ ├── align.sh │ ├── align_lats.sh │ ├── chain │ │ ├── align_lats.sh │ │ ├── build_tree.sh │ │ ├── build_tree_multiple_sources.sh │ │ ├── e2e │ │ │ ├── README.txt │ │ │ ├── compute_biphone_stats.py │ │ │ ├── get_egs_e2e.sh │ │ │ ├── prepare_e2e.sh │ │ │ ├── text_to_phones.py │ │ │ └── train_e2e.py │ │ ├── gen_topo.pl │ │ ├── gen_topo.py │ │ ├── gen_topo2.py │ │ ├── gen_topo3.py │ │ ├── gen_topo4.py │ │ ├── gen_topo5.py │ │ ├── gen_topo_orig.py │ │ ├── get_egs.sh │ │ ├── get_model_context.sh │ │ ├── get_phone_post.sh │ │ ├── make_weighted_den_fst.sh │ │ ├── multilingual │ │ │ └── combine_egs.sh │ │ ├── train.py │ │ └── train_tdnn.sh │ ├── chain2 │ │ ├── combine_egs.sh │ │ ├── compute_preconditioning_matrix.sh │ │ ├── get_raw_egs.sh │ │ ├── internal │ │ │ ├── get_best_model.sh │ │ │ └── get_train_schedule.py │ │ ├── process_egs.sh │ │ ├── randomize_egs.sh │ │ ├── train.sh │ │ ├── validate_processed_egs.sh │ │ ├── validate_randomized_egs.sh │ │ └── validate_raw_egs.sh │ ├── components.py │ ├── compute_output.sh │ ├── convert_nnet2_to_nnet3.py │ ├── decode.sh │ ├── decode_grammar.sh │ ├── decode_lookahead.sh │ ├── decode_looped.sh │ ├── decode_score_fusion.sh │ ├── decode_semisup.sh │ ├── dot │ │ ├── descriptor_parser.py │ │ └── nnet3_to_dot.py │ ├── get_degs.sh │ ├── get_egs.sh │ ├── get_egs_discriminative.sh │ ├── get_egs_targets.sh │ ├── get_saturation.pl 
│ ├── get_successful_models.py │ ├── lstm │ │ ├── make_configs.py │ │ └── train.sh │ ├── make_bottleneck_features.sh │ ├── make_denlats.sh │ ├── make_tdnn_configs.py │ ├── multilingual │ │ ├── allocate_multilingual_examples.py │ │ └── combine_egs.sh │ ├── nnet3_to_dot.sh │ ├── remove_egs.sh │ ├── report │ │ ├── convert_model.py │ │ ├── generate_plots.py │ │ └── summarize_compute_debug_timing.py │ ├── tdnn │ │ ├── make_configs.py │ │ ├── train.sh │ │ └── train_raw_nnet.sh │ ├── train_discriminative.sh │ ├── train_dnn.py │ ├── train_raw_dnn.py │ ├── train_raw_rnn.py │ ├── train_rnn.py │ ├── train_tdnn.sh │ ├── xconfig_to_config.py │ └── xconfig_to_configs.py ├── online │ ├── decode.sh │ ├── nnet2 │ │ ├── align.sh │ │ ├── copy_data_dir.sh │ │ ├── copy_ivector_dir.sh │ │ ├── decode.sh │ │ ├── dump_nnet_activations.sh │ │ ├── extract_ivectors.sh │ │ ├── extract_ivectors_online.sh │ │ ├── get_egs.sh │ │ ├── get_egs2.sh │ │ ├── get_egs_discriminative2.sh │ │ ├── get_pca_transform.sh │ │ ├── make_denlats.sh │ │ ├── prepare_online_decoding.sh │ │ ├── prepare_online_decoding_retrain.sh │ │ ├── prepare_online_decoding_transfer.sh │ │ ├── train_diag_ubm.sh │ │ └── train_ivector_extractor.sh │ ├── nnet3 │ │ ├── decode.sh │ │ ├── decode_wake_word.sh │ │ └── prepare_online_decoding.sh │ └── prepare_online_decoding.sh ├── oracle_wer.sh ├── overlap │ ├── get_overlap_segments.py │ ├── get_overlap_targets.py │ ├── output_to_rttm.py │ ├── post_process_output.sh │ └── prepare_overlap_graph.py ├── paste_feats.sh ├── pytorchnn │ ├── check_py.py │ ├── compute_sentence_scores.py │ ├── data.py │ ├── lmrescore_nbest_pytorchnn.sh │ ├── model.py │ └── train.py ├── resegment_data.sh ├── resegment_text.sh ├── rnnlmrescore.sh ├── score_kaldi.sh ├── score_kaldi_compare.sh ├── scoring │ ├── score_kaldi_cer.sh │ ├── score_kaldi_compare.sh │ └── score_kaldi_wer.sh ├── search_index.sh ├── segmentation │ ├── ali_to_targets.sh │ ├── combine_targets_dirs.sh │ ├── convert_targets_dir_to_whole_recording.sh │ ├── convert_utt2spk_and_segments_to_rttm.py │ ├── copy_targets_dir.sh │ ├── decode_sad.sh │ ├── detect_speech_activity.sh │ ├── evaluate_segmentation.pl │ ├── get_targets_for_out_of_segments.sh │ ├── internal │ │ ├── arc_info_to_targets.py │ │ ├── find_oov_phone.py │ │ ├── get_default_targets_for_out_of_segments.py │ │ ├── get_transform_probs_mat.py │ │ ├── merge_segment_targets_to_recording.py │ │ ├── merge_targets.py │ │ ├── prepare_sad_graph.py │ │ ├── resample_targets.py │ │ ├── sad_to_segments.py │ │ └── verify_phones_list.py │ ├── lats_to_targets.sh │ ├── merge_targets_dirs.sh │ ├── post_process_sad_to_segments.sh │ ├── prepare_targets_gmm.sh │ ├── resample_targets_dir.sh │ └── validate_targets_dir.sh ├── select_feats.sh ├── shift_feats.sh ├── subset_ali_dir.sh ├── tandem │ ├── align_fmllr.sh │ ├── align_sgmm2.sh │ ├── align_si.sh │ ├── decode.sh │ ├── decode_fmllr.sh │ ├── decode_sgmm2.sh │ ├── decode_si.sh │ ├── make_denlats.sh │ ├── make_denlats_sgmm2.sh │ ├── mk_aslf_lda_mllt.sh │ ├── mk_aslf_sgmm2.sh │ ├── train_deltas.sh │ ├── train_lda_mllt.sh │ ├── train_mllt.sh │ ├── train_mmi.sh │ ├── train_mmi_sgmm2.sh │ ├── train_mono.sh │ ├── train_sat.sh │ ├── train_sgmm2.sh │ └── train_ubm.sh ├── tfrnnlm │ ├── check_py.py │ ├── check_tensorflow_installed.sh │ ├── lmrescore_rnnlm_lat.sh │ ├── lmrescore_rnnlm_lat_pruned.sh │ ├── lstm.py │ ├── lstm_fast.py │ ├── reader.py │ └── vanilla_rnnlm.py ├── train_deltas.sh ├── train_diag_ubm.sh ├── train_lda_mllt.sh ├── train_lvtln.sh ├── train_map.sh ├── train_mmi.sh ├── 
train_mmi_fmmi.sh ├── train_mmi_fmmi_indirect.sh ├── train_mmi_sgmm2.sh ├── train_mono.sh ├── train_mpe.sh ├── train_nnet.sh ├── train_quick.sh ├── train_raw_sat.sh ├── train_sat.sh ├── train_sat_basis.sh ├── train_segmenter.sh ├── train_sgmm2.sh ├── train_sgmm2_group.sh ├── train_smbr.sh ├── train_ubm.sh └── word_align_lattices.sh └── utils ├── add_disambig.pl ├── add_lex_disambig.pl ├── analyze_segments.pl ├── apply_map.pl ├── best_wer.sh ├── build_const_arpa_lm.sh ├── build_kenlm_model_from_arpa.sh ├── combine_data.sh ├── convert_ctm.pl ├── convert_slf.pl ├── convert_slf_parallel.sh ├── copy_data_dir.sh ├── create_data_link.pl ├── create_split_dir.pl ├── ctm ├── convert_ctm.pl ├── fix_ctm.sh └── resolve_ctm_overlaps.py ├── data ├── combine_data.sh ├── combine_short_segments.sh ├── convert_data_dir_to_whole.sh ├── copy_data_dir.sh ├── extend_segment_times.py ├── extract_wav_segments_data_dir.sh ├── fix_data_dir.sh ├── fix_subsegment_feats.pl ├── get_allowed_durations.py ├── get_frame_shift.sh ├── get_num_frames.sh ├── get_reco2dur.sh ├── get_reco2utt_for_data.sh ├── get_segments_for_data.sh ├── get_uniform_subsegments.py ├── get_utt2dur.sh ├── get_utt2num_frames.sh ├── internal │ ├── choose_utts_to_combine.py │ ├── combine_segments_to_recording.py │ ├── modify_speaker_info.py │ └── perturb_volume.py ├── limit_feature_dim.sh ├── modify_speaker_info.sh ├── modify_speaker_info_to_recording.sh ├── normalize_data_range.pl ├── perturb_data_dir_speed.sh ├── perturb_data_dir_speed_3way.sh ├── perturb_data_dir_volume.sh ├── perturb_speed_to_allowed_lengths.py ├── remove_dup_utts.sh ├── resample_data_dir.sh ├── shift_and_combine_feats.sh ├── shift_feats.sh ├── split_data.sh ├── subsegment_data_dir.sh ├── subset_data_dir.sh └── validate_data_dir.sh ├── dict_dir_add_pronprobs.sh ├── eps2disambig.pl ├── filt.py ├── filter_scp.pl ├── filter_scps.pl ├── find_arpa_oovs.pl ├── fix_ctm.sh ├── fix_data_dir.sh ├── format_lm.sh ├── format_lm_sri.sh ├── gen_topo.pl ├── int2sym.pl ├── kwslist_post_process.pl ├── lang ├── add_lex_disambig.pl ├── add_unigrams_arpa.pl ├── adjust_unk_arpa.pl ├── adjust_unk_graph.sh ├── bpe │ ├── add_final_optional_silence.sh │ ├── apply_bpe.py │ ├── bidi.py │ ├── learn_bpe.py │ ├── prepend_words.py │ └── reverse.py ├── check_g_properties.pl ├── check_phones_compatible.sh ├── compute_sentence_probs_arpa.py ├── extend_lang.sh ├── get_word_position_phone_map.pl ├── grammar │ ├── augment_phones_txt.py │ └── augment_words_txt.py ├── internal │ ├── apply_unk_lm.sh │ ├── arpa2fst_constrained.py │ └── modify_unk_pron.py ├── limit_arpa_unk_history.py ├── make_kn_lm.py ├── make_lexicon_fst.py ├── make_lexicon_fst_silprob.py ├── make_phone_bigram_lang.sh ├── make_phone_lm.py ├── make_position_dependent_subword_lexicon.py ├── make_subword_lexicon_fst.py ├── make_unk_lm.sh ├── prepare_lang.sh ├── validate_disambig_sym_file.pl └── validate_lang.pl ├── ln.pl ├── make_absolute.sh ├── make_lexicon_fst.pl ├── make_lexicon_fst_silprob.pl ├── make_unigram_grammar.pl ├── map_arpa_lm.pl ├── mkgraph.sh ├── mkgraph_lookahead.sh ├── nnet-cpu ├── make_nnet_config.pl ├── make_nnet_config_block.pl ├── make_nnet_config_preconditioned.pl └── update_learning_rates.pl ├── nnet ├── gen_dct_mat.py ├── gen_hamm_mat.py ├── gen_splice.py ├── make_blstm_proto.py ├── make_cnn_proto.py ├── make_lstm_proto.py ├── make_nnet_proto.py └── subset_data_tr_cv.sh ├── nnet3 └── convert_config_tdnn_to_affine.py ├── parallel ├── limit_num_gpus.sh ├── pbs.pl ├── queue.pl ├── retry.pl ├── run.pl └── slurm.pl ├── parse_options.sh ├── 
pbs.pl ├── perturb_data_dir_speed.sh ├── pinyin_map.pl ├── prepare_extended_lang.sh ├── prepare_lang.sh ├── prepare_online_nnet_dist_build.sh ├── queue.pl ├── remove_data_links.sh ├── remove_oovs.pl ├── retry.pl ├── reverse_arpa.py ├── rnnlm_compute_scores.sh ├── run.pl ├── s2eps.pl ├── scoring ├── wer_ops_details.pl ├── wer_per_spk_details.pl ├── wer_per_utt_details.pl └── wer_report.pl ├── segmentation.pl ├── show_lattice.sh ├── shuffle_list.pl ├── slurm.pl ├── spk2utt_to_utt2spk.pl ├── split_data.sh ├── split_scp.pl ├── ssh.pl ├── subset_data_dir.sh ├── subset_data_dir_tr_cv.sh ├── subset_scp.pl ├── subword ├── prepare_lang_subword.sh └── prepare_subword_text.sh ├── summarize_logs.pl ├── summarize_warnings.pl ├── sym2int.pl ├── train_arpa_with_kenlm.sh ├── utt2spk_to_spk2utt.pl ├── validate_data_dir.sh ├── validate_dict_dir.pl ├── validate_lang.pl ├── validate_text.pl └── write_kwslist.pl /cmd.sh: -------------------------------------------------------------------------------- 1 | # you can change cmd.sh depending on what type of queue you are using. 2 | # If you have no queueing system and want to run on a local machine, you 3 | # can change all instances 'queue.pl' to run.pl (but be careful and run 4 | # commands one by one: most recipes will exhaust the memory on your 5 | # machine). queue.pl works with GridEngine (qsub). slurm.pl works 6 | # with slurm. Different queues are configured differently, with different 7 | # queue names and different ways of specifying things like memory; 8 | # to account for these differences you can create and edit the file 9 | # conf/queue.conf to match your queue's configuration. Search for 10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, 11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 12 | 13 | export train_cmd="run.pl" 14 | -------------------------------------------------------------------------------- /conf/fbank.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --num-mel-bins=40 3 | --low-freq=20 4 | --high-freq=-400 5 | -------------------------------------------------------------------------------- /conf/fbank_16k.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --num-mel-bins=40 3 | --low-freq=20 4 | --high-freq=-400 5 | -------------------------------------------------------------------------------- /conf/fbank_16k_64ms_16ms.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --num-mel-bins=40 3 | --low-freq=20 4 | --high-freq=-400 5 | --frame-length=64 6 | --frame-shift=16 7 | -------------------------------------------------------------------------------- /conf/fbank_8k.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --num-mel-bins=40 3 | --low-freq=20 4 | --high-freq=-400 5 | --sample-frequency=8000 6 | -------------------------------------------------------------------------------- /conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --use-energy=false 2 | --sample-frequency=16000 3 | -------------------------------------------------------------------------------- /conf/mfcc_hires.conf: -------------------------------------------------------------------------------- 1 | # config for high-resolution MFCC features, intended for neural network training. 
2 | # Note: we keep all cepstra, so it has the same info as filterbank features, 3 | # but MFCC is more easily compressible (because less correlated) which is why 4 | # we prefer this method. 5 | --use-energy=false # use average of log energy, not energy. 6 | --sample-frequency=16000 7 | --num-mel-bins=40 8 | --num-ceps=40 9 | --low-freq=40 10 | --high-freq=-400 11 | -------------------------------------------------------------------------------- /conf/mfcc_hires_16k.conf: -------------------------------------------------------------------------------- 1 | # config for high-resolution MFCC features, intended for neural network training. 2 | # Note: we keep all cepstra, so it has the same info as filterbank features, 3 | # but MFCC is more easily compressible (because less correlated) which is why 4 | # we prefer this method. 5 | --use-energy=false # use average of log energy, not energy. 6 | --sample-frequency=16000 7 | --num-mel-bins=40 8 | --num-ceps=40 9 | --low-freq=40 10 | --high-freq=-400 11 | -------------------------------------------------------------------------------- /conf/mfcc_hires_8k.conf: -------------------------------------------------------------------------------- 1 | # config for high-resolution MFCC features, intended for neural network training. 2 | # Note: we keep all cepstra, so it has the same info as filterbank features, 3 | # but MFCC is more easily compressible (because less correlated) which is why 4 | # we prefer this method. 5 | --use-energy=false # use average of log energy, not energy. 6 | --sample-frequency=8000 7 | --num-mel-bins=40 8 | --num-ceps=40 9 | --low-freq=40 10 | --high-freq=-200 11 | -------------------------------------------------------------------------------- /conf/mfcc_sad.conf: -------------------------------------------------------------------------------- 1 | # config for high-resolution MFCC features, intended for SAD neural network training. 2 | # Note: we keep all cepstra, so it has the same info as filterbank features, 3 | # but MFCC is more easily compressible (because less correlated) which is why 4 | # we prefer this method. 5 | --use-energy=false # use average of log energy, not energy. 
6 | --sample-frequency=16000 7 | --num-mel-bins=40 8 | --num-ceps=40 9 | --low-freq=40 10 | --high-freq=-400 11 | -------------------------------------------------------------------------------- /conf/online_cmvn.conf: -------------------------------------------------------------------------------- 1 | # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh 2 | -------------------------------------------------------------------------------- /conf/spectrogram_16k_64ms_16ms.conf: -------------------------------------------------------------------------------- 1 | --frame-length=64 2 | --frame-shift=16 3 | -------------------------------------------------------------------------------- /doc/CHiME_2023_DASR_wang.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/doc/CHiME_2023_DASR_wang.pdf -------------------------------------------------------------------------------- /doc/ICASS2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/doc/ICASS2024.pdf -------------------------------------------------------------------------------- /doc/NN_v3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/doc/NN_v3.jpg -------------------------------------------------------------------------------- /doc/results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/doc/results.jpg -------------------------------------------------------------------------------- /embedding_raw/voxceleb/cluster_center_128.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/cluster_center_128.npy -------------------------------------------------------------------------------- /embedding_raw/voxceleb/cluster_center_256.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/cluster_center_256.npy -------------------------------------------------------------------------------- /embedding_raw/voxceleb/cluster_center_64.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/cluster_center_64.npy -------------------------------------------------------------------------------- /embedding_raw/voxceleb/xvector_cluster_center_128.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/xvector_cluster_center_128.npy -------------------------------------------------------------------------------- /embedding_raw/voxceleb/xvector_cluster_center_256.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/xvector_cluster_center_256.npy -------------------------------------------------------------------------------- /embedding_raw/voxceleb/xvector_cluster_center_64.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/embedding_raw/voxceleb/xvector_cluster_center_64.npy -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/10.ie: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/exp/nnet3_recipe_ivector/extractor/10.ie -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/final.dubm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/exp/nnet3_recipe_ivector/extractor/final.dubm -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/final.ie: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/exp/nnet3_recipe_ivector/extractor/final.ie -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/final.ie.id: -------------------------------------------------------------------------------- 1 | 3acf506c5892d1f607da22efbc9e7933 2 | -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/final.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/exp/nnet3_recipe_ivector/extractor/final.mat -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/global_cmvn.stats: -------------------------------------------------------------------------------- 1 | [ 2 | 3.06165e+11 -8.240025e+09 -2.253718e+10 3.054559e+09 -4.238454e+10 -3.825784e+10 -5.03306e+10 -2.026265e+10 -2.347276e+10 -4.240301e+09 -1.706322e+10 -6.892789e+09 -2.357631e+10 -2.507509e+09 -1.907804e+10 -6.255032e+09 -1.384562e+10 -2.108998e+09 -8.082981e+09 -7.889935e+08 -3.700739e+09 -7.177256e+07 -6.844363e+08 -3.111713e+07 1.112144e+09 -1.571209e+08 1.9715e+09 -8.712586e+08 1.764505e+09 -1.274736e+09 1.756529e+09 -9.595976e+08 1.610006e+09 -7.968066e+08 1.349054e+09 -3.42071e+08 6.27247e+08 -8.501681e+08 -4.32481e+08 -5.583656e+08 3.028656e+09 3 | 3.189034e+13 1.238453e+12 1.439817e+12 1.378808e+12 2.032086e+12 1.896199e+12 2.230804e+12 1.426913e+12 1.446148e+12 1.219986e+12 1.22271e+12 1.199805e+12 1.094108e+12 8.138162e+11 7.389557e+11 4.92009e+11 4.304046e+11 2.647395e+11 1.897839e+11 9.905068e+10 5.47939e+10 1.789002e+10 2.644892e+09 7.090682e+08 9.178836e+09 2.227656e+10 3.951766e+10 5.257301e+10 6.400752e+10 7.014398e+10 7.660171e+10 8.050034e+10 8.482946e+10 8.021838e+10 6.389043e+10 5.309594e+10 4.975399e+10 3.896006e+10 2.983068e+10 2.09207e+10 0 ] 4 | 
-------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/num_jobs: -------------------------------------------------------------------------------- 1 | 40 2 | -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/online_cmvn.conf: -------------------------------------------------------------------------------- 1 | # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh 2 | -------------------------------------------------------------------------------- /exp/nnet3_recipe_ivector/extractor/splice_opts: -------------------------------------------------------------------------------- 1 | --left-context=3 --right-context=3 2 | -------------------------------------------------------------------------------- /local/HTK.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy 4 | import struct 5 | 6 | 7 | def readHtk(filename): 8 | ''' 9 | Reads the features in a HTK file, and returns them in a 2-D numpy array. 10 | ''' 11 | with open(filename, "rb") as f: 12 | # Read header 13 | nSamples, sampPeriod, sampSize, parmKind = struct.unpack(">iihh", f.read(12)) 14 | # sampPeriod and parmKind will be omitted 15 | # Read data 16 | data = struct.unpack(">%df" % (nSamples * sampSize / 4), f.read(nSamples * sampSize)) 17 | # return numpy.array(data).reshape(nSamples, int(sampSize / 4)) 18 | return nSamples, sampPeriod, sampSize, parmKind, data 19 | 20 | def readHtk_start_end(filename, start, end): 21 | with open(filename, "rb") as f: 22 | # Read header 23 | nSamples, sampPeriod, sampSize, parmKind = struct.unpack(">iihh", f.read(12)) 24 | # sampPeriod and parmKind will be omitted 25 | f.seek(start * sampSize,1) 26 | # Read data 27 | data = struct.unpack(">%df" % ((end - start) * sampSize / 4), f.read((end - start) * sampSize)) 28 | # return numpy.array(data).reshape(nSamples, int(sampSize 1 4)) 29 | return nSamples, sampPeriod, sampSize, parmKind, data 30 | 31 | def readHtk_info(filename): 32 | with open(filename, "rb") as f: 33 | # Read header 34 | nSamples, sampPeriod, sampSize, parmKind = struct.unpack(">iihh", f.read(12)) 35 | return nSamples, sampPeriod, sampSize, parmKind 36 | 37 | def writeHtk(filename, feature, sampPeriod=3200, parmKind=9): 38 | ''' 39 | Writes the features in a 2-D numpy array into a HTK file. 40 | ''' 41 | with open(filename, "wb") as f: 42 | # Write header 43 | nSamples = feature.shape[0] 44 | sampSize = feature.shape[1] * 4 45 | f.write(struct.pack(">iihh", nSamples, sampPeriod, sampSize, parmKind)) 46 | # Write data 47 | f.write(struct.pack(">%df" % (nSamples * sampSize / 4), *feature.ravel())) 48 | 49 | 50 | def read_wav_start_end(path, start, end): 51 | dur = end - start 52 | with open(path, "rb") as f: 53 | f.seek(44 + start * 2, 1) 54 | data = struct.unpack("<%dh" % (dur), f.read(dur*2)) 55 | #print(dur, numpy.array(data).shape) 56 | return numpy.array(data) / 32768. 
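# Example (editorial addition, not part of the original script; the path below is a placeholder):
# read a whole HTK feature file and reshape the flat float tuple into a frames x dims array.
#
#   nSamples, sampPeriod, sampSize, parmKind, data = readHtk("feats/S02_U01.fea")
#   feats = numpy.array(data).reshape(nSamples, sampSize // 4)   # sampSize is bytes per frame, 4 bytes per float
#   writeHtk("feats/S02_U01_copy.fea", feats, sampPeriod, parmKind)  # round-trip write with the same header fields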
-------------------------------------------------------------------------------- /local/__pycache__/HTK.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/HTK.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/config.cpython-310.pyc -------------------------------------------------------------------------------- /local/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/conformer2.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/conformer2.cpython-310.pyc -------------------------------------------------------------------------------- /local/__pycache__/conformer2.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/conformer2.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/model_S2S_weight_input_DIM.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/model_S2S_weight_input_DIM.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/reader_s2s.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/reader_s2s.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /local/__pycache__/utils_s2s.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/local/__pycache__/utils_s2s.cpython-39.pyc -------------------------------------------------------------------------------- /local/analysis_diarization.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | 4 | score_area= 5 | collar=0 6 | uem=None 7 | . 
./utils/parse_options.sh 8 | ref_rttm_path=$1 9 | hyp_rttm_path=$2 10 | tempdir=$( mktemp -d /tmp/eval_diarization.XXXXXX ) 11 | if [ -f $uem ];then 12 | echo uem 13 | local/md-eval-22.pl $score_area -u $uem -c $collar -afc -r $ref_rttm_path -s $hyp_rttm_path 2>/dev/null > ${tempdir}/temp.info 14 | else 15 | local/md-eval-22.pl $score_area -c $collar -afc -r $ref_rttm_path -s $hyp_rttm_path 2>/dev/null > ${tempdir}/temp.info 16 | fi 17 | grep SCORED ${tempdir}/temp.info | cut -d "=" -f 2 | cut -d " " -f 1 > ${tempdir}/SCORED.list 18 | grep MISSED ${tempdir}/temp.info | cut -d "=" -f 2 | cut -d " " -f 1 > ${tempdir}/MISSED.list 19 | grep FALARM ${tempdir}/temp.info | cut -d "=" -f 2 | cut -d " " -f 1 > ${tempdir}/FALARM.list 20 | grep "SPEAKER ERROR" ${tempdir}/temp.info | cut -d "=" -f 2 | cut -d " " -f 1 > ${tempdir}/SPEAKER.list 21 | grep OVERALL ${tempdir}/temp.info | cut -d "=" -f 4 | cut -d ")" -f 1 > ${tempdir}/session.list 22 | sed -i '$d' ${tempdir}/session.list 23 | echo "ALL" >> ${tempdir}/session.list 24 | for l in `cat ${tempdir}/session.list`;do 25 | grep $l $ref_rttm_path | awk '{print $8}' | sort | uniq | wc -l 26 | done > ${tempdir}/oracle_spknum.list 27 | 28 | for l in `cat ${tempdir}/session.list`;do 29 | grep $l $hyp_rttm_path | awk '{print $8}' | sort | uniq | wc -l 30 | done > ${tempdir}/diarized_spknum.list 31 | 32 | paste -d " " ${tempdir}/session.list ${tempdir}/SCORED.list ${tempdir}/MISSED.list \ 33 | ${tempdir}/FALARM.list ${tempdir}/SPEAKER.list ${tempdir}/oracle_spknum.list \ 34 | ${tempdir}/diarized_spknum.list > ${tempdir}/temp.details 35 | 36 | awk '{printf "%s %.2f %.2f %.2f %.2f %d %d\n",$1,$4/$2*100,$3/$2*100,$5/$2*100,($3+$4+$5)/$2*100,$6,$7}' ${tempdir}/temp.details > ${tempdir}/temp.info1 37 | echo "session FA MISS SPKERR DER ORACLE_SPKNUM DIARIZED_SPKNUM" > ${tempdir}/temp.details 38 | grep -v "ALL" ${tempdir}/temp.info1 | sort -n -k 5 >> ${tempdir}/temp.details 39 | grep "ALL" ${tempdir}/temp.info1 >> ${tempdir}/temp.details 40 | 41 | column -t ${tempdir}/temp.details 42 | 43 | rm -rf ${tempdir} -------------------------------------------------------------------------------- /local/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | configs3_4Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM = {"input_dim": 40, 4 | "average_pooling": 301, 5 | "cnn_configs": [[2, 64, 3, 1], [64, 64, 3, 1], [64, 128, 3, (2, 1)], [128, 128, 3, 1]], 6 | "conformer_layers": 6, 7 | "conformer_conv_kernel_size": 15, 8 | "conformer_ff_dropout": 0.1, 9 | "decoder_layers": 6, 10 | "decoder_num_heads": 8, 11 | "decoder_ffn_num_hiddens": 1024, 12 | "decoder_mlp_num_hiddens": 512, 13 | "decoder_attn_dropout": 0.0, 14 | "decoder_dropout": 0.0, 15 | "decode_Time": 800, 16 | "fea_dim": 512, 17 | "embedding_path1": "embedding_raw/voxceleb/cluster_center_128.npy", 18 | "ma_mse_layers_1":1, 19 | "embedding_path2": "embedding_raw/voxceleb/xvector_cluster_center_128.npy", 20 | "ma_mse_layers_2":1, 21 | "output_speaker": 4 22 | } 23 | 24 | configs3_2Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM = {"input_dim": 40, 25 | "average_pooling": 301, 26 | "cnn_configs": [[2, 64, 3, 1], [64, 64, 3, 1], [64, 128, 3, (2, 1)], [128, 128, 3, 1]], 27 | "conformer_layers": 6, 28 | "conformer_conv_kernel_size": 15, 29 | "conformer_ff_dropout": 0.1, 30 | "decoder_layers": 6, 31 | "decoder_num_heads": 8, 32 | "decoder_ffn_num_hiddens": 1024, 33 | "decoder_mlp_num_hiddens": 512, 34 | "decoder_attn_dropout": 0.0, 35 | 
"decoder_dropout": 0.0, 36 | "decode_Time": 800, 37 | "fea_dim": 512, 38 | "embedding_path1": "embedding_raw/voxceleb/cluster_center_128.npy", 39 | "ma_mse_layers_1":3, 40 | "embedding_path2": "embedding_raw/voxceleb/xvector_cluster_center_128.npy", 41 | "ma_mse_layers_2":3, 42 | "output_speaker": 2 43 | } 44 | 45 | 46 | configs = { 47 | "configs3_4Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM": configs3_4Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM, 48 | "configs3_2Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM": configs3_2Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM, 49 | } 50 | -------------------------------------------------------------------------------- /local/run_MAMSE_S2S_chime7_ws_input_DIM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | from train_Pretrain_DDP_S2S import Train 6 | from model_S2S_weight_input_DIM import MULTI_MAM_SE_S2S_model 7 | from config import configs3_4Speakers_ivector_ivector128_xvectors128_S2S_MA_MSE_DIM as config_train 8 | import torch 9 | from reader_sc_s2s import Fbank_Embedding_Label_Mask, collate_fn_mask, RTTM_to_Speaker_Mask 10 | 11 | 12 | data="CHiME6MAMSELabel_SimuCHiME6_Mixer6MAMSELabel_SimuMixer6_SimuDipcoDevNoise" # train data name 13 | feature_scp = f"data/{data}/cmn_slide_fbank_htk.list" # fbank 14 | ivector_path = f"data/{data}/ivectors_spk.txt" # i-vector 15 | oracle_rttm = f"data/{data}/oracle.rttm" 16 | 17 | max_utt_durance = 800 18 | batchsize = 20 19 | mixup_rate=0.5 20 | 21 | output_dir = f"exp/S2S/Batchsize{batchsize}_4speakers_Segment{max_utt_durance}s_Mixup{mixup_rate}_{data}_all_data_512_all0Dropout_6layers_weight_input_DIM" 22 | print('exp will be saved in', output_dir) 23 | if not os.path.exists(output_dir): 24 | os.makedirs(output_dir, exist_ok=True) 25 | label_2classes = RTTM_to_Speaker_Mask(oracle_rttm, differ_silence_inference_speech = False) 26 | 27 | multiple_4speakers_2classes = Fbank_Embedding_Label_Mask(feature_scp, ivector_path, label_2classes, append_speaker=True, diff_speaker=True, min_speaker=2, max_speaker=4, max_utt_durance=max_utt_durance, frame_shift=int(max_utt_durance/4*3), mixup_rate=mixup_rate, alpha=0.5) 28 | 29 | 30 | os.system("cp {} {}/{}".format(os.path.abspath(sys.argv[0]), output_dir, os.path.basename(sys.argv[0]))) 31 | os.system("cp {} {}/{}".format("local_gb/model_S2S_weight_input_DIM.py", output_dir, "model.py")) 32 | optimizer = torch.optim.Adam 33 | loss_fn = torch.nn.BCEWithLogitsLoss() 34 | 35 | 36 | train = Train(multiple_4speakers_2classes, collate_fn_mask, MULTI_MAM_SE_S2S_model, config_train, "MULTI_MAM_SE_S2S_model", output_dir, optimizer, loss_fn, batchsize=batchsize, accumulation_steps=[(0, 1)], lr=0.0001, start_epoch=0, end_epoch=6, num_workers=12) 37 | train.train(updata_utt=True) 38 | -------------------------------------------------------------------------------- /local/split_long_segment_s2s.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import os 4 | import sys 5 | 6 | def split_segment(prob, sess, spk, start, end, max_dur=2000): 7 | dur = end - start 8 | if dur <= max_dur: 9 | print("SPEAKER {} 1 {:.2f} {:.2f} {} ".format(sess, start/100., dur/100., spk)) 10 | else: 11 | tosplit = int(start+100 + np.argmin(prob[int(start+100):int(end-100)])) 12 | split_segment(prob, sess, spk, start, tosplit) 13 | split_segment(prob, sess, spk, tosplit, end) 14 | 15 | 
16 | prob_array_dir = sys.argv[1] 17 | input_rttm = sys.argv[2] 18 | prob_array = [os.path.join(prob_array_dir, l) for l in os.listdir(prob_array_dir)] 19 | prob_label = {} 20 | #print(prob_array_dir, input_rttm) 21 | for p in prob_array: 22 | if p.find(".npy") == -1: continue 23 | session = os.path.basename(p).split('.')[0] 24 | if session.find("CH") != -1 and session.find("S") != -1: 25 | sess = session.split("_")[0] 26 | elif session.find("CH") != -1 and session.find("S") == -1: 27 | sess = "_".join(session.split("_")[:-1]) 28 | else: 29 | sess = session 30 | prob_label[sess] = np.load(os.path.join(p)) #num_spk, len 31 | IN = open(input_rttm) 32 | for l in IN: 33 | #print(l) 34 | line = l.split(" ") 35 | session = line[1] 36 | if line[-2] != "": 37 | spk = line[-2] 38 | else: 39 | spk = line[-3] 40 | #print(line[3] ) 41 | start = np.int64(np.float64(line[3]) * 100 ) 42 | dur = np.int64(np.float64(line[4]) * 100) 43 | end = start + dur 44 | if dur <= 2000: 45 | print(l.rstrip()) 46 | #pass 47 | else: 48 | split_segment(prob_label[session][int(spk)], session, spk, start, end, max_dur=2000) 49 | -------------------------------------------------------------------------------- /local/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import torch 4 | import logging 5 | import sys 6 | import pdb 7 | 8 | 9 | def save_checkpoint(model, optimizer, filename): 10 | try: 11 | torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict()}, filename) 12 | except: 13 | torch.save({'model': model.state_dict(), \ 14 | 'optimizer_tsvad': optimizer['tsvad'].state_dict(), \ 15 | 'optimizer_resnet': optimizer['resnet'].state_dict()}, filename) 16 | 17 | def load_checkpoint(model, optimizer, filename): 18 | checkpoint = torch.load(filename) 19 | if model is not None: 20 | model.load_state_dict(checkpoint['model']) 21 | if optimizer is not None: 22 | optimizer.load_state_dict(checkpoint['optimizer']) 23 | 24 | 25 | def load_checkpoint_join_training(model, optimizer, filename): 26 | checkpoint = torch.load(filename) 27 | # pdb.set_trace() 28 | if model is not None: 29 | model_dict = model.state_dict() 30 | # pdb.set_trace() 31 | state_dict_2 = {k:v for k,v in checkpoint['model'].items()} 32 | # pdb.set_trace() 33 | model_dict.update(state_dict_2) 34 | model.load_state_dict(model_dict) 35 | # model_dict['FC.2.weight'] - checkpoint['model']['FC.2.weight'] 36 | # pdb.set_trace() 37 | # model.load_state_dict(checkpoint['model']) 38 | # pdb.set_trace() 39 | if optimizer is not None and 'join_train' in filename: 40 | print('load optimizer') 41 | optimizer.load_state_dict(checkpoint['optimizer']) 42 | 43 | def get_logger(filename): 44 | # Logging configuration: set the basic configuration of the logging system 45 | log_formatter = logging.Formatter(fmt='%(asctime)s [%(processName)s, %(process)s] [%(levelname)-5.5s] %(message)s', datefmt='%m-%d %H:%M') 46 | logger = logging.getLogger() 47 | logger.setLevel(logging.DEBUG) 48 | # File logger 49 | file_handler = logging.FileHandler("{}.log".format(filename)) 50 | file_handler.setFormatter(log_formatter) 51 | file_handler.setLevel(logging.DEBUG) 52 | logger.addHandler(file_handler) 53 | # Stderr logger 54 | std_handler = logging.StreamHandler(sys.stdout) 55 | std_handler.setFormatter(log_formatter) 56 | std_handler.setLevel(logging.DEBUG) 57 | logger.addHandler(std_handler) 58 | return logger 59 | -------------------------------------------------------------------------------- 
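A minimal usage sketch for the helpers in local/utils.py (editorial addition): the model, optimizer, and output path below are placeholders rather than anything from the repository, and the import assumes local/ is on PYTHONPATH, as in the repository's training scripts.

import torch
from utils import get_logger, save_checkpoint, load_checkpoint

logger = get_logger("exp/S2S/example_run")        # writes exp/S2S/example_run.log and echoes to stdout
model = torch.nn.Linear(40, 4)                    # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

save_checkpoint(model, optimizer, "exp/S2S/example_run.model0")   # stores model + optimizer state dicts
load_checkpoint(model, optimizer, "exp/S2S/example_run.model0")   # restores both in place
logger.info("checkpoint restored")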
/path.sh: -------------------------------------------------------------------------------- 1 | #export KALDI_ROOT=/yrfs1/intern/glzhong/kaldi 2 | export KALDI_ROOT=/home/yoos/Documents/code/kaldi 3 | export LD_LIBRARY_PATH=$KALDI_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH 4 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH 5 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 6 | . $KALDI_ROOT/tools/config/common_path.sh 7 | export LC_ALL=C 8 | export LD_LIBRARY_PATH=$KALDI_ROOT/src/lib:$KALDI_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH 9 | #LD_LIBRARY_PATH=/yrfs5/sre/leisun8/tools/kaldi_cuda9/tools/sox/lib:$LD_LIBRARY_PATH 10 | #PATH=/yrfs5/sre/leisun8/tools/kaldi_cuda9/tools/sox/bin:$PATH 11 | 12 | #PATH=/home4/intern/rywang9/tools/sox/:$PATH 13 | #LD_LIBRARY_PATH=/home4/intern/rywang9/tools/sox/lib:$LD_LIBRARY_PATH 14 | #export PATH=/home/intern/stniu/anaconda3/bin/:$PATH 15 | #export PATH=/home4/intern/stniu/anaconda3/envs/mss/bin:$PATH 16 | #export PATH=/opt/lib/cuda-9.0_cudnn-v7.1.4/bin${PATH:+:${PATH}} 17 | #export LD_LIBRARY_PATH=/opt/lib/cuda-9.0_cudnn-v7.1.4/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 18 | 19 | export PATH=/opt/lib/cuda-10.2/bin${PATH:+:${PATH}} 20 | export LD_LIBRARY_PATH=/opt/lib/cudnn/cudnn-10.2-v7.6.5.32/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} 21 | export LD_LIBRARY_PATH=/work1/sre/leisun8/tools/libsndfile/lib/:$LD_LIBRARY_PATH 22 | #export LD_LIBRARY_PATH=/home4/intern/mkhe/anaconda3/envs/torch/lib:$LD_LIBRARY_PATH 23 | #. path_v100.sh 24 | export PATH=/home4/intern/mkhe/anaconda3/bin/:$PATH 25 | export LD_LIBRARY_PATH=/home4/intern/mkhe/anaconda3/lib:$LD_LIBRARY_PATH 26 | 27 | export PATH=/home4/intern/stniu/libs/ffmpeg/bin/:$PATH 28 | export LD_LIBRARY_PATH=/home4/intern/stniu/libs/ffmpeg/lib:$LD_LIBRARY_PATH 29 | #CUDA_LAUNCH_BLOCKING=1 30 | #export NCCL_IB_DISABLE=1 31 | # NCCL_DEBUG=INFO 32 | 33 | NCCL_SOCKET_IFNAME=eth0 34 | 35 | #export PATH=/home3/cv1/hangchen2/anaconda3/envs/py38+cu102/bin/:$PATH 36 | #export LD_LIBRARY_PATH=/home3/cv1/hangchen2/anaconda3/envs/py38+cu102/lib:$LD_LIBRARY_PATH 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops==0.7.0 2 | matplotlib==3.6.2 3 | numpy<1.28.0 4 | scipy==1.11.4 5 | torch==2.1.1 6 | tqdm==4.65.0 -------------------------------------------------------------------------------- /run_decode.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # single model decode 4 | bash /train8/sppro/gbyang/code/NSD-MS2S/local/decode_S2S_model.sh --stage 3 --data chime7_eval_all_CH --diarized_rttm data/chime7_eval_all_CH/f1.rttm --affix f1 5 | 6 | # models fusion decode 7 | bash /train8/sppro/gbyang/code/NSD-MS2S/local/decode_S2S_models_fusion.sh --stage 3 --data chime7_eval_all_CH --diarized_rttm data/chime7_eval_all_CH/f1.rttm --affix f1 -------------------------------------------------------------------------------- /steps/cleanup/internal/ctm_to_text.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | use strict; 7 | use warnings; 8 | 9 | if (scalar @ARGV != 1 && scalar @ARGV != 3) { 10 | my $usage = < []. 
18 | This script assumes the CTM to be in NIST sorted order given by UNIX 19 | sort command "sort +0 -1 +1 -2 +2nb -3" 20 | 21 | Usage: ctm_to_text.pl [--non-scored-words ] > 22 | END 23 | die $usage; 24 | } 25 | 26 | my $non_scored_words_list = ""; 27 | if (scalar @ARGV > 1) { 28 | if ($ARGV[0] eq "--non-scored-words") { 29 | shift @ARGV; 30 | $non_scored_words_list = shift @ARGV; 31 | } else { 32 | die "Unknown option $ARGV[0]\n"; 33 | } 34 | } 35 | 36 | my %non_scored_words; 37 | $non_scored_words{""} = 1; 38 | 39 | if ($non_scored_words_list ne "") { 40 | open NONSCORED, $non_scored_words_list or die "Failed to open $non_scored_words_list"; 41 | 42 | while () { 43 | chomp; 44 | my @F = split; 45 | $non_scored_words{$F[0]} = 1; 46 | } 47 | 48 | close NONSCORED; 49 | } 50 | 51 | my $ctm_file = shift @ARGV; 52 | open CTM, $ctm_file or die "Failed to open $ctm_file"; 53 | 54 | my $prev_utt = ""; 55 | my @text; 56 | 57 | while () { 58 | chomp; 59 | my @F = split; 60 | 61 | my $utt = $F[0]; 62 | if ($utt ne $prev_utt && $prev_utt ne "") { 63 | if (scalar @text > 0) { 64 | print $prev_utt . " " . join(" ", @text) . "\n"; 65 | } 66 | @text = (); 67 | } 68 | 69 | if (scalar @F < 5 || scalar @F > 6) { 70 | die "Invalid line $_ in CTM $ctm_file\n"; 71 | } 72 | 73 | if (!defined $non_scored_words{$F[4]}) { 74 | push @text, $F[4]; 75 | } 76 | 77 | $prev_utt = $utt; 78 | } 79 | 80 | close CTM; 81 | 82 | if (scalar @text > 0) { 83 | print $prev_utt . " " . join(" ", @text) . "\n"; 84 | } 85 | -------------------------------------------------------------------------------- /steps/cleanup/internal/split_text_into_docs.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0. 
5 | 6 | # If 'text' contains: 7 | # utterance1 A B C D 8 | # utterance2 C B 9 | # and you ran: 10 | # split_text_into_docs.pl --max-words 2 text doc2text docs 11 | # then 'doc2text' would contain: 12 | # utterance1-1 utterance1 13 | # utterance1-2 utterance1 14 | # utterance2-1 utterance2 15 | # and 'docs' would contain: 16 | # utterance1-1 A B 17 | # utterance1-2 C D 18 | # utterance2-1 C B 19 | 20 | use warnings; 21 | use strict; 22 | 23 | my $max_words = 1000; 24 | 25 | my $usage = "Usage: steps/cleanup/internal/split_text_into_docs.pl [--max-words ] text doc2text docs\n"; 26 | 27 | while (@ARGV > 3) { 28 | if ($ARGV[0] eq "--max-words") { 29 | shift @ARGV; 30 | $max_words = shift @ARGV; 31 | } else { 32 | print STDERR "$usage"; 33 | exit (1); 34 | } 35 | } 36 | 37 | if (scalar @ARGV != 3) { 38 | print STDERR "$usage"; 39 | exit (1); 40 | } 41 | 42 | sub min ($$) { $_[$_[0] > $_[1]] } 43 | 44 | open TEXT, $ARGV[0] or die "$0: Could not open file $ARGV[0] for reading\n"; 45 | open DOC2TEXT, ">", $ARGV[1] or die "$0: Could not open file $ARGV[1] for writing\n"; 46 | open DOCS, ">", $ARGV[2] or die "$0: Could not open file $ARGV[2] for writing\n"; 47 | 48 | while () { 49 | chomp; 50 | my @F = split; 51 | my $utt = shift @F; 52 | my $num_words = scalar @F; 53 | 54 | if ($num_words <= $max_words) { 55 | print DOCS "$_\n"; 56 | print DOC2TEXT "$utt $utt\n"; 57 | next; 58 | } 59 | 60 | my $num_docs = int($num_words / $max_words) + 1; 61 | my $num_words_shift = int($num_words / $num_docs) + 1; 62 | my $words_per_doc = $num_words_shift; 63 | 64 | #print STDERR ("$utt num-words=$num_words num-docs=$num_docs words-per-doc=$words_per_doc\n"); 65 | 66 | for (my $i = 0; $i < $num_docs; $i++) { 67 | my $st = $i*$num_words_shift; 68 | my $end = min($st + $words_per_doc, $num_words) - 1; 69 | print DOCS ("$utt-$i " . join(" ", @F[$st..$end]) . "\n"); 70 | print DOC2TEXT "$utt-$i $utt\n"; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /steps/cleanup/make_utterance_fsts.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | # makes unigram decoding-graph FSTs specific to each utterances, where the 5 | # supplied top-n-words list together with the supervision text of the utterance are 6 | # combined. 7 | 8 | if (@ARGV != 1) { 9 | print STDERR "** Warning: this script is deprecated and will be removed. See\n" . 10 | "** steps/cleanup/make_biased_lm_graphs.sh.\n" . 11 | "Usage: make_utterance_fsts.pl top-words-file.txt < text-archive > fsts-archive\n" . 12 | "e.g.: utils/sym2int.pl -f 2- data/lang/words.txt data/train/text | \\\n" . 13 | " make_utterance_fsts.pl exp/foo/top_words.int | compile-train-graphs-fsts ... 
\n"; 14 | exit(1); 15 | } 16 | 17 | ($top_words_file) = @ARGV; 18 | 19 | open(F, "<$top_words_file") || die "opening $top_words_file"; 20 | 21 | %top_word_probs = ( ); 22 | 23 | while() { 24 | @A = split; 25 | (@A == 2 && $A[0] > 0.0) || die "Bad line $_ in $top_words_file"; 26 | $A[1] =~ m/^[0-9]+$/ || die "Expecting numeric word-ids in $top_words_file: $_\n"; 27 | $top_word_probs{$A[1]} += $A[0]; 28 | } 29 | 30 | while () { 31 | @A = split; 32 | $utterance_id = shift @A; 33 | print "$utterance_id\n"; 34 | $num_words = @A + 0; # length of array @A 35 | %word_probs = %top_word_probs; 36 | foreach $w (@A) { 37 | $w =~ m/^[0-9]+$/ || die "Expecting numeric word-ids as stdin: $_"; 38 | $word_probs{$w} += 1.0 / $num_words; 39 | } 40 | foreach $w (keys %word_probs) { 41 | $prob = $word_probs{$w}; 42 | $prob > 0.0 || die "Word $w with bad probability $prob, utterance-id = $utterance_id\n"; 43 | $cost = -log($prob); 44 | print "0 0 $w $w $cost\n"; 45 | } 46 | $final_cost = -log(1.0 / $num_words); 47 | print "0 $final_cost\n"; 48 | print "\n"; # Empty line terminates the FST in the text-archive format. 49 | } 50 | -------------------------------------------------------------------------------- /steps/compute_vad_decision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0 5 | 6 | # To be run from .. (one directory up from here) 7 | # see ../run.sh for example 8 | 9 | # Compute energy based VAD output 10 | 11 | nj=4 12 | cmd=run.pl 13 | vad_config=conf/vad.conf 14 | 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | if [ -f path.sh ]; then . ./path.sh; fi 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# -lt 1 ] || [ $# -gt 3 ]; then 21 | echo "Usage: $0 [options] [ []]"; 22 | echo "e.g.: $0 data/train exp/make_vad mfcc" 23 | echo "Note: defaults to /log, and defaults to /data" 24 | echo " Options:" 25 | echo " --vad-config # config passed to compute-vad-energy" 26 | echo " --nj # number of parallel jobs" 27 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 28 | exit 1; 29 | fi 30 | 31 | data=$1 32 | if [ $# -ge 2 ]; then 33 | logdir=$2 34 | else 35 | logdir=$data/log 36 | fi 37 | if [ $# -ge 3 ]; then 38 | vaddir=$3 39 | else 40 | vaddir=$data/data 41 | fi 42 | 43 | 44 | # make $vaddir an absolute pathname. 45 | vaddir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $vaddir ${PWD}` 46 | 47 | # use "name" as part of name of the archive. 48 | name=`basename $data` 49 | 50 | mkdir -p $vaddir || exit 1; 51 | mkdir -p $logdir || exit 1; 52 | 53 | if [ -f $data/vad.scp ]; then 54 | mkdir -p $data/.backup 55 | echo "$0: moving $data/vad.scp to $data/.backup" 56 | mv $data/vad.scp $data/.backup 57 | fi 58 | 59 | for f in $data/feats.scp "$vad_config"; do 60 | if [ ! 
-f $f ]; then 61 | echo "compute_vad_decision.sh: no such file $f" 62 | exit 1; 63 | fi 64 | done 65 | 66 | utils/split_data.sh $data $nj || exit 1; 67 | sdata=$data/split$nj; 68 | 69 | $cmd JOB=1:$nj $logdir/vad_${name}.JOB.log \ 70 | compute-vad --config=$vad_config scp:$sdata/JOB/feats.scp \ 71 | ark,scp:$vaddir/vad_${name}.JOB.ark,$vaddir/vad_${name}.JOB.scp || exit 1 72 | 73 | for ((n=1; n<=nj; n++)); do 74 | cat $vaddir/vad_${name}.$n.scp || exit 1; 75 | done > $data/vad.scp 76 | 77 | nc=`cat $data/vad.scp | wc -l` 78 | nu=`cat $data/feats.scp | wc -l` 79 | if [ $nc -ne $nu ]; then 80 | echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);" 81 | echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh" 82 | [ $nc -eq 0 ] && exit 1; 83 | fi 84 | 85 | 86 | echo "Created VAD output for $name" 87 | -------------------------------------------------------------------------------- /steps/conf/append_prf_to_ctm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | from __future__ import print_function 7 | import sys 8 | 9 | # Append Levenshtein alignment of 'hypothesis' and 'reference' into 'CTM': 10 | # (parsed from the 'prf' output of 'sclite') 11 | 12 | # The tags in appended column are: 13 | # 'C' = correct 14 | # 'S' = substitution 15 | # 'I' = insertion 16 | # 'U' = unknown (not part of scored segment) 17 | 18 | # Parse options, 19 | if len(sys.argv) != 4: 20 | print("Usage: %s prf ctm_in ctm_out" % __file__) 21 | sys.exit(1) 22 | prf_file, ctm_file, ctm_out_file = sys.argv[1:] 23 | 24 | if ctm_out_file == '-': ctm_out_file = '/dev/stdout' 25 | 26 | # Load the prf file, 27 | prf = [] 28 | with open(prf_file) as f: 29 | for l in f: 30 | # Store the data, 31 | if l[:5] == 'File:': 32 | file_id = l.split()[1] 33 | if l[:8] == 'Channel:': 34 | chan = l.split()[1] 35 | if l[:5] == 'H_T1:': 36 | h_t1 = l 37 | if l[:5] == 'Eval:': 38 | evl = l 39 | prf.append((file_id,chan,h_t1,evl)) 40 | 41 | # Parse the prf records into dictionary, 42 | prf_dict = dict() 43 | for (f,c,t,e) in prf: 44 | t_pos = 0 # position in the 't' string, 45 | while t_pos < len(t): 46 | t1 = t[t_pos:].split(' ',1)[0] # get 1st token at 't_pos' 47 | try: 48 | # get word evaluation letter 'C,S,I', 49 | evl = e[t_pos] if e[t_pos] != ' ' else 'C' 50 | # add to dictionary, 51 | key='%s,%s' % (f,c) # file,channel 52 | if key not in prf_dict: prf_dict[key] = dict() 53 | prf_dict[key][float(t1)] = evl 54 | except ValueError: 55 | pass 56 | t_pos += len(t1)+1 # advance position for parsing, 57 | 58 | # Load the ctm file (with confidences), 59 | with open(ctm_file) as f: 60 | ctm = [ l.split() for l in f ] 61 | 62 | # Append the sclite alignment tags to ctm, 63 | ctm_out = [] 64 | for f, chan, beg, dur, wrd, conf in ctm: 65 | # U = unknown, C = correct, S = substitution, I = insertion, 66 | sclite_tag = 'U' 67 | try: 68 | sclite_tag = prf_dict[('%s,%s'%(f,chan)).lower()][float(beg)] 69 | except KeyError: 70 | pass 71 | ctm_out.append([f,chan,beg,dur,wrd,conf,sclite_tag]) 72 | 73 | # Save the augmented ctm file, 74 | with open(ctm_out_file, 'w') as f: 75 | f.writelines([' '.join(ctm_record)+'\n' for ctm_record in ctm_out]) 76 | 77 | -------------------------------------------------------------------------------- /steps/conf/convert_ctm_to_tra.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | from __future__ import print_function 7 | import sys, operator 8 | 9 | # This scripts loads a 'ctm' file and converts it into the 'tra' format: 10 | # "utt-key word1 word2 word3 ... wordN" 11 | # The 'utt-key' is the 1st column in the CTM. 12 | 13 | # Typically the CTM contains: 14 | # - utterance-relative timimng (i.e. prepared without 'utils/convert_ctm.pl') 15 | # - confidences 16 | 17 | if len(sys.argv) != 3: 18 | print('Usage: %s ctm-in tra-out' % __file__) 19 | sys.exit(1) 20 | dummy, ctm_in, tra_out = sys.argv 21 | 22 | if ctm_in == '-': ctm_in = '/dev/stdin' 23 | if tra_out == '-': tra_out = '/dev/stdout' 24 | 25 | # Load the 'ctm' into dictionary, 26 | tra = dict() 27 | with open(ctm_in) as f: 28 | for l in f: 29 | utt, ch, beg, dur, wrd, conf = l.split() 30 | if not utt in tra: tra[utt] = [] 31 | tra[utt].append((float(beg),wrd)) 32 | 33 | # Store the in 'tra' format, 34 | with open(tra_out,'w') as f: 35 | for utt,tuples in tra.items(): 36 | tuples.sort(key = operator.itemgetter(0)) # Sort by 'beg' time, 37 | f.write('%s %s\n' % (utt,' '.join([t[1] for t in tuples]))) 38 | 39 | -------------------------------------------------------------------------------- /steps/conf/lattice_depth_per_frame.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2015 Brno University of Technology (Author: Karel Vesely) 3 | # Licensed under the Apache License, Version 2.0 (the "License") 4 | 5 | # Extract lattice-depth for each frame. 6 | 7 | # Begin configuration 8 | cmd=run.pl 9 | # End configuration 10 | 11 | echo "$0 $@" # Print the command line for logging 12 | 13 | [ -f path.sh ] && . ./path.sh # source the path. 14 | . parse_options.sh || exit 1; 15 | 16 | if [ $# != 2 ]; then 17 | echo "usage: $0 [opts] " 18 | echo "main options (for others, see top of script file)" 19 | echo " --config # config containing options" 20 | echo " --cmd" 21 | exit 1; 22 | fi 23 | 24 | set -euo pipefail 25 | 26 | latdir=$1 27 | dir=$2 28 | 29 | [ ! -f $latdir/lat.1.gz ] && echo "Missing $latdir/lat.1.gz" && exit 1 30 | nj=$(cat $latdir/num_jobs) 31 | 32 | # Get the pdf-posterior vectors, 33 | $cmd JOB=1:$nj $dir/log/lattice_depth_per_frame.JOB.log \ 34 | lattice-depth-per-frame "ark:gunzip -c $latdir/lat.JOB.gz |" ark,t:$dir/lattice_frame_depth.JOB.ark 35 | # Merge, 36 | for ((n=1; n<=nj; n++)); do cat $dir/lattice_frame_depth.${n}.ark; done >$dir/lattice_frame_depth.ark 37 | rm $dir/lattice_frame_depth.*.ark 38 | 39 | # Done! 
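# Illustrative follow-up (not part of the upstream script): lattice_frame_depth.ark is a
# text archive, assumed here to hold one utterance per line as "utt-id d1 d2 ... dN" with
# one integer lattice depth per frame. Under that assumption, a per-utterance average
# depth could be read off with something like:
#   awk '{s=0; for(i=2;i<=NF;i++) s+=$i; if (NF>1) printf("%s %.2f\n", $1, s/(NF-1));}' \
#     $dir/lattice_frame_depth.ark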
40 | -------------------------------------------------------------------------------- /steps/conf/parse_arpa_unigrams.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | from __future__ import print_function 7 | import sys, gzip, re 8 | 9 | # Parse options, 10 | if len(sys.argv) != 4: 11 | print("Usage: %s " % __file__) 12 | sys.exit(0) 13 | words_txt, arpa_gz, unigrams_out = sys.argv[1:] 14 | 15 | if arpa_gz == '-': arpa_gz = '/dev/stdin' 16 | if unigrams_out == '-': unigrams_out = '/dev/stdout' 17 | 18 | # Load the words.txt, 19 | words = [ l.split() for l in open(words_txt) ] 20 | 21 | # Load the unigram probabilities in 10log from ARPA, 22 | wrd_log10 = dict() 23 | with gzip.open(arpa_gz,'r') as f: 24 | read = False 25 | for l in f: 26 | if l.strip() == '\\1-grams:': read = True 27 | if l.strip() == '\\2-grams:': break 28 | if read and len(l.split())>=2: 29 | log10_p_unigram, wrd = re.split('[\t ]+',l.strip(),2)[:2] 30 | wrd_log10[wrd] = float(log10_p_unigram) 31 | 32 | # Create list, 'wrd id log_p_unigram', 33 | words_unigram = [[wrd, id, (wrd_log10[wrd] if wrd in wrd_log10 else -99)] for wrd,id in words ] 34 | 35 | print(words_unigram[0], file=sys.stderr) 36 | # Store, 37 | with open(unigrams_out,'w') as f: 38 | f.writelines(['%s %s %g\n' % (w,i,p) for (w,i,p) in words_unigram]) 39 | 40 | -------------------------------------------------------------------------------- /steps/conf/prepare_word_categories.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015 Brno University of Technology (author: Karel Vesely) 4 | # Apache 2.0 5 | 6 | import sys 7 | 8 | from optparse import OptionParser 9 | desc = """ 10 | Prepare mapping of words into categories. Each word with minimal frequency 11 | has its own category, the rest is merged into single class. 12 | """ 13 | usage = "%prog [opts] words.txt ctm category_mapping" 14 | parser = OptionParser(usage=usage, description=desc) 15 | parser.add_option("--min-count", help="Minimum word-count to have a single word category. 
[default %default]", type='int', default=20) 16 | (o, args) = parser.parse_args() 17 | 18 | if len(args) != 3: 19 | parser.print_help() 20 | sys.exit(1) 21 | words_file, text_file, category_mapping_file = args 22 | 23 | if text_file == '-': text_file = '/dev/stdin' 24 | if category_mapping_file == '-': category_mapping_file = '/dev/stdout' 25 | 26 | # Read the words from the 'tra' file, 27 | with open(text_file) as f: 28 | text_words = [ l.split()[1:] for l in f ] 29 | 30 | # Flatten the array of arrays of words, 31 | import itertools 32 | text_words = list(itertools.chain.from_iterable(text_words)) 33 | 34 | # Count the words (regardless if correct or incorrect), 35 | word_counts = dict() 36 | for w in text_words: 37 | if w not in word_counts: word_counts[w] = 0 38 | word_counts[w] += 1 39 | 40 | # Read the words.txt, 41 | with open(words_file) as f: 42 | word_id = [ l.split() for l in f ] 43 | 44 | # Append the categories, 45 | n=1 46 | word_id_cat=[] 47 | for word, idx in word_id: 48 | cat = 0 49 | if word in word_counts: 50 | if word_counts[word] > o.min_count: 51 | cat = n; n += 1 52 | word_id_cat.append([word, idx, str(cat)]) 53 | 54 | # Store the mapping, 55 | with open(category_mapping_file,'w') as f: 56 | f.writelines([' '.join(record)+'\n' for record in word_id_cat]) 57 | -------------------------------------------------------------------------------- /steps/data/data_dir_manipulation_lib.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | def RunKaldiCommand(command, wait = True): 4 | """ Runs commands frequently seen in Kaldi scripts. These are usually a 5 | sequence of commands connected by pipes, so we use shell=True """ 6 | #logger.info("Running the command\n{0}".format(command)) 7 | p = subprocess.Popen(command, shell = True, 8 | stdout = subprocess.PIPE, 9 | stderr = subprocess.PIPE) 10 | 11 | if wait: 12 | [stdout, stderr] = p.communicate() 13 | if p.returncode is not 0: 14 | raise Exception("There was an error while running the command {0}\n------------\n{1}".format(command, stderr)) 15 | return stdout, stderr 16 | else: 17 | return p 18 | -------------------------------------------------------------------------------- /steps/data/make_musan.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Phani Sankar Nidadavolu 4 | # Apache 2.0. 5 | # 6 | # This script creates the MUSAN data directory. 7 | # Consists of babble, music and noise files. 8 | # Used to create augmented data 9 | # The required dataset is freely available at http://www.openslr.org/17/ 10 | 11 | # The corpus can be cited as follows: 12 | # @misc{musan2015, 13 | # author = {David Snyder and Guoguo Chen and Daniel Povey}, 14 | # title = {{MUSAN}: {A} {M}usic, {S}peech, and {N}oise {C}orpus}, 15 | # year = {2015}, 16 | # eprint = {1510.08484}, 17 | # note = {arXiv:1510.08484v1} 18 | # } 19 | 20 | set -e 21 | use_vocals=true 22 | sampling_rate=16000 23 | stage=0 24 | 25 | echo "$0 $@" # Print the command line for logging 26 | 27 | if [ -f path.sh ]; then . ./path.sh; fi 28 | . 
parse_options.sh || exit 1; 29 | 30 | if [ $# -ne 2 ]; then 31 | echo USAGE: $0 input_dir output_dir 32 | echo input_dir is the path where the MUSAN corpus is located 33 | echo e.g: $0 /export/corpora/JHU/musan data 34 | echo "main options (for others, see top of script file)" 35 | echo " --sampling-rate # Sampling frequency of source dir" 36 | echo " --use-vocals # Use vocals from music portion of MUSAN corpus" 37 | exit 1; 38 | fi 39 | 40 | in_dir=$1 41 | data_dir=$2 42 | 43 | mkdir -p local/musan.tmp 44 | 45 | # The below script will create the musan corpus 46 | steps/data/make_musan.py --use-vocals ${use_vocals} \ 47 | --sampling-rate ${sampling_rate} \ 48 | ${in_dir} ${data_dir}/musan || exit 1; 49 | 50 | utils/fix_data_dir.sh ${data_dir}/musan 51 | 52 | grep "music" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_music 53 | grep "speech" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_speech 54 | grep "noise" ${data_dir}/musan/utt2spk > local/musan.tmp/utt2spk_noise 55 | 56 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_music \ 57 | ${data_dir}/musan ${data_dir}/musan_music 58 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_speech \ 59 | ${data_dir}/musan ${data_dir}/musan_speech 60 | utils/subset_data_dir.sh --utt-list local/musan.tmp/utt2spk_noise \ 61 | ${data_dir}/musan ${data_dir}/musan_noise 62 | 63 | utils/fix_data_dir.sh ${data_dir}/musan_music 64 | utils/fix_data_dir.sh ${data_dir}/musan_speech 65 | utils/fix_data_dir.sh ${data_dir}/musan_noise 66 | 67 | rm -rf local/musan.tmp 68 | 69 | for name in speech noise music; do 70 | utils/data/get_reco2dur.sh ${data_dir}/musan_${name} 71 | done 72 | -------------------------------------------------------------------------------- /steps/decode_combine.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # Combine two decoding directories by composing the lattices (we 6 | # apply a weight to each of the original weights, by default 0.5 each). 7 | # Note, this is not the only combination method, or the most normal combination 8 | # method. See also egs/wsj/s5/local/score_combine.sh. 9 | 10 | # Begin configuration section. 11 | weight1=0.5 # Weight on 1st set of lattices. 12 | cmd=run.pl 13 | skip_scoring=false 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | [ -f ./path.sh ] && . ./path.sh; # source the path. 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -ne 5 ]; then 22 | echo "Usage: steps/decode_combine.sh [options] " 23 | echo " e.g.: steps/decode_combine.sh data/lang data/test exp/dir1/decode exp/dir2/decode exp/combine_1_2/decode" 24 | echo "main options (for others, see top of script file)" 25 | echo " --config # config containing options" 26 | echo " --cmd # Command to run in parallel with" 27 | echo " --weight1 # Weight on 1st set of lattices (default 0.5)" 28 | exit 1; 29 | fi 30 | 31 | data=$1 32 | lang_or_graphdir=$2 33 | srcdir1=$3 34 | srcdir2=$4 35 | dir=$5 36 | 37 | for f in $data/utt2spk $lang_or_graphdir/phones.txt $srcdir1/lat.1.gz $srcdir2/lat.1.gz; do 38 | [ ! 
-f $f ] && echo "$0: no such file $f" && exit 1; 39 | done 40 | 41 | nj1=`cat $srcdir1/num_jobs` || exit 1; 42 | nj2=`cat $srcdir2/num_jobs` || exit 1; 43 | [ $nj1 -ne $nj2 ] && echo "$0: mismatch in number of jobs $nj1 versus $nj2" && exit 1; 44 | nj=$nj1 45 | 46 | mkdir -p $dir/log 47 | echo $nj > $dir/num_jobs 48 | 49 | # The lattice-interp command does the score interpolation (with composition), 50 | # and the lattice-copy-backoff replaces the result with the 1st lattice, in 51 | # cases where the composed result was empty. 52 | $cmd JOB=1:$nj $dir/log/interp.JOB.log \ 53 | lattice-interp --alpha=$weight1 "ark:gunzip -c $srcdir1/lat.JOB.gz|" \ 54 | "ark,s,cs:gunzip -c $srcdir2/lat.JOB.gz|" ark:- \| \ 55 | lattice-copy-backoff "ark,s,cs:gunzip -c $srcdir1/lat.JOB.gz|" ark,s,cs:- \ 56 | "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1; 57 | 58 | if ! $skip_scoring ; then 59 | [ ! -x local/score.sh ] && \ 60 | echo "Not scoring because local/score.sh does not exist or not executable." && exit 1; 61 | local/score.sh --cmd "$cmd" $data $lang_or_graphdir $dir || 62 | { echo "$0: Scoring failed. (ignore by '--skip-scoring true')"; exit 1; } 63 | fi 64 | 65 | exit 0; 66 | -------------------------------------------------------------------------------- /steps/diagnostic/analyze_alignments.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2016. Apache 2.0. 4 | 5 | # This script performs some analysis of alignments on disk, currently in terms 6 | # of phone lengths, including lengths of leading and trailing silences 7 | 8 | 9 | # begin configuration section. 10 | cmd=run.pl 11 | #end configuration section. 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | 15 | [ -f ./path.sh ] && . ./path.sh 16 | . parse_options.sh || exit 1; 17 | 18 | if [ $# -ne 2 ]; then 19 | echo "Usage: $0 [options] " 20 | echo " Options:" 21 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 22 | echo "e.g.:" 23 | echo "$0 data/lang exp/tri4b" 24 | echo "This script writes some diagnostics to /log/alignments.log" 25 | exit 1; 26 | fi 27 | 28 | lang=$1 29 | dir=$2 30 | 31 | model=$dir/final.mdl 32 | 33 | for f in $lang/words.txt $model $dir/ali.1.gz $dir/num_jobs; do 34 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 35 | done 36 | 37 | num_jobs=$(cat $dir/num_jobs) || exit 1 38 | 39 | mkdir -p $dir/log 40 | 41 | rm $dir/phone_stats.*.gz 2>/dev/null || true 42 | 43 | $cmd JOB=1:$num_jobs $dir/log/get_phone_alignments.JOB.log \ 44 | set -o pipefail '&&' ali-to-phones --write-lengths=true "$model" \ 45 | "ark:gunzip -c $dir/ali.JOB.gz|" ark,t:- \| \ 46 | sed -E 's/^[^ ]+ //' \| \ 47 | awk 'BEGIN{FS=" ; "; OFS="\n";} {print "begin " $1; if (NF>1) print "end " $NF; for (n=1;n<=NF;n++) print "all " $n; }' \| \ 48 | sort \| uniq -c \| gzip -c '>' $dir/phone_stats.JOB.gz || exit 1 49 | 50 | if ! 
$cmd $dir/log/analyze_alignments.log \ 51 | gunzip -c "$dir/phone_stats.*.gz" \| \ 52 | steps/diagnostic/analyze_phone_length_stats.py $lang; then 53 | echo "$0: analyze_phone_length_stats.py failed, but ignoring the error (it's just for diagnostics)" 54 | fi 55 | 56 | grep WARNING $dir/log/analyze_alignments.log 57 | echo "$0: see stats in $dir/log/analyze_alignments.log" 58 | 59 | rm $dir/phone_stats.*.gz 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /steps/libs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | """ This package contains modules and subpackages used in kaldi scripts. 7 | """ 8 | 9 | from . import common 10 | 11 | __all__ = ["common"] 12 | -------------------------------------------------------------------------------- /steps/libs/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/steps/libs/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /steps/libs/__pycache__/common.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyunlongaaa/NSD-MS2S/6acb1fed78be709c0e332353468ada08bfcc2515/steps/libs/__pycache__/common.cpython-38.pyc -------------------------------------------------------------------------------- /steps/libs/nnet3/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Copyright 2016 Johns Hopkins University (Dan Povey) 4 | # 2016 Vimal Manohar 5 | # 2016 Vijayaditya Peddinti 6 | # 2016 Yiming Wang 7 | # Apache 2.0. 8 | 9 | 10 | # This module has the python functions which facilitate the use of nnet3 toolkit 11 | # It has two sub-modules 12 | # xconfig : Library for parsing high level description of neural networks 13 | # train : Library for training scripts 14 | -------------------------------------------------------------------------------- /steps/libs/nnet3/report/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | from . import log_parse 7 | 8 | __all__ = ["log_parse"] 9 | -------------------------------------------------------------------------------- /steps/libs/nnet3/train/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright 2016 Vimal Manohar 3 | # Apache 2.0 4 | 5 | """ This library has classes and methods commonly used for training nnet3 6 | neural networks. 7 | 8 | It has separate submodules for frame-level objectives and chain objective: 9 | frame_level_objf -- For both recurrent and non-recurrent architectures 10 | chain_objf -- LF-MMI objective training 11 | """ 12 | -------------------------------------------------------------------------------- /steps/libs/nnet3/train/chain_objf/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | """ This is a subpackage containing modules for training of 7 | deep neural network acoustic model with chain objective. 8 | """ 9 | 10 | from . 
import acoustic_model 11 | 12 | __all__ = ["acoustic_model"] 13 | -------------------------------------------------------------------------------- /steps/libs/nnet3/train/frame_level_objf/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0 5 | 6 | """ This library has classes and methods commonly used for training nnet3 7 | neural networks with frame-level objectives. 8 | """ 9 | 10 | from . import common 11 | from . import raw_model 12 | from . import acoustic_model 13 | 14 | __all__ = ["common", "raw_model", "acoustic_model"] 15 | -------------------------------------------------------------------------------- /steps/libs/nnet3/xconfig/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Johns Hopkins University (Dan Povey) 2 | # 2016 Vijayaditya Peddinti 3 | # 2016 Yiming Wang 4 | # Apache 2.0. 5 | 6 | """This library has classes and methods to form neural network computation graphs, 7 | in the nnet3 framework, using higher level abstractions called 'layers' 8 | (e.g. sub-graphs like LSTMS ). 9 | 10 | Note : We use the term 'layer' though the computation graph can have a highly 11 | non-linear structure as, other terms such as nodes/components have already been 12 | used in C++ codebase of nnet3. 13 | 14 | This is basically a config parser module, where the configs have very concise 15 | descriptions of a neural network. 16 | 17 | This module has methods to convert the xconfigs into a configs interpretable by 18 | nnet3 C++ library. 19 | 20 | It generates three different configs: 21 | 'init.config' : which is the config with the info necessary for computing 22 | the preconditioning matrix i.e., LDA transform 23 | e.g. 24 | input-node name=input dim=40 25 | input-node name=ivector dim=100 26 | output-node name=output input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2), ReplaceIndex(ivector, t, 0)) objective=linear 27 | 28 | 'ref.config' : which is a version of the config file used to generate 29 | a model for getting left and right context (it doesn't read 30 | anything for the LDA-like transform and/or 31 | presoftmax-prior-scale components) 32 | 33 | 'final.config' : which has the actual config used to initialize the model used 34 | in training i.e, it has file paths for LDA transform and 35 | other initialization files 36 | """ 37 | 38 | 39 | __all__ = ["utils", "layers", "parser"] 40 | -------------------------------------------------------------------------------- /steps/libs/nnet3/xconfig/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Johns Hopkins University (Dan Povey) 2 | # 2016 Vijayaditya Peddinti 3 | # 2016 Yiming Wang 4 | # Apache 2.0. 5 | 6 | from .basic_layers import * 7 | from .convolution import * 8 | from .attention import * 9 | from .lstm import * 10 | from .gru import * 11 | from .stats_layer import * 12 | from .trivial_layers import * 13 | from .composite_layers import * 14 | -------------------------------------------------------------------------------- /steps/lmrescore_const_arpa.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script rescores lattices with the ConstArpaLm format language model. 7 | 8 | # Begin configuration section. 
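# Illustrative invocation (not from the original header; the directory names are examples
# only, while the argument order old-lang new-lang data in-decode-dir out-decode-dir
# matches the positional arguments parsed further below):
#   steps/lmrescore_const_arpa.sh data/lang_test_tgsmall data/lang_test_fglarge \
#     data/dev exp/tri3/decode_dev_tgsmall exp/tri3/decode_dev_tgsmall_fglarge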
9 | cmd=run.pl 10 | skip_scoring=false 11 | stage=1 12 | scoring_opts= 13 | # End configuration section. 14 | 15 | echo "$0 $@" # Print the command line for logging 16 | 17 | . ./utils/parse_options.sh 18 | 19 | if [ $# != 5 ]; then 20 | echo "Does language model rescoring of lattices (remove old LM, add new LM)" 21 | echo "Usage: $0 [options] \\" 22 | echo " " 23 | echo "options: [--cmd (run.pl|queue.pl [queue opts])]" 24 | exit 1; 25 | fi 26 | 27 | [ -f path.sh ] && . ./path.sh; 28 | 29 | oldlang=$1 30 | newlang=$2 31 | data=$3 32 | indir=$4 33 | outdir=$5 34 | 35 | oldlm=$oldlang/G.fst 36 | newlm=$newlang/G.carpa 37 | ! cmp $oldlang/words.txt $newlang/words.txt &&\ 38 | echo "$0: Warning: vocabularies may be incompatible." 39 | [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1; 40 | [ ! -f $newlm ] && echo "$0: Missing file $newlm" && exit 1; 41 | ! ls $indir/lat.*.gz >/dev/null &&\ 42 | echo "$0: No lattices input directory $indir" && exit 1; 43 | 44 | if ! cmp -s $oldlang/words.txt $newlang/words.txt; then 45 | echo "$0: $oldlang/words.txt and $newlang/words.txt differ: make sure you know what you are doing."; 46 | fi 47 | 48 | oldlmcommand="fstproject --project_output=true $oldlm |" 49 | 50 | mkdir -p $outdir/log 51 | nj=`cat $indir/num_jobs` || exit 1; 52 | cp $indir/num_jobs $outdir 53 | 54 | if [ $stage -le 1 ]; then 55 | $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ 56 | lattice-lmrescore --lm-scale=-1.0 \ 57 | "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlmcommand" ark:- \| \ 58 | lattice-lmrescore-const-arpa --lm-scale=1.0 \ 59 | ark:- "$newlm" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; 60 | fi 61 | 62 | if ! $skip_scoring && [ $stage -le 2 ]; then 63 | err_msg="Not scoring because local/score.sh does not exist or not executable." 64 | [ ! -x local/score.sh ] && echo $err_msg && exit 1; 65 | local/score.sh --cmd "$cmd" $scoring_opts $data $newlang $outdir 66 | else 67 | echo "Not scoring because requested so..." 68 | fi 69 | 70 | exit 0; 71 | -------------------------------------------------------------------------------- /steps/nnet2/check_ivectors_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) 2016, Johns Hopkins University (Yenda Trmal ) 3 | # License: Apache 2.0 4 | 5 | # Begin configuration section. 6 | # End configuration section 7 | 8 | #echo >&2 "$0 $@" # Print the command line for logging 9 | if [ $# != 2 ] ; then 10 | echo >&2 "Usage: $0 " 11 | echo >&2 " e.g.: $0 exp/nnet3/extractor exp/nnet3/ivectors_dev10h.pem" 12 | fi 13 | 14 | dir_a=$1 15 | dir_b=$2 16 | 17 | id_a=$(steps/nnet2/get_ivector_id.sh $dir_a) 18 | ret_a=$? 19 | id_b=$(steps/nnet2/get_ivector_id.sh $dir_b) 20 | ret_b=$? 21 | 22 | if [ ! -z "$id_a" ] && [ ! -z "${id_b}" ] ; then 23 | if [ "${id_a}" == "${id_b}" ]; then 24 | exit 0 25 | else 26 | echo >&2 "$0: ERROR: iVector id ${id_a} in $dir_a and the iVector id ${id_b} in $dir_b do not match" 27 | echo >&2 "$0: ERROR: that means that the systems are not compatible." 28 | exit 1 29 | fi 30 | elif [ -z "$id_a" ] && [ -z "${id_b}" ] ; then 31 | echo >&2 "$0: WARNING: The directories do not contain iVector ID." 32 | echo >&2 "$0: WARNING: That means it's you who's reponsible for keeping " 33 | echo >&2 "$0: WARNING: the directories compatible" 34 | exit 0 35 | else 36 | echo >&2 "$0: WARNING: One of the directories do not contain iVector ID." 
37 | echo >&2 "$0: WARNING: That means it's you who's reponsible for keeping " 38 | echo >&2 "$0: WARNING: the directories compatible" 39 | exit 0 40 | fi 41 | -------------------------------------------------------------------------------- /steps/nnet2/convert_nnet1_to_nnet2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script converts nnet1 into nnet2 models. 7 | # Note, it doesn't support all possible types of nnet1 models. 8 | 9 | # Begin configuration section 10 | cleanup=true 11 | cmd=run.pl 12 | # End configuration section. 13 | 14 | echo "$0 $@" # Print the command line for logging 15 | 16 | [ -f ./path.sh ] && . ./path.sh; # source the path. 17 | . parse_options.sh || exit 1; 18 | 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 [options] " 22 | echo "e.g.: $0 exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2" 23 | exit 1; 24 | fi 25 | 26 | src=$1 27 | dir=$2 28 | 29 | mkdir -p $dir/log || exit 1; 30 | 31 | for f in $src/final.mdl $src/final.feature_transform $src/ali_train_pdf.counts; do 32 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 33 | done 34 | 35 | cp $src/phones.txt $dir 2>/dev/null 36 | 37 | $cmd $dir/log/convert_feature_transform.log \ 38 | nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1; 39 | 40 | 41 | if [ -f $src/final.nnet ]; then 42 | echo "$0: $src/final.nnet exists, using it as input." 43 | $cmd $dir/log/convert_model.log \ 44 | nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1; 45 | elif [ -f $src/final.dbn ]; then 46 | echo "$0: $src/final.dbn exists, using it as input." 47 | num_leaves=$(am-info $src/final.mdl | grep -w pdfs | awk '{print $NF}') || exit 1; 48 | dbn_output_dim=$(nnet-info exp/dnn4b_pretrain-dbn/6.dbn | grep component | tail -n 1 | sed s:,::g | awk '{print $NF}') || exit 1; 49 | [ -z "$dbn_output_dim" ] && exit 1; 50 | 51 | cat > $dir/final_layer.conf <) 3 | # License: Apache 2.0 4 | 5 | # Begin configuration section. 6 | # End configuration section 7 | set -e -o pipefail 8 | set -o nounset # Treat unset variables as an error 9 | 10 | # End configuration section. 11 | 12 | #echo >&2 "$0 $@" # Print the command line for logging 13 | 14 | if [ -f path.sh ]; then . ./path.sh; fi 15 | . parse_options.sh || exit 1; 16 | 17 | 18 | if [ $# != 1 ]; then 19 | echo >&2 "Usage: $0 " 20 | echo >&2 " e.g.: $0 exp/nnet3/extractor" 21 | exit 1 22 | fi 23 | 24 | ivecdir=$1 25 | 26 | if [ -f $ivecdir/final.ie.id ] ; then 27 | cat $ivecdir/final.ie.id 28 | elif [ -f $ivecdir/final.ie ] ; then 29 | # note the creation can fail in case the extractor directory 30 | # is not read-only media or the user des not have access rights 31 | # in that case we will just behave as if the id is not available 32 | id=$(md5sum $ivecdir/final.ie | awk '{print $1}') 33 | echo "$id" > $ivecdir/final.ie.id || true 34 | echo "$id" 35 | else 36 | exit 0 37 | fi 38 | 39 | exit 0 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /steps/nnet2/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . 
parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 " 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | fi 16 | 17 | data=$1 18 | 19 | if [ ! -f $data/utt2dur ]; then 20 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 21 | fi 22 | 23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 24 | 25 | awk -v s=$frame_shift '{n += $2} END{printf("%.0f\n", (n / s))}' <$data/utt2dur 26 | -------------------------------------------------------------------------------- /steps/nnet2/remove_egs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script removes the examples in an egs/ directory, e.g. 7 | # steps/nnet2/remove_egs.sh exp/nnet4b/egs/ 8 | # We give it its own script because we need to be careful about 9 | # things that are soft links to something in storage/ (i.e. remove the 10 | # data that's linked to as well as the soft link), and we want to not 11 | # delete the examples if someone has done "touch $dir/egs/.nodelete". 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo "e.g.: $0 data/nnet4b/egs/" 17 | echo "e.g.: $0 data/nnet4b_mpe/degs/" 18 | echo "This script is usually equivalent to 'rm /egs.* /degs.*' but it follows" 19 | echo "soft links to /storage/; and it avoids deleting anything in the directory if" 20 | echo "someone did 'touch /.nodelete" 21 | exit 1; 22 | fi 23 | 24 | egs=$1 25 | 26 | if [ ! -d $egs ]; then 27 | echo "$0: expected directory $egs to exist" 28 | exit 1; 29 | fi 30 | 31 | if [ -f $egs/.nodelete ]; then 32 | echo "$0: not deleting egs in $egs since $egs/.nodelete exists" 33 | exit 0; 34 | fi 35 | 36 | 37 | 38 | for f in $egs/egs.*.ark $egs/degs.*.ark $egs/cegs.*.ark; do 39 | if [ -L $f ]; then 40 | rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails. 41 | fi 42 | rm $f 2>/dev/null 43 | done 44 | 45 | 46 | echo "$0: Finished deleting examples in $egs" 47 | -------------------------------------------------------------------------------- /steps/nnet3/chain/e2e/README.txt: -------------------------------------------------------------------------------- 1 | The scripts related to end2end chain training are in this directory 2 | Currently it has 3 scripts: 3 | 4 | ** prepare_e2e.sh which is almost equivalent 5 | to regular chain's build-tree.sh (i.e. it creates the tree and 6 | the transition-model) except it does not require any previously 7 | trained models (in other terms, it does what stages -3 and -2 8 | of steps/train_mono.sh do). 9 | 10 | ** get_egs_e2e.sh: this is simlilar to chain/get_egs.sh except it 11 | uses training FSTs (instead of lattices) to generate end2end egs. 12 | 13 | ** train_e2e.py: this is very similar to chain/train.py but 14 | with fewer stages (e.g. it does not compute the preconditioning matrix) 15 | 16 | 17 | For details please see the comments at top of local/chain/e2e/run_flatstart_*.sh 18 | and also src/chain/chain-generic-numerator.h. 
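A rough flat-start sequence, for orientation only (the option names and paths below are
illustrative assumptions; the run_flatstart_*.sh scripts mentioned above are the
authoritative examples):

  # build the tree and transition model directly, with no previously trained system
  steps/nnet3/chain/e2e/prepare_e2e.sh --nj 30 --shared-phones true \
    data/train data/lang exp/chain/e2e_tree
  # then make end-to-end egs from training FSTs with get_egs_e2e.sh and train with
  # train_e2e.py, following the usage messages of those two scripts.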
19 | -------------------------------------------------------------------------------- /steps/nnet3/chain/e2e/text_to_phones.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2017 Hossein Hadian 4 | # Apache 2.0 5 | 6 | 7 | """ This reads data/train/text from standard input, converts the word transcriptions 8 | to phone transcriptions using the provided lexicon, 9 | and writes them to standard output. 10 | """ 11 | from __future__ import print_function 12 | 13 | import argparse 14 | from os.path import join 15 | import sys 16 | import copy 17 | import random 18 | 19 | parser = argparse.ArgumentParser(description="""This script reads 20 | data/train/text from std input and converts the word transcriptions 21 | to phone transcriptions using the provided lexicon""") 22 | parser.add_argument('langdir', type=str) 23 | parser.add_argument('--edge-silprob', type=float, default=0.8, 24 | help="""Probability of optional silence at the beginning 25 | and end.""") 26 | parser.add_argument('--between-silprob', type=float, default=0.2, 27 | help="Probability of optional silence between the words.") 28 | 29 | 30 | args = parser.parse_args() 31 | 32 | # optional silence 33 | sil = open(join(args.langdir, 34 | "phones/optional_silence.txt")).readline().strip() 35 | 36 | oov_word = open(join(args.langdir, "oov.txt")).readline().strip() 37 | 38 | 39 | # load the lexicon 40 | lexicon = {} 41 | with open(join(args.langdir, "phones/align_lexicon.txt")) as f: 42 | for line in f: 43 | line = line.strip(); 44 | parts = line.split() 45 | lexicon[parts[0]] = parts[2:] # ignore parts[1] 46 | 47 | n_tot = 0 48 | n_fail = 0 49 | for line in sys.stdin: 50 | line = line.strip().split() 51 | key = line[0] 52 | word_trans = line[1:] # word-level transcription 53 | phone_trans = [] # phone-level transcription 54 | if random.random() < args.edge_silprob: 55 | phone_trans += [sil] 56 | for i in range(len(word_trans)): 57 | n_tot += 1 58 | word = word_trans[i] 59 | if word not in lexicon: 60 | n_fail += 1 61 | if n_fail < 20: 62 | sys.stderr.write("{} not found in lexicon, replacing with {}\n".format(word, oov_word)) 63 | elif n_fail == 20: 64 | sys.stderr.write("Not warning about OOVs any more.\n") 65 | pronunciation = lexicon[oov_word] 66 | else: 67 | pronunciation = copy.deepcopy(lexicon[word]) 68 | phone_trans += pronunciation 69 | prob = args.between_silprob if i < len(word_trans) - 1 else args.edge_silprob 70 | if random.random() < prob: 71 | phone_trans += [sil] 72 | print(key + " " + " ".join(phone_trans)) 73 | 74 | sys.stderr.write("Done. {} out of {} were OOVs.\n".format(n_fail, n_tot)) 75 | -------------------------------------------------------------------------------- /steps/nnet3/chain/gen_topo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of 7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we 8 | # believe should be useful in the 'chain' model. Note: right now it doesn't 9 | # have any real options, and it treats silence and nonsilence the same. The 10 | # intention is that you write different versions of this script, or add options, 11 | # if you experiment with it. 
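# For reference, a reconstruction (not part of the original comment): the angle-bracket
# markup in the print statements below does not survive in this listing; in the upstream
# script each phone gets a 'chain'-style topology entry of roughly the form
#   <State> 0 <PdfClass> 0 <Transition> 1 0.5 <Transition> 2 0.5 </State>
#   <State> 1 <PdfClass> 1 <Transition> 1 0.5 <Transition> 2 0.5 </State>
#   <State> 2 </State>
# wrapped in <Topology>/<TopologyEntry>/<ForPhones> blocks.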
12 | 13 | if (@ARGV != 2) { 14 | print STDERR "Usage: utils/gen_topo.pl \n"; 15 | print STDERR "e.g.: utils/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n"; 16 | exit (1); 17 | } 18 | 19 | ($nonsil_phones, $sil_phones) = @ARGV; 20 | 21 | $nonsil_phones =~ s/:/ /g; 22 | $sil_phones =~ s/:/ /g; 23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; 24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; 25 | 26 | print "\n"; 27 | print "\n"; 28 | print "\n"; 29 | print "$nonsil_phones $sil_phones\n"; 30 | print "\n"; 31 | # The next two lines may look like a bug, but they are as intended. State 0 has 32 | # no self-loop, it happens exactly once. And it can go either to state 1 (with 33 | # a self-loop) or to state 2, so we can have zero or more instances of state 1 34 | # following state 0. 35 | # We make the transition-probs 0.5 so they normalize, to keep the code happy. 36 | # In fact, we always set the transition probability scale to 0.0 in the 'chain' 37 | # code, so they are never used. 38 | print " 0 0 1 0.5 2 0.5 \n"; 39 | print " 1 1 1 0.5 2 0.5 \n"; 40 | print " 2 \n"; 41 | print "\n"; 42 | print "\n"; 43 | -------------------------------------------------------------------------------- /steps/nnet3/chain/gen_topo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # This script was modified around 11.11.2016, when the code was extended to 6 | # support having a different pdf-class on the self loop. 7 | 8 | # Generate a topology file. This allows control of the number of states in the 9 | # non-silence HMMs, and in the silence HMMs. This is a modified version of 10 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we 11 | # believe should be useful in the 'chain' model. Note: right now it doesn't 12 | # have any real options, and it treats silence and nonsilence the same. The 13 | # intention is that you write different versions of this script, or add options, 14 | # if you experiment with it. 15 | 16 | from __future__ import print_function 17 | import argparse 18 | 19 | 20 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " 21 | " " 22 | "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", 23 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); 24 | parser.add_argument("nonsilence_phones", type=str, 25 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); 26 | parser.add_argument("silence_phones", type=str, 27 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); 28 | 29 | args = parser.parse_args() 30 | 31 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ] 32 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] 33 | all_phones = silence_phones + nonsilence_phones 34 | 35 | print("") 36 | print("") 37 | print("") 38 | print(" ".join([str(x) for x in all_phones])) 39 | print("") 40 | # We make the transition-probs 0.5 so they normalize, to keep the code happy. 41 | # In fact, we always set the transition probability scale to 0.0 in the 'chain' 42 | # code, so they are never used. 43 | # Note: the will actually happen on the incoming arc because 44 | # we always build the graph with "reorder=true". 
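# Reconstruction for reference (not part of the original comment): the angle-bracket tags
# in the two print() calls below are lost in this copy; in the upstream script they emit
# roughly
#   <State> 0 <ForwardPdfClass> 0 <SelfLoopPdfClass> 1 <Transition> 0 0.5 <Transition> 1 0.5 </State>
#   <State> 1 </State>
# i.e. a single emitting state whose self-loop uses a different pdf-class (1) than the
# forward transition (0), plus a final non-emitting state.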
45 | print(" 0 0 1 0 0.5 1 0.5 ") 46 | print(" 1 ") 47 | print("") 48 | print("") 49 | -------------------------------------------------------------------------------- /steps/nnet3/chain/gen_topo3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of 7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we 8 | # believe should be useful in the 'chain' model. Note: right now it doesn't 9 | # have any real options, and it treats silence and nonsilence the same. The 10 | # intention is that you write different versions of this script, or add options, 11 | # if you experiment with it. 12 | 13 | from __future__ import print_function 14 | import argparse 15 | 16 | 17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " 18 | " " 19 | "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", 20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); 21 | parser.add_argument("nonsilence_phones", type=str, 22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); 23 | parser.add_argument("silence_phones", type=str, 24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); 25 | 26 | args = parser.parse_args() 27 | 28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ] 29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] 30 | all_phones = silence_phones + nonsilence_phones 31 | 32 | print("") 33 | print("") 34 | print("") 35 | print(" ".join([str(x) for x in all_phones])) 36 | print("") 37 | print(" 0 0 0 0.5 1 0.5 ") 38 | print(" 1 ") 39 | print("") 40 | print("") 41 | 42 | -------------------------------------------------------------------------------- /steps/nnet3/chain/gen_topo4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of 7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we 8 | # believe should be useful in the 'chain' model. Note: right now it doesn't 9 | # have any real options, and it treats silence and nonsilence the same. The 10 | # intention is that you write different versions of this script, or add options, 11 | # if you experiment with it. 12 | 13 | from __future__ import print_function 14 | import argparse 15 | 16 | 17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " 18 | " " 19 | "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", 20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); 21 | parser.add_argument("nonsilence_phones", type=str, 22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); 23 | parser.add_argument("silence_phones", type=str, 24 | help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); 25 | 26 | args = parser.parse_args() 27 | 28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ] 29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] 30 | all_phones = silence_phones + nonsilence_phones 31 | 32 | print("") 33 | print("") 34 | print("") 35 | print(" ".join([str(x) for x in all_phones])) 36 | print("") 37 | # state 0 is obligatory (occurs once) 38 | print(" 0 0 1 0.3333 2 0.3333 3 0.3333 ") 39 | # state 1 is used only when >2 frames 40 | print(" 1 1 1 0.5 2 0.5 ") 41 | # state 2 is used only when >=2 frames (and occurs once) 42 | print(" 2 2 3 1.0 ") 43 | print(" 3 ") # final nonemitting state 44 | print("") 45 | print("") 46 | 47 | -------------------------------------------------------------------------------- /steps/nnet3/chain/gen_topo5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. This is a modified version of 7 | # 'utils/gen_topo.pl' that generates a different type of topology, one that we 8 | # believe should be useful in the 'chain' model. Note: right now it doesn't 9 | # have any real options, and it treats silence and nonsilence the same. The 10 | # intention is that you write different versions of this script, or add options, 11 | # if you experiment with it. 12 | 13 | from __future__ import print_function 14 | import argparse 15 | 16 | 17 | parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " 18 | " " 19 | "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", 20 | epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); 21 | parser.add_argument("nonsilence_phones", type=str, 22 | help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); 23 | parser.add_argument("silence_phones", type=str, 24 | help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); 25 | 26 | args = parser.parse_args() 27 | 28 | silence_phones = [ int(x) for x in args.silence_phones.split(":") ] 29 | nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] 30 | all_phones = silence_phones + nonsilence_phones 31 | 32 | print("") 33 | print("") 34 | print("") 35 | print(" ".join([str(x) for x in all_phones])) 36 | print("") 37 | # state 0 is nonemitting 38 | print(" 0 1 0.5 2 0.5 ") 39 | # state 1 is for when we traverse it in 1 state 40 | print(" 1 0 4 1.0 ") 41 | # state 2 is for when we traverse it in >1 state, for the first state. 42 | print(" 2 2 3 1.0 ") 43 | # state 3 is for the self-loop. Use pdf-class 1 here so that the default 44 | # phone-class clustering (which uses only pdf-class 1 by default) gets only 45 | # stats from longer phones. 46 | print(" 3 1 3 0.5 4 0.5 ") 47 | print(" 4 ") 48 | print("") 49 | print("") 50 | 51 | -------------------------------------------------------------------------------- /steps/nnet3/chain2/internal/get_best_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Idiap Research Institute (Author: Srikanth Madikeri). Apache 2.0. 4 | # This script is the equivalent of get_successful_models function in the python library. 
5 | # It takes a list of models and returns either the best model (the deafult) or a list of 6 | # models to average. 7 | 8 | models_to_average=false 9 | difference_threshold=1.0 10 | output=output 11 | 12 | 13 | # echo "$0 $@" # Print the command line for logging 14 | 15 | if [ -f path.sh ]; then . ./path.sh; fi 16 | . parse_options.sh || exit 1; 17 | 18 | if [ $# -lt 1 ]; then 19 | echo "Usage: $0: [options] .... " 20 | echo "where is one of the n models to choose from." 21 | echo "" 22 | echo "--models-to-average: when true, returns the models to be averaged rather than the single best model" 23 | echo "--difference-threshold: used to reject models. models with objf < max-value - difference_threshold are rejected" 24 | echo "--output: the objf of the this output layer is used for model selection" 25 | echo "" 26 | exit 1; 27 | fi 28 | 29 | if ! $models_to_average; then 30 | if [ $# -eq 1 ]; then 31 | basename $1 | tr '.' ' ' | awk '{ print $(NF-1) }' 32 | exit 0; 33 | fi 34 | model_log_list=$(for arg in $*; do echo $arg; done) 35 | first_log=$1 36 | log_line=`fgrep -m 1 "Overall average objective function for '$output' is" $first_log` 37 | colno=`echo $log_line | cut -d '=' -f1 | wc -w` 38 | ((colno+=2)) 39 | filename=$(fgrep -m 1 "Overall average objective function for '$output' is" $model_log_list | \ 40 | cut -d ' ' -f1,$colno | tr ':' ' ' | \ 41 | awk '{print $1,$3}' | \ 42 | sort -k2,2 -g | tail -1 | cut -d ' ' -f1) 43 | basename $filename | tr '.' ' ' | awk '{ print $(NF-1) }' 44 | fi 45 | -------------------------------------------------------------------------------- /steps/nnet3/chain2/validate_processed_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | # Copyright 2019 Idiap Research Institute (Author: Srikanth Madikeri). Apache 2.0. 5 | # 6 | # This script validates a directory containing 'processed' egs for 'chain' 7 | # training, i.e. the output of process_egs.sh. It also helps to document the 8 | # expectations on such a directory. 9 | 10 | 11 | if [ -f path.sh ]; then . ./path.sh; fi 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo " e.g.: $0 exp/chain/tdnn1a_sp/processed_egs" 17 | echo "" 18 | echo "Validates that the processed-egs dir has the expected format" 19 | fi 20 | 21 | dir=$1 22 | 23 | # Note: the .ark files are not actually consumed directly downstream (only via 24 | # the top-level .scp files), but we check them anyway for now. 25 | for f in $dir/train.scp $dir/info.txt \ 26 | $dir/heldout_subset.{ark,scp} $dir/train_subset.{ark,scp} \ 27 | $dir/train.1.scp $dir/train.1.ark; do 28 | if ! [ -f $f -a -s $f ]; then 29 | echo "$0: expected file $f to exist and be nonempty." 30 | exit 1 31 | fi 32 | done 33 | 34 | 35 | if [ $(awk '/^dir_type/ { print $2; }' <$dir/info.txt) != "processed_chain_egs" ]; then 36 | grep dir_type $dir/info.txt 37 | echo "$0: dir_type should be processed_chain_egs in $dir/info.txt" 38 | exit 1 39 | fi 40 | 41 | lang=$(awk '/^lang / {print $2; }' <$dir/info.txt) 42 | 43 | for f in $dir/misc/$lang.{trans_mdl,normalization.fst,den.fst}; do 44 | if ! [ -f $f -a -s $f ]; then 45 | echo "$0: expected file $f to exist and be nonempty." 
46 | exit 1 47 | fi 48 | done 49 | 50 | echo "$0: sucessfully validated processed egs in $dir" 51 | -------------------------------------------------------------------------------- /steps/nnet3/chain2/validate_randomized_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | # Copyright 2019 Idiap Research Institute (Author: Srikanth Madikeri). Apache 2.0. 5 | # 6 | # This script validates a directory containing 'randomized' egs for 'chain' 7 | # training, i.e. the output of randomize_egs.sh (this is the final form of the 8 | # egs which is consumed by the training script). It also helps to document the 9 | # expectations on such a directory. 10 | 11 | 12 | if [ -f path.sh ]; then . ./path.sh; fi 13 | 14 | 15 | if [ $# != 1 ]; then 16 | echo "Usage: $0 " 17 | echo " e.g.: $0 exp/chain/tdnn1a_sp/egs" 18 | echo "" 19 | echo "Validates that the final (randomized) egs dir has the expected format" 20 | fi 21 | 22 | dir=$1 23 | 24 | # Note: the .ark files are not actually consumed directly downstream (only via 25 | # the top-level .scp files), but we check them anyway for now. 26 | for f in $dir/train.1.scp $dir/info.txt \ 27 | $dir/heldout_subset.scp $dir/train_subset.scp; do 28 | if ! [ -f $f -a -s $f ]; then 29 | echo "$0: expected file $f to exist and be nonempty." 30 | exit 1 31 | fi 32 | done 33 | 34 | 35 | if [ $(awk '/^dir_type/ { print $2; }' <$dir/info.txt) != "randomized_chain_egs" ]; then 36 | grep dir_type $dir/info.txt 37 | echo "$0: dir_type should be randomized_chain_egs in $dir/info.txt" 38 | exit 1 39 | fi 40 | 41 | langs=$(awk '/^langs / {$1 = ""; print; }' <$dir/info.txt) 42 | num_scp_files=$(awk '/^num_scp_files / { print $2; }' <$dir/info.txt) 43 | 44 | if [ -z "$langs" ]; then 45 | echo "$0: expecting the list of languages to be nonempty in $dir/info.txt" 46 | exit 1 47 | fi 48 | 49 | for lang in $langs; do 50 | for f in $dir/misc/$lang.{trans_mdl,normalization.fst,den.fst} $dir/info_${lang}.txt; do 51 | if ! [ -f $f -a -s $f ]; then 52 | echo "$0: expected file $f to exist and be nonempty." 53 | exit 1 54 | fi 55 | done 56 | done 57 | 58 | for i in $(seq $num_scp_files); do 59 | if ! [ -s $dir/train.$i.scp ]; then 60 | echo "$0: expected file $dir/train.$i.scp to exist and be nonempty." 61 | exit 1 62 | fi 63 | done 64 | 65 | 66 | echo "$0: sucessfully validated randomized egs in $dir" 67 | -------------------------------------------------------------------------------- /steps/nnet3/chain2/validate_raw_egs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2019 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | # Copyright 2019 Idiap Research Institute (Author: Srikanth Madikeri). Apache 2.0. 5 | # 6 | # This script validates a directory containing 'raw' egs for 'chain' training. 7 | # It also helps to document the expectations on such a directory. 8 | 9 | 10 | 11 | if [ -f path.sh ]; then . ./path.sh; fi 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo " e.g.: $0 exp/chaina/tdnn1a_sp/raw_egs" 17 | echo "" 18 | echo "Validates that the raw-egs dir has the expected format" 19 | fi 20 | 21 | dir=$1 22 | 23 | for f in $dir/all.scp $dir/cegs.1.ark $dir/info.txt \ 24 | $dir/misc/utt2spk; do 25 | if ! [ -s $f ]; then 26 | echo "$0: expected file $f to exist and be nonempty." 
27 | exit 1 28 | fi 29 | done 30 | 31 | 32 | if [ $(awk '/^dir_type/ { print $2; }' <$dir/info.txt) != "raw_chain_egs" ]; then 33 | grep dir_type $dir/info.txt 34 | echo "$0: dir_type should be raw_chain_egs in $dir/info.txt" 35 | exit 1 36 | fi 37 | 38 | lang=$(awk '/^lang / {print $2; }' <$dir/info.txt) 39 | 40 | for f in $dir/misc/$lang.{trans_mdl,normalization.fst,den.fst}; do 41 | if ! [ -s $f ]; then 42 | echo "$0: expected file $f to exist and be nonempty." 43 | exit 1 44 | fi 45 | done 46 | 47 | echo "$0: sucessfully validated raw egs in $dir" 48 | -------------------------------------------------------------------------------- /steps/nnet3/nnet3_to_dot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # script showing use of nnet3_to_dot.py 4 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti). 5 | 6 | # Begin configuration section. 7 | component_attributes="name,type" 8 | node_prefixes="" 9 | info_bin=nnet3-am-info 10 | echo "$0 $@" # Print the command line for logging 11 | 12 | [ -f ./path.sh ] && . ./path.sh; # source the path. 13 | . parse_options.sh || exit 1; 14 | 15 | if [ $# != 3 ]; then 16 | echo "Usage: $0 [opts] " 17 | echo " e.g.: $0 exp/sdm1/nnet3/lstm_sp/0.mdl lstm.dot lstm.png" 18 | echo "" 19 | echo "Main options (for others, see top of script file)" 20 | echo " --info-bin # Name of the binary to generate the nnet3 file" 21 | echo " --component-attributes # attributes to be printed in nnet3 components" 22 | echo " --node-prefixes # list of prefixes. Nnet3 components/component-nodes with the same prefix" 23 | echo " # will be clustered together in the dot-graph" 24 | 25 | 26 | exit 1; 27 | fi 28 | 29 | model=$1 30 | dot_file=$2 31 | output_file=$3 32 | 33 | attr=${node_prefixes:+ --node-prefixes "$node_prefixes"} 34 | $info_bin $model | \ 35 | steps/nnet3/dot/nnet3_to_dot.py \ 36 | --component-attributes "$component_attributes" \ 37 | $attr $dot_file 38 | echo "Generated the dot file $dot_file" 39 | 40 | command -v dot >/dev/null 2>&1 || { echo >&2 "This script requires dot but it's not installed. Please compile $dot_file with dot"; exit 1; } 41 | dot -Tpdf $dot_file -o $output_file 42 | -------------------------------------------------------------------------------- /steps/nnet3/remove_egs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Johns Hopkins University (Author: Daniel Povey). 4 | # Apache 2.0. 5 | 6 | # This script removes the examples in an egs/ directory, e.g. 7 | # steps/nnet2/remove_egs.sh exp/nnet4b/egs/ 8 | # We give it its own script because we need to be careful about 9 | # things that are soft links to something in storage/ (i.e. remove the 10 | # data that's linked to as well as the soft link), and we want to not 11 | # delete the examples if someone has done "touch $dir/egs/.nodelete". 12 | 13 | 14 | if [ $# != 1 ]; then 15 | echo "Usage: $0 " 16 | echo "e.g.: $0 data/nnet4b/egs/" 17 | echo "e.g.: $0 data/nnet4b_mpe/degs/" 18 | echo "This script is usually equivalent to 'rm /egs.* /degs.*' but it follows" 19 | echo "soft links to /storage/; and it avoids deleting anything in the directory if" 20 | echo "someone did 'touch /.nodelete" 21 | exit 1; 22 | fi 23 | 24 | egs=$1 25 | 26 | if [ ! 
-d $egs ]; then 27 | echo "$0: expected directory $egs to exist" 28 | exit 1; 29 | fi 30 | 31 | if [ -f $egs/.nodelete ]; then 32 | echo "$0: not deleting egs in $egs since $egs/.nodelete exists" 33 | exit 0; 34 | fi 35 | 36 | 37 | 38 | for f in $egs/egs.*.ark $egs/degs.*.ark $egs/cegs.*.ark; do 39 | if [ -L $f ]; then 40 | rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails. 41 | fi 42 | rm $f 2>/dev/null 43 | done 44 | 45 | 46 | echo "$0: Finished deleting examples in $egs" 47 | -------------------------------------------------------------------------------- /steps/online/nnet2/copy_ivector_dir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Johns Hopkins University (author: Hossein Hadian) 4 | # Apache 2.0 5 | 6 | # This script copies the necessary parts of an online ivector directory 7 | # optionally applying a mapping to the ivector_online.scp file 8 | 9 | utt2orig= 10 | 11 | . utils/parse_options.sh 12 | 13 | if [ $# != 2 ]; then 14 | echo "Usage: " 15 | echo " $0 [options] " 16 | echo "e.g.:" 17 | echo " $0 exp/nnet3/online_ivector_train exp/nnet3/online_ivector_train_fs" 18 | echo "Options" 19 | echo " --utt2orig= # utterance id mapping to use" 20 | exit 1; 21 | fi 22 | 23 | 24 | srcdir=$1 25 | destdir=$2 26 | 27 | if [ ! -f $srcdir/ivector_period ]; then 28 | echo "$0: no such file $srcdir/ivector_period" 29 | exit 1; 30 | fi 31 | 32 | if [ "$destdir" == "$srcdir" ]; then 33 | echo "$0: this script requires and to be different." 34 | exit 1 35 | fi 36 | 37 | set -e; 38 | 39 | mkdir -p $destdir 40 | cp -r $srcdir/{conf,ivector_period} $destdir 41 | if [ -z $utt2orig ]; then 42 | cp $srcdir/ivector_online.scp $destdir 43 | else 44 | utils/apply_map.pl -f 2 $srcdir/ivector_online.scp < $utt2orig > $destdir/ivector_online.scp 45 | fi 46 | cp $srcdir/final.ie.id $destdir 47 | 48 | echo "$0: Copied necessary parts of online ivector directory $srcdir to $destdir" 49 | -------------------------------------------------------------------------------- /steps/online/nnet2/get_pca_transform.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2016 David Snyder 4 | # 5 | # This script computes a PCA transform on top of spliced features processed with 6 | # apply-cmvn-online. 7 | # 8 | # 9 | # Apache 2.0. 10 | 11 | # Begin configuration. 12 | cmd=run.pl 13 | config= 14 | stage=0 15 | dim=40 # The dim after applying PCA 16 | normalize_variance=true # If the PCA transform normalizes the variance 17 | normalize_mean=true # If the PCA transform centers 18 | splice_opts= 19 | online_cmvn_opts= 20 | max_utts=5000 # maximum number of files to use 21 | subsample=5 # subsample features with this periodicity 22 | 23 | echo "$0 $@" # Print the command line for logging 24 | 25 | [ -f path.sh ] && . ./path.sh 26 | . parse_options.sh || exit 1; 27 | 28 | if [ $# != 2 ]; then 29 | echo "Usage: steps/nnet2/get_pca_transform.sh [options] " 30 | echo " e.g.: steps/train_pca_transform.sh data/train_si84 exp/tri2b" 31 | echo "Main options (for others, see top of script file)" 32 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 33 | echo " --config # config containing options" 34 | echo " --stage # stage to do partial re-run from." 35 | exit 1; 36 | fi 37 | 38 | data=$1 39 | dir=$2 40 | 41 | for f in $data/feats.scp ; do 42 | [ ! 
-f "$f" ] && echo "$0: expecting file $f to exist" && exit 1 43 | done 44 | 45 | mkdir -p $dir/log 46 | 47 | echo "$splice_opts" >$dir/splice_opts # keep track of frame-splicing options 48 | # so that later stages of system building can know what they were. 49 | echo $online_cmvn_opts > $dir/online_cmvn.conf # keep track of options to CMVN. 50 | 51 | # create global_cmvn.stats 52 | if ! matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then 53 | echo "$0: Error summing cmvn stats" 54 | exit 1 55 | fi 56 | 57 | feats="ark,s,cs:utils/subset_scp.pl --quiet $max_utts $data/feats.scp | apply-cmvn-online $online_cmvn_opts $dir/global_cmvn.stats scp:- ark:- | splice-feats $splice_opts ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |" 58 | 59 | if [ $stage -le 0 ]; then 60 | $cmd $dir/log/pca_est.log \ 61 | est-pca --dim=$dim --normalize-variance=$normalize_variance \ 62 | --normalize-mean=$normalize_mean "$feats" $dir/final.mat || exit 1; 63 | fi 64 | 65 | echo "Done estimating PCA transform in $dir" 66 | 67 | exit 0 68 | -------------------------------------------------------------------------------- /steps/overlap/post_process_output.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2015-17 Vimal Manohar 4 | # 2020 Desh Raj 5 | # Apache 2.0. 6 | 7 | # This script post-processes the output of the overlap neural network, 8 | # which is in the form of frame-level alignments, into an RTTM file. 9 | # The alignments must be 0/1/2 denoting silence/single/overlap. Based 10 | # on this, this script can also be used to get single speaker regions. 11 | 12 | set -e -o pipefail -u 13 | . ./path.sh 14 | 15 | cmd=run.pl 16 | stage=-10 17 | nj=18 18 | 19 | region_type=overlap # change this to "single" to get only single-speaker regions 20 | 21 | # The values below are in seconds 22 | frame_shift=0.01 23 | segment_padding=0.2 24 | min_segment_dur=0 25 | merge_consecutive_max_dur=inf 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# -ne 3 ]; then 30 | echo "This script post-processes the output of steps/segmentation/decode_sad.sh, " 31 | echo "which is in the form of frame-level alignments, into kaldi segments. " 32 | echo "The alignments must be speech activity detection marks i.e. 1 for silence " 33 | echo "and 2 for speech." 34 | echo "Usage: $0 " 35 | echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire" 36 | exit 1 37 | fi 38 | 39 | data_dir=$1 40 | output_dir=$2 # Alignment directory containing frame-level SAD labels 41 | dir=$3 42 | 43 | mkdir -p $dir 44 | 45 | for f in $output_dir/ali.1.gz $output_dir/num_jobs; do 46 | if [ ! 
-f $f ]; then 47 | echo "$0: Could not find file $f" && exit 1 48 | fi 49 | done 50 | 51 | nj=`cat $output_dir/num_jobs` || exit 1 52 | utils/split_data.sh $data_dir $nj 53 | 54 | utils/data/get_utt2dur.sh $data_dir 55 | 56 | if [ $stage -le 0 ]; then 57 | $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ 58 | copy-int-vector "ark:gunzip -c $output_dir/ali.JOB.gz |" ark,t:- \| \ 59 | steps/overlap/output_to_rttm.py \ 60 | --region-type=$region_type \ 61 | --frame-shift=$frame_shift --segment-padding=$segment_padding \ 62 | --min-segment-dur=$min_segment_dur --merge-consecutive-max-dur=$merge_consecutive_max_dur \ 63 | --utt2dur=$data_dir/utt2dur - $dir/rttm_${region_type}.JOB 64 | fi 65 | 66 | echo $nj > $dir/num_jobs 67 | 68 | for n in $(seq $nj); do 69 | cat $dir/rttm_${region_type}.$n 70 | done > $dir/rttm_${region_type} 71 | -------------------------------------------------------------------------------- /steps/pytorchnn/check_py.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | -------------------------------------------------------------------------------- /steps/pytorchnn/data.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import torch 7 | 8 | 9 | class Dictionary(object): 10 | def __init__(self): 11 | self.word2idx = {} 12 | self.idx2word = [] 13 | 14 | def read_vocab(self, path): 15 | with open(path, 'r', encoding='utf-8') as f: 16 | for line in f: 17 | word = line.split() 18 | assert (len(word) == 2) 19 | word = word[0] 20 | if word not in self.word2idx: 21 | self.idx2word.append(word) 22 | self.word2idx[word] = len(self.idx2word) - 1 23 | 24 | def __len__(self): 25 | return len(self.idx2word) 26 | 27 | 28 | class Corpus(object): 29 | def __init__(self, path): 30 | self.dictionary = Dictionary() 31 | self.dictionary.read_vocab(os.path.join(path, 'words.txt')) 32 | self.train = self.tokenize(os.path.join(path, 'train.txt')) 33 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 34 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 35 | 36 | def tokenize(self, path): 37 | """Tokenizes a text file.""" 38 | assert os.path.exists(path) 39 | with open(path, 'r', encoding='utf-8') as f: 40 | all_ids = [] 41 | for line in f: 42 | words = line.split() + [''] 43 | ids = [] 44 | for word in words: 45 | if word in self.dictionary.word2idx: 46 | ids.append(self.dictionary.word2idx[word]) 47 | else: 48 | ids.append(self.dictionary.word2idx['']) 49 | all_ids.append(torch.tensor(ids).type(torch.int64)) 50 | data = torch.cat(all_ids) 51 | 52 | return data 53 | -------------------------------------------------------------------------------- /steps/score_kaldi_compare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2016 Nicolas Serrano 3 | # Apache 2.0 4 | 5 | [ -f ./path.sh ] && . ./path.sh 6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | replications=10000 10 | #end configuration section. 11 | 12 | echo "$0 $@" # Print the command line for logging 13 | [ -f ./path.sh ] && . ./path.sh 14 | . parse_options.sh || exit 1; 15 | 16 | if [ $# -ne 3 ]; then 17 | echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " 18 | echo " Options:" 19 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 
20 | echo " --replications # number of bootstrap evaluation to compute confidence." 21 | exit 1; 22 | fi 23 | 24 | dir1=$1 25 | dir2=$2 26 | dir_compare=$3 27 | 28 | mkdir -p $dir_compare/log 29 | 30 | for d in $dir1 $dir2; do 31 | for f in test_filt.txt best_wer; do 32 | [ ! -f $d/$f ] && echo "$0: no such file $d/$f" && exit 1; 33 | done 34 | done 35 | 36 | 37 | best_wer_file1=$(awk '{print $NF}' $dir1/best_wer) 38 | best_transcript_file1=$(echo $best_wer_file1 | sed -e 's=.*/wer_==' | \ 39 | awk -v FS='_' -v dir=$dir1 '{print dir"/penalty_"$2"/"$1".txt"}') 40 | 41 | best_wer_file2=$(awk '{print $NF}' $dir2/best_wer) 42 | best_transcript_file2=$(echo $best_wer_file2 | sed -e 's=.*/wer_==' | \ 43 | awk -v FS='_' -v dir=$dir2 '{print dir"/penalty_"$2"/"$1".txt"}') 44 | 45 | $cmd $dir_compare/log/score_compare.log \ 46 | compute-wer-bootci --replications=$replications \ 47 | ark:$dir1/test_filt.txt ark:$best_transcript_file1 ark:$best_transcript_file2 \ 48 | '>' $dir_compare/wer_bootci_comparison || exit 1; 49 | 50 | exit 0; 51 | -------------------------------------------------------------------------------- /steps/scoring/score_kaldi_compare.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2016 Nicolas Serrano 3 | # Apache 2.0 4 | 5 | [ -f ./path.sh ] && . ./path.sh 6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | replications=10000 10 | #end configuration section. 11 | 12 | echo "$0 $@" # Print the command line for logging 13 | [ -f ./path.sh ] && . ./path.sh 14 | . parse_options.sh || exit 1; 15 | 16 | if [ $# -ne 3 ]; then 17 | echo "Usage: $0 [--cmd (run.pl|queue.pl...)] " 18 | echo " Options:" 19 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 20 | echo " --replications # number of bootstrap evaluation to compute confidence." 21 | exit 1; 22 | fi 23 | 24 | dir1=$1 25 | dir2=$2 26 | dir_compare=$3 27 | 28 | mkdir -p $dir_compare/log 29 | 30 | for d in $dir1 $dir2; do 31 | for f in test_filt.txt best_wer; do 32 | [ ! -f $d/$f ] && echo "$0: no such file $d/$f" && exit 1; 33 | done 34 | done 35 | 36 | 37 | best_wer_file1=$(awk '{print $NF}' $dir1/best_wer) 38 | best_transcript_file1=$(echo $best_wer_file1 | sed -e 's=.*/wer_==' | \ 39 | awk -v FS='_' -v dir=$dir1 '{print dir"/penalty_"$2"/"$1".txt"}') 40 | 41 | best_wer_file2=$(awk '{print $NF}' $dir2/best_wer) 42 | best_transcript_file2=$(echo $best_wer_file2 | sed -e 's=.*/wer_==' | \ 43 | awk -v FS='_' -v dir=$dir2 '{print dir"/penalty_"$2"/"$1".txt"}') 44 | 45 | $cmd $dir_compare/log/score_compare.log \ 46 | compute-wer-bootci --replications=$replications \ 47 | ark:$dir1/test_filt.txt ark:$best_transcript_file1 ark:$best_transcript_file2 \ 48 | '>' $dir_compare/wer_bootci_comparison || exit 1; 49 | 50 | exit 0; 51 | -------------------------------------------------------------------------------- /steps/search_index.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Guoguo Chen) 4 | # Apache 2.0 5 | 6 | # Begin configuration section. 7 | cmd=run.pl 8 | nbest=-1 9 | strict=true 10 | indices_dir= 11 | frame_subsampling_factor=1 12 | # End configuration section. 13 | 14 | echo "$0 $@" # Print the command line for logging 15 | 16 | [ -f ./path.sh ] && . ./path.sh; # source the path. 17 | . 
parse_options.sh || exit 1; 18 | 19 | if [ $# != 2 ]; then 20 | echo "Usage: steps/search_index.sh [options] " 21 | echo " e.g.: steps/search_index.sh data/kws exp/sgmm2_5a_mmi/decode/kws/" 22 | echo "" 23 | echo "main options (for others, see top of script file)" 24 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 25 | echo " --nbest # return n best results. (-1 means all)" 26 | echo " --indices-dir # where the indices should be stored, by default it will be in " 27 | exit 1; 28 | fi 29 | 30 | 31 | kwsdatadir=$1; 32 | kwsdir=$2; 33 | 34 | if [ -z $indices_dir ] ; then 35 | indices_dir=$kwsdir 36 | fi 37 | 38 | mkdir -p $kwsdir/log; 39 | nj=`cat $indices_dir/num_jobs` || exit 1; 40 | if [ -f $kwsdatadir/keywords.fsts.gz ]; then 41 | keywords="\"gunzip -c $kwsdatadir/keywords.fsts.gz|\"" 42 | elif [ -f $kwsdatadir/keywords.fsts ]; then 43 | keywords=$kwsdatadir/keywords.fsts; 44 | else 45 | echo "$0: no such file $kwsdatadir/keywords.fsts[.gz]" && exit 1; 46 | fi 47 | 48 | for f in $indices_dir/index.1.gz ; do 49 | [ ! -f $f ] && echo "make_index.sh: no such file $f" && exit 1; 50 | done 51 | 52 | $cmd JOB=1:$nj $kwsdir/log/search.JOB.log \ 53 | kws-search --strict=$strict --negative-tolerance=-1 \ 54 | --frame-subsampling-factor=${frame_subsampling_factor} \ 55 | "ark:gzip -cdf $indices_dir/index.JOB.gz|" ark:$keywords \ 56 | "ark,t:|gzip -c > $kwsdir/result.JOB.gz" \ 57 | "ark,t:|gzip -c > $kwsdir/stats.JOB.gz" || exit 1; 58 | 59 | exit 0; 60 | -------------------------------------------------------------------------------- /steps/segmentation/combine_targets_dirs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Nagendra Kumar Goel 4 | # 2018 Vimal Manohar 5 | # Apache 2.0. 6 | 7 | # This script combines targets directory into a new targets directory 8 | # containing targets from all the input targets directories. 9 | 10 | echo "$0 $@" # Print the command line for logging 11 | 12 | if [ -f path.sh ]; then . ./path.sh; fi 13 | . parse_options.sh || exit 1; 14 | 15 | if [ $# -lt 3 ]; then 16 | echo "Usage: $0 [options] ..." 
17 | echo "e.g.: $0 data/train exp/targets_combined exp/targets_1 exp/targets_2" 18 | exit 1; 19 | fi 20 | 21 | export LC_ALL=C 22 | 23 | data=$1; 24 | shift; 25 | dest=$1; 26 | shift; 27 | first_src=$1; 28 | 29 | mkdir -p $dest; 30 | rm -f $dest/{targets.*.ark,frame_subsampling_factor} 2>/dev/null 31 | 32 | frame_subsampling_factor=1 33 | if [ -f $first_src/frame_subsampling_factor ]; then 34 | cp $first_src/frame_subsampling_factor $dest 35 | frame_subsampling_factor=$(cat $dest/frame_subsampling_factor) 36 | fi 37 | 38 | for d in $*; do 39 | this_frame_subsampling_factor=1 40 | if [ -f $d/frame_subsampling_factor ]; then 41 | this_frame_subsampling_factor=$(cat $d/frame_subsampling_factor) 42 | fi 43 | 44 | if [ $this_frame_subsampling_factor != $frame_subsampling_factor ]; then 45 | echo "$0: Cannot combine targets directories with different frame-subsampling-factors" 1>&2 46 | exit 1 47 | fi 48 | 49 | cat $d/targets.scp 50 | done | sort -k1,1 > $dest/targets.scp || exit 1 51 | 52 | steps/segmentation/validate_targets_dir.sh $dest $data || exit 1 53 | 54 | echo "Combined targets and stored in $dest" 55 | exit 0 56 | -------------------------------------------------------------------------------- /steps/segmentation/copy_targets_dir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Nagendra Kumar Goel 4 | # 2014 Johns Hopkins University (author: Nagendra K Goel) 5 | # Apache 2.0 6 | 7 | # This script makes a copy of targets directory (by copying targets.scp), 8 | # possibly adding a specified prefix or a suffix to the utterance names. 9 | 10 | # begin configuration section 11 | utt_prefix= 12 | utt_suffix= 13 | # end configuration section 14 | 15 | if [ -f ./path.sh ]; then . ./path.sh; fi 16 | . ./utils/parse_options.sh 17 | 18 | if [ $# != 2 ]; then 19 | echo "Usage: " 20 | echo " $0 [options] " 21 | echo "e.g.:" 22 | echo " $0 --utt-prefix=1- exp/segmentation_1a/train_whole_combined_targets_sub3 exp/segmentation_1a/train_whole_combined_targets_sub3_rev1" 23 | echo "Options" 24 | echo " --utt-prefix= # Prefix for utterance ids, default empty" 25 | echo " --utt-suffix= # Suffix for utterance ids, default empty" 26 | exit 1; 27 | fi 28 | 29 | export LC_ALL=C 30 | 31 | srcdir=$1 32 | destdir=$2 33 | 34 | mkdir -p $destdir 35 | 36 | if [ -f $srcdir/frame_subsampling_factor ]; then 37 | cp $srcdir/frame_subsampling_factor $destdir 38 | fi 39 | 40 | cat $srcdir/targets.scp | awk -v p=$utt_prefix -v s=$utt_suffix \ 41 | '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map 42 | 43 | cat $srcdir/targets.scp | utils/apply_map.pl -f 1 $destdir/utt_map | \ 44 | sort -k1,1 > $destdir/targets.scp 45 | 46 | echo "$0: copied targets from $srcdir to $destdir" 47 | -------------------------------------------------------------------------------- /steps/segmentation/decode_sad.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | # This script does Viterbi decoding using a matrix of frame log-likelihoods 7 | # with the columns corresponding to the pdfs. 8 | # It is a wrapper around the binary decode-faster. 9 | 10 | set -e 11 | set -o pipefail 12 | 13 | cmd=run.pl 14 | nj=4 15 | acwt=0.1 16 | beam=8 17 | max_active=1000 18 | transform= # Transformation matrix to apply on the input archives read from output.scp 19 | 20 | . ./path.sh 21 | 22 | . 
utils/parse_options.sh 23 | 24 | if [ $# -ne 3 ]; then 25 | echo "Usage: $0 " 26 | echo " e.g.: $0 " 27 | exit 1 28 | fi 29 | 30 | graph_dir=$1 31 | nnet_output_dir=$2 32 | dir=$3 33 | 34 | mkdir -p $dir/log 35 | 36 | echo $nj > $dir/num_jobs 37 | 38 | for f in $graph_dir/HCLG.fst $nnet_output_dir/output.scp $extra_files; do 39 | if [ ! -f $f ]; then 40 | echo "$0: Could not find file $f" 41 | exit 1 42 | fi 43 | done 44 | 45 | rspecifier="ark:utils/split_scp.pl -j $nj \$[JOB-1] $nnet_output_dir/output.scp | copy-feats scp:- ark:- |" 46 | 47 | # Apply a transformation on the input matrix to combine 48 | # probs from different columns to pseudo-likelihoods 49 | if [ ! -z "$transform" ]; then 50 | rspecifier="$rspecifier transform-feats $transform ark:- ark:- |" 51 | fi 52 | 53 | # Convert pseudo-likelihoods to pseudo log-likelihood 54 | rspecifier="$rspecifier copy-matrix --apply-log ark:- ark:- |" 55 | 56 | decoder_opts+=(--acoustic-scale=$acwt --beam=$beam --max-active=$max_active) 57 | 58 | $cmd JOB=1:$nj $dir/log/decode.JOB.log \ 59 | decode-faster ${decoder_opts[@]} \ 60 | $graph_dir/HCLG.fst "$rspecifier" \ 61 | ark:/dev/null "ark:| gzip -c > $dir/ali.JOB.gz" 62 | -------------------------------------------------------------------------------- /steps/segmentation/internal/find_oov_phone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0 5 | 6 | """This script finds the OOV phone by reading the OOV word from 7 | oov.int in the <lang> input directory and the lexicon 8 | <lang>/phones/align_lexicon.int. 9 | It prints the OOV phone to stdout, if it can find a single phone 10 | mapping for the OOV word.""" 11 | from __future__ import print_function 12 | 13 | import sys 14 | 15 | 16 | def main(): 17 | if len(sys.argv) != 2: 18 | raise RuntimeError("Usage: {0} <lang>".format(sys.argv[0])) 19 | 20 | lang = sys.argv[1] 21 | 22 | oov_int = int(open("{0}/oov.int".format(lang)).readline()) 23 | assert oov_int > 0 24 | 25 | oov_mapped_to_multiple_phones = False 26 | for line in open("{0}/phones/align_lexicon.int".format(lang)): 27 | parts = line.strip().split() 28 | 29 | if len(parts) < 3: 30 | raise RuntimeError("Could not parse line {0} in " 31 | "{1}/phones/align_lexicon.int" 32 | "".format(line, lang)) 33 | 34 | w = int(parts[0]) 35 | if w != oov_int: 36 | continue 37 | 38 | if len(parts[2:]) > 1: 39 | # Try to find a single phone mapping for OOV 40 | oov_mapped_to_multiple_phones = True 41 | continue 42 | 43 | p = int(parts[2]) 44 | print ("{0}".format(p)) 45 | 46 | raise SystemExit(0) 47 | 48 | if oov_mapped_to_multiple_phones: 49 | raise RuntimeError("OOV word found, but is mapped to multiple phones. " 50 | "This is an unusual case.") 51 | 52 | raise RuntimeError("Could not find OOV word in " 53 | "{0}/phones/align_lexicon.int".format(lang)) 54 | 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /steps/segmentation/internal/verify_phones_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0 5 | 6 | """This script verifies the list of phones read from stdin are valid 7 | phones present in lang/phones.txt.""" 8 | 9 | import argparse 10 | import sys 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser(description=""" 14 | This script verifies the list of phones read from stdin are valid 15 | phones present in lang/phones.txt.""") 16 | 17 | parser.add_argument("phones", type=str, 18 | help="File containing the list of all phones as the " 19 | "first column") 20 | 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(): 26 | args = get_args() 27 | phones = set() 28 | for line in open(args.phones): 29 | phones.add(line.strip().split()[0]) 30 | 31 | for line in sys.stdin.readlines(): 32 | p = line.strip() 33 | 34 | if p not in phones: 35 | sys.stderr.write("Could not find phone {p} in {f}" 36 | "\n".format(p=p, f=args.phones)) 37 | raise SystemExit(1) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /steps/segmentation/post_process_sad_to_segments.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2015-17 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | # This script post-processes the output of steps/segmentation/decode_sad.sh, 7 | # which is in the form of frame-level alignments, into a 'segments' file. 8 | # The alignments must be speech activity detection marks i.e. 1 for silence 9 | # and 2 for speech. 10 | 11 | set -e -o pipefail -u 12 | . ./path.sh 13 | 14 | cmd=run.pl 15 | stage=-10 16 | nj=18 17 | 18 | # The values below are in seconds 19 | frame_shift=0.01 20 | segment_padding=0.2 21 | min_segment_dur=0 22 | merge_consecutive_max_dur=0 23 | 24 | . utils/parse_options.sh 25 | 26 | if [ $# -ne 3 ]; then 27 | echo "This script post-processes the output of steps/segmentation/decode_sad.sh, " 28 | echo "which is in the form of frame-level alignments, into kaldi segments. " 29 | echo "The alignments must be speech activity detection marks i.e. 1 for silence " 30 | echo "and 2 for speech." 31 | echo "Usage: $0 " 32 | echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire" 33 | exit 1 34 | fi 35 | 36 | data_dir=$1 37 | vad_dir=$2 # Alignment directory containing frame-level SAD labels 38 | dir=$3 39 | 40 | mkdir -p $dir 41 | 42 | for f in $vad_dir/ali.1.gz $vad_dir/num_jobs; do 43 | if [ ! 
-f $f ]; then 44 | echo "$0: Could not find file $f" && exit 1 45 | fi 46 | done 47 | 48 | nj=`cat $vad_dir/num_jobs` || exit 1 49 | utils/split_data.sh $data_dir $nj 50 | 51 | utils/data/get_utt2dur.sh $data_dir 52 | 53 | if [ $stage -le 0 ]; then 54 | $cmd JOB=1:$nj $dir/log/segmentation.JOB.log \ 55 | copy-int-vector "ark:gunzip -c $vad_dir/ali.JOB.gz |" ark,t:- \| \ 56 | steps/segmentation/internal/sad_to_segments.py \ 57 | --frame-shift=$frame_shift --segment-padding=$segment_padding \ 58 | --min-segment-dur=$min_segment_dur --merge-consecutive-max-dur=$merge_consecutive_max_dur \ 59 | --utt2dur=$data_dir/utt2dur - $dir/segments.JOB 60 | fi 61 | 62 | echo $nj > $dir/num_jobs 63 | 64 | for n in $(seq $nj); do 65 | cat $dir/segments.$n 66 | done > $dir/segments 67 | -------------------------------------------------------------------------------- /steps/subset_ali_dir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | cmd=run.pl 7 | 8 | if [ -f ./path.sh ]; then . ./path.sh; fi 9 | 10 | . ./utils/parse_options.sh 11 | 12 | if [ $# -ne 4 ]; then 13 | cat < from the 16 | original alignment directory containing alignments for utterances in 17 | . 18 | 19 | The number of split jobs in the output alignment directory is 20 | equal to the number of jobs in the original alignment directory, 21 | unless the subset data directory has too few speakers. 22 | 23 | Usage: $0 [options] 24 | e.g.: $0 data/train_sp data/train exp/tri3_ali_sp exp/tri3_ali 25 | 26 | Options: 27 | --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs. 28 | EOF 29 | exit 1 30 | fi 31 | 32 | data=$1 33 | subset_data=$2 34 | ali_dir=$3 35 | dir=$4 36 | 37 | nj=$(cat $ali_dir/num_jobs) || exit 1 38 | utils/split_data.sh $data $nj 39 | 40 | mkdir -p $dir 41 | cp $ali_dir/{final.mdl,*.mat,*_opts,tree} $dir/ || true 42 | cp -r $ali_dir/phones $dir 2>/dev/null || true 43 | 44 | $cmd JOB=1:$nj $dir/log/copy_alignments.JOB.log \ 45 | copy-int-vector "ark:gunzip -c $ali_dir/ali.JOB.gz |" \ 46 | ark,scp:$dir/ali_tmp.JOB.ark,$dir/ali_tmp.JOB.scp || exit 1 47 | 48 | for n in `seq $nj`; do 49 | cat $dir/ali_tmp.$n.scp 50 | done > $dir/ali_tmp.scp 51 | 52 | num_spk=$(cat $subset_data/spk2utt | wc -l) 53 | if [ $num_spk -lt $nj ]; then 54 | nj=$num_spk 55 | fi 56 | 57 | utils/split_data.sh $subset_data $nj 58 | $cmd JOB=1:$nj $dir/log/filter_alignments.JOB.log \ 59 | copy-int-vector \ 60 | "scp:utils/filter_scp.pl $subset_data/split${nj}/JOB/utt2spk $dir/ali_tmp.scp |" \ 61 | "ark:| gzip -c > $dir/ali.JOB.gz" || exit 1 62 | 63 | echo $nj > $dir/num_jobs 64 | 65 | rm $dir/ali_tmp.*.{ark,scp} $dir/ali_tmp.scp 66 | 67 | exit 0 68 | -------------------------------------------------------------------------------- /steps/tfrnnlm/check_py.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | -------------------------------------------------------------------------------- /steps/tfrnnlm/check_tensorflow_installed.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # this script checks if TF is installed to be used with python 4 | # and if TF related binaries in kaldi is ready to use 5 | . ./path.sh 6 | 7 | if which lattice-lmrescore-tf-rnnlm 2>&1>/dev/null; then 8 | echo TensorFlow relate binaries found. This is good. 9 | else 10 | echo TF related binaries not compiled. 
11 | echo You need to go to tools/ and run extras/install_tensorflow_cc.sh first 12 | echo and then do \"make\" under both src/tfrnnlm and src/tfrnnlmbin 13 | exit 1 14 | fi 15 | 16 | echo 17 | 18 | if python steps/tfrnnlm/check_py.py 2>/dev/null; then 19 | echo TensorFlow ready to use on the python side. This is good. 20 | else 21 | echo TensorFlow not found on the python side. 22 | echo Please go to tools/ and run extras/install_tensorflow_py.sh to install it 23 | echo If you already have TensorFlow installed somewhere else, you would need 24 | echo to add it to your PATH 25 | exit 1 26 | fi 27 | -------------------------------------------------------------------------------- /steps/word_align_lattices.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright Johns Hopkins University (Author: Daniel Povey) 2012 4 | # Apache 2.0. 5 | 6 | # Begin configuration section. 7 | silence_label=0 8 | cmd=run.pl 9 | # End configuration section. 10 | 11 | echo "$0 $@" # Print the command line for logging 12 | 13 | for x in `seq 2`; do 14 | [ "$1" == "--silence-label" ] && silence_label=$2 && shift 2; 15 | [ "$1" == "--cmd" ] && cmd="$2" && shift 2; 16 | done 17 | 18 | if [ $# != 3 ]; then 19 | echo "Word-align lattices (make the arcs sync up with words)" 20 | echo "" 21 | echo "Usage: $0 [options] " 22 | echo "options: [--cmd (run.pl|queue.pl [queue opts])] [--silence-label ]" 23 | exit 1; 24 | fi 25 | 26 | . ./path.sh || exit 1; 27 | 28 | lang=$1 29 | indir=$2 30 | outdir=$3 31 | 32 | mdl=`dirname $indir`/final.mdl 33 | wbfile=$lang/phones/word_boundary.int 34 | 35 | for f in $mdl $wbfile $indir/num_jobs; do 36 | [ ! -f $f ] && echo "word_align_lattices.sh: no such file $f" && exit 1; 37 | done 38 | 39 | mkdir -p $outdir/log 40 | 41 | 42 | cp $indir/num_jobs $outdir; 43 | nj=`cat $indir/num_jobs` 44 | 45 | $cmd JOB=1:$nj $outdir/log/align.JOB.log \ 46 | lattice-align-words --silence-label=$silence_label --test=true \ 47 | $wbfile $mdl "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c >$outdir/lat.JOB.gz" || exit 1; 48 | 49 | -------------------------------------------------------------------------------- /utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Adds some specified number of disambig symbols to a symbol table. 19 | # Adds these as #1, #2, etc. 20 | # If the --include-zero option is specified, includes an extra one 21 | # #0. 
22 | 23 | $include_zero = 0; 24 | if($ARGV[0] eq "--include-zero") { 25 | $include_zero = 1; 26 | shift @ARGV; 27 | } 28 | 29 | if(@ARGV != 2) { 30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "; 31 | } 32 | 33 | 34 | $input = $ARGV[0]; 35 | $nsyms = $ARGV[1]; 36 | 37 | open(F, "<$input") || die "Opening file $input"; 38 | 39 | while(<F>) { 40 | @A = split(" ", $_); 41 | @A == 2 || die "Bad line $_"; 42 | $lastsym = $A[1]; 43 | print; 44 | } 45 | 46 | if(!defined($lastsym)){ 47 | die "Empty symbol file?"; 48 | } 49 | 50 | if($include_zero) { 51 | $lastsym++; 52 | print "#0 $lastsym\n"; 53 | } 54 | 55 | for($n = 1; $n <= $nsyms; $n++) { 56 | $y = $n + $lastsym; 57 | print "#$n $y\n"; 58 | } 59 | -------------------------------------------------------------------------------- /utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 18 | 19 | $dur = $total = 0; 20 | $maxDur = 0; 21 | $minDur = 9999999999; 22 | $n = 0; 23 | while(<>){ 24 | chomp; 25 | @t = split(/\s+/); 26 | $dur = $t[3] - $t[2]; 27 | $total += $dur; 28 | if ($dur > $maxDur) { 29 | $maxSegId = $t[0]; 30 | $maxDur = $dur; 31 | } 32 | if ($dur < $minDur) { 33 | $minSegId = $t[0]; 34 | $minDur = $dur; 35 | } 36 | $n++; 37 | } 38 | $avg=$total/$n; 39 | $hrs = $total/3600; 40 | print "Total $hrs hours of data\n"; 41 | print "Average segment length $avg seconds\n"; 42 | print "Segment $maxSegId has length of $maxDur seconds\n"; 43 | print "Segment $minSegId has length of $minDur seconds\n"; 44 | -------------------------------------------------------------------------------- /utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite. 25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] " 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | mkdir -p $new_lang 34 | cp -r $old_lang/* $new_lang 35 | 36 | unk=`cat $old_lang/oov.int` 37 | bos=`grep "^<s>\s" $old_lang/words.txt | awk '{print $2}'` 38 | eos=`grep "^</s>\s" $old_lang/words.txt | awk '{print $2}'` 39 | if [[ -z $bos || -z $eos ]]; then 40 | echo "$0: <s> and </s> symbols are not in $old_lang/words.txt" 41 | exit 1 42 | fi 43 | if [[ -z $unk ]]; then 44 | echo "$0: can't find oov symbol id in $old_lang/oov.int" 45 | exit 1 46 | fi 47 | 48 | 49 | arpa-to-const-arpa --bos-symbol=$bos \ 50 | --eos-symbol=$eos --unk-symbol=$unk \ 51 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 52 | 53 | exit 0; 54 | -------------------------------------------------------------------------------- /utils/build_kenlm_model_from_arpa.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 2020 author Jiayu DU 3 | # Apache 2.0 4 | 5 | # This script reads in an Arpa format language model, and converts it into the 6 | # KenLM format language model. 7 | 8 | [ -f path.sh ] && . ./path.sh; 9 | 10 | # begin configuration section 11 | kenlm_opts="" # e.g. "-q 8 -b 8" for 8bits quantization 12 | model_type="trie" # "trie" or "probing". trie is smaller, probing is faster. 13 | # end configuration section 14 | 15 | . utils/parse_options.sh 16 | 17 | if [ $# != 2 ]; then 18 | echo "Usage: " 19 | echo " $0 [options] " 20 | echo "e.g.:" 21 | echo " $0 data/local/lm/4gram.arpa data/lang_test/G.trie" 22 | echo "Options:" 23 | echo " --model-type can be either \"trie\" or \"probing\"" 24 | echo " --kenlm-opts directly pass through to kenlm" 25 | echo " e.g. for 8bits quantization, feed \"-q 8 -b 8\"" 26 | exit 1; 27 | fi 28 | 29 | export LC_ALL=C 30 | 31 | arpa_lm=$1 32 | kenlm=$2 33 | 34 | if ! which build_binary >& /dev/null ; then 35 | echo "$0: cannot find KenLM's build_binary tool," 36 | echo "check kaldi installation (tools/extras/install_kenlm_query_only.sh)." 
37 | exit 1 38 | fi 39 | 40 | mkdir -p $(dirname $kenlm) 41 | build_binary $kenlm_opts $model_type $arpa_lm $kenlm 42 | 43 | echo "$0: Successfully built arpa into kenlm format: $kenlm" 44 | exit 0 45 | -------------------------------------------------------------------------------- /utils/ctm/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/data/convert_data_dir_to_whole.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Copyright 2016-2018 Vimal Manohar 4 | # Apache 2.0 5 | 6 | # This scripts converts a data directory into a "whole" data directory 7 | # by removing the segments and using the recordings themselves as 8 | # utterances 9 | 10 | set -o pipefail 11 | 12 | . ./path.sh 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# -ne 2 ]; then 17 | echo "Usage: convert_data_dir_to_whole.sh " 18 | echo " e.g.: convert_data_dir_to_whole.sh data/dev data/dev_whole" 19 | exit 1 20 | fi 21 | 22 | data=$1 23 | dir=$2 24 | 25 | if [ ! -f $data/segments ]; then 26 | echo "$0: Data directory already does not contain segments. So just copying it." 
27 | utils/copy_data_dir.sh $data $dir 28 | exit 0 29 | fi 30 | 31 | mkdir -p $dir 32 | cp $data/wav.scp $dir 33 | if [ -f $data/reco2file_and_channel ]; then 34 | cp $data/reco2file_and_channel $dir; 35 | fi 36 | 37 | mkdir -p $dir/.backup 38 | if [ -f $dir/feats.scp ]; then 39 | mv $dir/feats.scp $dir/.backup 40 | fi 41 | if [ -f $dir/cmvn.scp ]; then 42 | mv $dir/cmvn.scp $dir/.backup 43 | fi 44 | if [ -f $dir/utt2spk ]; then 45 | mv $dir/utt2spk $dir/.backup 46 | fi 47 | 48 | [ -f $data/stm ] && cp $data/stm $dir 49 | [ -f $data/glm ] && cp $data/glm $dir 50 | 51 | utils/data/internal/combine_segments_to_recording.py \ 52 | --write-reco2utt=$dir/reco2sorted_utts $data/segments $dir/utt2spk || exit 1 53 | 54 | if [ -f $data/text ]; then 55 | utils/apply_map.pl -f 2- $data/text < $dir/reco2sorted_utts > $dir/text || exit 1 56 | fi 57 | 58 | rm $dir/reco2sorted_utts 59 | 60 | utils/fix_data_dir.sh $dir || exit 1 61 | 62 | exit 0 63 | -------------------------------------------------------------------------------- /utils/data/extract_wav_segments_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Hossein Hadian 4 | # Apache 2.0 5 | 6 | # This script copies a data directory (which has a 'segments' file), extracting 7 | # wav segments (according to the 'segments' file) 8 | # so that the resulting data directory does not have a 'segments' file anymore. 9 | 10 | nj=4 11 | cmd=run.pl 12 | 13 | . ./utils/parse_options.sh 14 | . ./path.sh 15 | 16 | if [ $# != 2 ]; then 17 | echo "Usage: $0 " 18 | echo " This script copies data directory to and removes" 19 | echo " the 'segments' file by extracting the wav segments." 20 | echo "Options: " 21 | echo " --nj # number of parallel jobs" 22 | echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." 23 | exit 1; 24 | fi 25 | 26 | 27 | export LC_ALL=C 28 | 29 | srcdir=$1 30 | dir=$2 31 | logdir=$dir/log 32 | 33 | if ! mkdir -p $dir/data; then 34 | echo "$0: failed to create directory $dir/data" 35 | exit 1 36 | fi 37 | mkdir -p $logdir 38 | 39 | set -eu -o pipefail 40 | utils/copy_data_dir.sh $srcdir $dir 41 | 42 | split_segments="" 43 | for n in $(seq $nj); do 44 | split_segments="$split_segments $logdir/segments.$n" 45 | done 46 | 47 | utils/split_scp.pl $srcdir/segments $split_segments 48 | 49 | $cmd JOB=1:$nj $logdir/extract_wav_segments.JOB.log \ 50 | extract-segments scp,p:$srcdir/wav.scp $logdir/segments.JOB \ 51 | ark,scp:$dir/data/wav_segments.JOB.ark,$dir/data/wav_segments.JOB.scp 52 | 53 | # concatenate the .scp files together. 54 | for n in $(seq $nj); do 55 | cat $dir/data/wav_segments.$n.scp 56 | done > $dir/data/wav_segments.scp 57 | 58 | cat $dir/data/wav_segments.scp | awk '{ print $1 " wav-copy " $2 " - |" }' >$dir/wav.scp 59 | rm $dir/{segments,reco2file_and_channel} 2>/dev/null || true 60 | -------------------------------------------------------------------------------- /utils/data/get_frame_shift.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script takes as input a data directory, such as data/train/, preferably 7 | # with utt2dur file already existing (or the utt2dur file will be created if 8 | # not), and it attempts to work out the approximate frame shift by comparing the 9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out. 
10 | # if the shift is very close to, but above, 0.01 (the normal frame shift) it 11 | # rounds it down. 12 | 13 | . utils/parse_options.sh 14 | . ./path.sh 15 | 16 | if [ $# != 1 ]; then 17 | cat >&2 <) 19 | e.g.: frame_shift=\$($0 data/train) 20 | 21 | This script prints the frame-shift in seconds (e.g. 0.01) to the standard out. 22 | Its output is intended to be captured in a shell variable. 23 | 24 | If does not contain the file utt2dur, this script may invoke 25 | utils/data/get_utt2dur.sh, which will require write permission to . 26 | EOF 27 | exit 1 28 | fi 29 | 30 | export LC_ALL=C 31 | 32 | dir=$1 33 | 34 | if [[ -s $dir/frame_shift ]]; then 35 | cat $dir/frame_shift 36 | exit 37 | fi 38 | 39 | if [ ! -f $dir/feats.scp ]; then 40 | echo "$0: $dir/feats.scp does not exist" 1>&2 41 | exit 1 42 | fi 43 | 44 | if [ ! -s $dir/utt2dur ]; then 45 | if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then 46 | echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2 47 | echo 0.01 48 | exit 0 49 | fi 50 | echo "$0: $dir/utt2dur does not exist: creating it" 1>&2 51 | utils/data/get_utt2dur.sh 1>&2 $dir || exit 1 52 | fi 53 | 54 | temp=$(mktemp /tmp/tmp.XXXX) || exit 1 55 | 56 | feat-to-len --print-args=false "scp:head -n 10 $dir/feats.scp|" ark,t:- > $temp 57 | 58 | if [[ ! -s $temp ]]; then 59 | rm $temp 60 | echo "$0: error running feat-to-len" 1>&2 61 | exit 1 62 | fi 63 | 64 | frame_shift=$(head -n 10 $dir/utt2dur | paste - $temp | awk ' 65 | { dur += $2; frames += $4; } 66 | END { shift = dur / frames; 67 | if (shift > 0.01 && shift < 0.0102) shift = 0.01; 68 | print shift; }') || exit 1; 69 | 70 | rm $temp 71 | 72 | echo $frame_shift > $dir/frame_shift 73 | echo $frame_shift 74 | exit 0 75 | -------------------------------------------------------------------------------- /utils/data/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 " 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | fi 16 | 17 | data=$1 18 | 19 | if [ ! -f $data/utt2dur ]; then 20 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 21 | fi 22 | 23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 24 | 25 | awk -v s=$frame_shift '{n += $2} END{printf("%.0f\n", (n / s))}' <$data/utt2dur 26 | -------------------------------------------------------------------------------- /utils/data/get_reco2utt_for_data.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0 5 | 6 | if [ $# -ne 1 ]; then 7 | echo "This script outputs a mapping from recording to a list of utterances " 8 | echo "corresponding to the recording. It is analogous to the content of " 9 | echo "a spk2utt file, but is indexed by recording instead of speaker." 10 | echo "Usage: get_reco2utt.sh " 11 | echo " e.g.: get_reco2utt.sh data/train" 12 | exit 1 13 | fi 14 | 15 | data=$1 16 | 17 | if [ ! 
-s $data/segments ]; then 18 | utils/data/get_segments_for_data.sh $data > $data/segments 19 | fi 20 | 21 | cut -d ' ' -f 1,2 $data/segments | utils/utt2spk_to_spk2utt.pl 22 | -------------------------------------------------------------------------------- /utils/data/get_segments_for_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script operates on a data directory, such as in data/train/, 4 | # and writes new segments to stdout. The file 'segments' maps from 5 | # utterance to time offsets into a recording, with the format: 6 | # 7 | # This script assumes utterance and recording ids are the same (i.e., that 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 9 | # created if necessary by get_utt2dur.sh. 10 | 11 | . ./path.sh 12 | 13 | if [ $# != 1 ]; then 14 | echo "Usage: $0 [options] " 15 | echo "e.g.:" 16 | echo " $0 data/train > data/train/segments" 17 | exit 1 18 | fi 19 | 20 | data=$1 21 | 22 | if [ ! -s $data/utt2dur ]; then 23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1; 24 | fi 25 | 26 | # 0 27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /utils/data/get_utt2num_frames.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # Apache 2.0. 5 | 6 | cmd=run.pl 7 | nj=4 8 | 9 | frame_shift=0.01 10 | frame_overlap=0.015 11 | 12 | . utils/parse_options.sh 13 | . ./path.sh 14 | 15 | if [ $# -ne 1 ]; then 16 | echo "This script writes a file utt2num_frames with the " 17 | echo "number of frames in each utterance as measured based on the " 18 | echo "duration of the utterances (in utt2dur) and the specified " 19 | echo "frame_shift and frame_overlap." 20 | echo "Usage: $0 " 21 | exit 1 22 | fi 23 | 24 | data=$1 25 | 26 | if [ -s $data/utt2num_frames ]; then 27 | echo "$0: $data/utt2num_frames already present!" 28 | exit 0; 29 | fi 30 | 31 | if [ ! -f $data/feats.scp ]; then 32 | utils/data/get_utt2dur.sh --nj ${nj} --cmd "$cmd" $data 33 | awk -v fs=$frame_shift -v fovlp=$frame_overlap \ 34 | '{print $1" "int( ($2 - fovlp) / fs)}' $data/utt2dur > $data/utt2num_frames 35 | exit 0 36 | fi 37 | 38 | utils/split_data.sh --per-utt $data $nj || exit 1 39 | $cmd JOB=1:$nj $data/log/get_utt2num_frames.JOB.log \ 40 | feat-to-len scp:$data/split${nj}utt/JOB/feats.scp ark,t:$data/split${nj}utt/JOB/utt2num_frames || exit 1 41 | 42 | for n in `seq $nj`; do 43 | cat $data/split${nj}utt/$n/utt2num_frames 44 | done > $data/utt2num_frames 45 | 46 | echo "$0: Computed and wrote $data/utt2num_frames" 47 | -------------------------------------------------------------------------------- /utils/data/internal/combine_segments_to_recording.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2018 Vimal Manohar 4 | # Apache 2.0 5 | 6 | from __future__ import print_function 7 | import argparse 8 | import sys 9 | import collections 10 | from collections import defaultdict 11 | 12 | def get_args(): 13 | parser = argparse.ArgumentParser(description=""" 14 | This script combines segments into utterances at 15 | recording-level and write out new utt2spk file with reco-id as the 16 | speakers. If --write-reco2utt is provided, it writes a mapping from 17 | recording-id to the list of utterances sorted by start and end times. 
18 | This map can be used to combine text corresponding to the segments to 19 | recording-level.""") 20 | 21 | parser.add_argument("--write-reco2utt", help="If provided, writes a " 22 | "mapping from recording-id to list of utterances " 23 | "sorted by start and end times.") 24 | parser.add_argument("segments_in", help="Input segments file") 25 | parser.add_argument("utt2spk_out", help="Output utt2spk file") 26 | 27 | args = parser.parse_args() 28 | 29 | return args 30 | 31 | 32 | def main(): 33 | args = get_args() 34 | 35 | utt2reco = {} 36 | segments_for_reco = defaultdict(list) 37 | for line in open(args.segments_in): 38 | parts = line.strip().split() 39 | 40 | if len(parts) < 4: 41 | raise TypeError("bad line in segments file {}".format(line)) 42 | 43 | utt = parts[0] 44 | reco = parts[1] 45 | start_time = parts[2] 46 | end_time = parts[3] 47 | 48 | segments_for_reco[reco].append((utt, start_time, end_time)) 49 | utt2reco[utt] = reco 50 | 51 | if args.write_reco2utt is not None: 52 | with open(args.write_reco2utt, 'w') as reco2utt_writer, \ 53 | open(args.utt2spk_out, 'w') as utt2spk_writer: 54 | for reco, segments_in_reco in segments_for_reco.items(): 55 | utts = ' '.join([seg[0] for seg in sorted( 56 | segments_in_reco, key=lambda x:(x[1], x[2]))]) 57 | print("{0} {1}".format(reco, utts), file=reco2utt_writer) 58 | print ("{0} {0}".format(reco), file=utt2spk_writer) 59 | else: 60 | with open(args.utt2spk_out, 'w') as utt2spk_writer: 61 | for reco in segments_for_reco.keys(): 62 | print ("{0} {0}".format(reco), file=utt2spk_writer) 63 | 64 | 65 | if __name__ == "__main__": 66 | main() 67 | -------------------------------------------------------------------------------- /utils/data/limit_feature_dim.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2016 Alibaba Robotics Corp. (author: Xingyu Na) 4 | # Apache 2.0 5 | 6 | # The script creates a new data directory by selecting a specified 7 | # dimension range of the features in the source directory. 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "Usage: " 13 | echo " $0 " 14 | echo "The script creates a new data directory by selecting a specified" 15 | echo "dimension range of the features in the source directory." 16 | echo "e.g.:" 17 | echo " $0 0:39 data/train_hires_pitch data/train_hires" 18 | exit 1; 19 | fi 20 | 21 | feat_dim_range=$1 22 | srcdir=$2 23 | destdir=$3 24 | 25 | if [ "$destdir" == "$srcdir" ]; then 26 | echo "$0: this script requires and to be different." 27 | exit 1 28 | fi 29 | 30 | if [ ! -f $srcdir/feats.scp ]; then 31 | echo "$0: no such file $srcdir/feats.scp" 32 | exit 1; 33 | fi 34 | 35 | mkdir -p $destdir 36 | utils/copy_data_dir.sh $srcdir $destdir 37 | 38 | if [ -f $destdir/cmvn.scp ]; then 39 | rm $destdir/cmvn.scp 40 | echo "$0: warning: removing $destdir/cmvn.cp, you will have to regenerate it from the features." 41 | fi 42 | 43 | rm $destdir/feats.scp 44 | sed 's/$/\[:,'${feat_dim_range}'\]/' $srcdir/feats.scp | \ 45 | utils/data/normalize_data_range.pl > $destdir/feats.scp 46 | 47 | [ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text" 48 | utils/validate_data_dir.sh $validate_opts $destdir 49 | -------------------------------------------------------------------------------- /utils/data/modify_speaker_info_to_recording.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Vimal Manohar 4 | # Apache 2.0. 
5 | 6 | # Copy the data directory, but modify it to use the recording-id as the 7 | # speaker. This is useful to get matching speaker information in the 8 | # whole recording data directory. 9 | # Note that this also appends the recording-id as a prefix to the 10 | # utterance-id. 11 | 12 | if [ $# -ne 2 ]; then 13 | echo "Usage: $0 " 14 | echo " e.g.: $0 data/train data/train_recospk" 15 | exit 1 16 | fi 17 | 18 | in_data=$1 19 | out_data=$2 20 | 21 | mkdir -p $out_data 22 | 23 | for f in wav.scp segments utt2spk; do 24 | if [ ! -f $in_data/$f ]; then 25 | echo "$0: Could not find file $in_data/$f" 26 | exit 1 27 | fi 28 | done 29 | 30 | cp $in_data/wav.scp $out_data/ || exit 1 31 | cp $in_data/reco2file_and_channel $out_data/ 2> /dev/null || true 32 | awk '{print $1" "$2"-"$1}' $in_data/segments > \ 33 | $out_data/old2new.uttmap || exit 1 34 | utils/apply_map.pl -f 1 $out_data/old2new.uttmap < $in_data/segments > \ 35 | $out_data/segments || exit 1 36 | awk '{print $1" "$2}' $out_data/segments > $out_data/utt2spk || exit 1 37 | utils/utt2spk_to_spk2utt.pl $out_data/utt2spk > $out_data/spk2utt || exit 1 38 | 39 | if [ -f $in_data/text ]; then 40 | utils/apply_map.pl -f 1 $out_data/old2new.uttmap < $in_data/text > \ 41 | $out_data/text || exit 1 42 | fi 43 | 44 | if [ -f $in_data/feats.scp ]; then 45 | utils/apply_map.pl -f 1 $out_data/old2new.uttmap < $in_data/feats.scp > \ 46 | $out_data/feats.scp || exit 1 47 | fi 48 | 49 | utils/fix_data_dir.sh $out_data || exit 1 50 | utils/validate_data_dir.sh --no-text --no-feats $out_data || exit 1 51 | -------------------------------------------------------------------------------- /utils/data/remove_dup_utts.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Remove excess utterances once they appear more than a specified 4 | # number of times with the same transcription, in a data set. 5 | # E.g. useful for removing excess "uh-huh" from training. 6 | 7 | if [ $# != 3 ]; then 8 | echo "Usage: remove_dup_utts.sh max-count " 9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup" 10 | echo "This script is used to filter out utterances that have from over-represented" 11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of" 12 | echo "any given word-sequence to a specified value. It's often used to get" 13 | echo "subsets for early stages of training." 14 | exit 1; 15 | fi 16 | 17 | maxcount=$1 18 | srcdir=$2 19 | destdir=$3 20 | mkdir -p $destdir 21 | 22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1; 23 | 24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1; 25 | 26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1; 27 | 28 | cp $srcdir/* $destdir 29 | cat $srcdir/text | \ 30 | perl -e ' 31 | $maxcount = shift @ARGV; 32 | @all = (); 33 | $p1 = 103349; $p2 = 71147; $k = 0; 34 | sub random { # our own random number generator: predictable. 
35 | $k = ($k + $p1) % $p2; 36 | return ($k / $p2); 37 | } 38 | while(<>) { 39 | push @all, $_; 40 | @A = split(" ", $_); 41 | shift @A; 42 | $text = join(" ", @A); 43 | $count{$text} ++; 44 | } 45 | foreach $line (@all) { 46 | @A = split(" ", $line); 47 | shift @A; 48 | $text = join(" ", @A); 49 | $n = $count{$text}; 50 | if ($n < $maxcount || random() < ($maxcount / $n)) { 51 | print $line; 52 | } 53 | }' $maxcount >$destdir/text 54 | 55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`" 56 | 57 | echo "Using fix_data_dir.sh to reconcile the other files." 58 | utils/fix_data_dir.sh $destdir 59 | rm -r $destdir/.backup 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /utils/data/resample_data_dir.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # 2018 Xiaohui Zhang 5 | # Apache 2.0. 6 | 7 | if [ $# -ne 2 ]; then 8 | echo "This script adds a sox line in wav.scp to resample the audio at a " 9 | echo "different sampling-rate" 10 | echo "Usage: $0 " 11 | echo " e.g.: $0 8000 data/dev" 12 | exit 1 13 | fi 14 | 15 | freq=$1 16 | dir=$2 17 | 18 | sox=`which sox` || { echo "Could not find sox in PATH"; exit 1; } 19 | 20 | if [ -f $dir/feats.scp ]; then 21 | mkdir -p $dir/.backup 22 | mv $dir/feats.scp $dir/.backup/ 23 | if [ -f $dir/cmvn.scp ]; then 24 | mv $dir/cmvn.scp $dir/.backup/ 25 | fi 26 | echo "$0: feats.scp already exists. Moving it to $dir/.backup" 27 | fi 28 | 29 | # After resampling we cannot compute utt2dur from wav.scp any more, 30 | # so we create utt2dur now, in case it's needed later 31 | if [ ! -s $dir/utt2dur ]; then 32 | utils/data/get_utt2dur.sh $dir 1>&2 || exit 1; 33 | fi 34 | 35 | mv $dir/wav.scp $dir/wav.scp.tmp 36 | cat $dir/wav.scp.tmp | python -c "import sys 37 | for line in sys.stdin.readlines(): 38 | splits = line.strip().split() 39 | if splits[-1] == '|': 40 | out_line = line.strip() + ' $sox -t wav - -c 1 -b 16 -t wav - rate $freq |' 41 | else: 42 | out_line = '{0} cat {1} | $sox -t wav - -c 1 -b 16 -t wav - rate $freq |'.format(splits[0], ' '.join(splits[1:])) 43 | print (out_line)" > ${dir}/wav.scp 44 | rm $dir/wav.scp.tmp 45 | -------------------------------------------------------------------------------- /utils/data/shift_and_combine_feats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2017 Hossein Hadian 4 | 5 | # Apache 2.0 6 | 7 | write_utt2orig= # if provided, this script will write 8 | # a mapping of shifted utterance ids 9 | # to the original ones into the file 10 | # specified by this option 11 | 12 | echo "$0 $@" # Print the command line for logging 13 | if [ -f path.sh ]; then . ./path.sh; fi 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: $0 " 18 | echo "e.g.: $0 3 data/train data/train_fs3" 19 | echo "For use in perturbing data for discriminative training and alignment of" 20 | echo "frame-subsampled systems, this script uses utils/data/shift_feats.sh" 21 | echo "and utils/data/combine_data.sh to shift the features" 22 | echo " different ways and combine them." 23 | echo "E.g. if is 3, this script will combine" 24 | echo "the data frame-shifted by -1, 0 and 1 (c.f. shift-feats)." 25 | exit 1 26 | fi 27 | 28 | frame_subsampling_factor=$1 29 | srcdir=$2 30 | destdir=$3 31 | 32 | if [ ! 
-f $srcdir/feats.scp ]; then 33 | echo "$0: expected $srcdir/feats.scp to exist" 34 | exit 1 35 | fi 36 | 37 | if [ -f $destdir/feats.scp ]; then 38 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" 39 | exit 1 40 | fi 41 | 42 | if [ ! -z $write_utt2orig ]; then 43 | awk '{print $1 " " $1}' $srcdir/feats.scp >$write_utt2orig 44 | fi 45 | 46 | tmp_shift_destdirs=() 47 | for frame_shift in `seq $[-(frame_subsampling_factor/2)] $[-(frame_subsampling_factor/2) + frame_subsampling_factor - 1]`; do 48 | if [ "$frame_shift" == 0 ]; then continue; fi 49 | utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1 50 | tmp_shift_destdirs+=("${destdir}_fs$frame_shift") 51 | if [ ! -z $write_utt2orig ]; then 52 | awk -v prefix="fs$frame_shift-" '{printf("%s%s %s\n", prefix, $1, $1);}' $srcdir/feats.scp >>$write_utt2orig 53 | fi 54 | done 55 | utils/data/combine_data.sh $destdir $srcdir ${tmp_shift_destdirs[@]} || exit 1 56 | rm -r ${tmp_shift_destdirs[@]} 57 | 58 | utils/validate_data_dir.sh $destdir 59 | 60 | src_nf=`cat $srcdir/feats.scp | wc -l` 61 | dest_nf=`cat $destdir/feats.scp | wc -l` 62 | if [ $[src_nf*frame_subsampling_factor] -ne $dest_nf ]; then 63 | echo "There was a problem. Expected number of feature lines in destination dir to be $[src_nf*frame_subsampling_factor];" 64 | exit 1; 65 | fi 66 | 67 | echo "$0: Successfully generated $frame_subsampling_factor-way shifted version of data in $srcdir, in $destdir" 68 | -------------------------------------------------------------------------------- /utils/data/shift_feats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # 2017 Hossein Hadian 5 | # Apache 2.0 6 | 7 | echo "$0 $@" # Print the command line for logging 8 | if [ -f path.sh ]; then . ./path.sh; fi 9 | . parse_options.sh || exit 1; 10 | 11 | if [ $# != 3 ]; then 12 | echo " Usage: $0 " 13 | echo "e.g.: $0 -1 data/train data/train_fs-1" 14 | echo "The script creates a new data directory with the features modified" 15 | echo "using the program shift-feats with the specified frame-shift." 16 | echo "This program automatically adds the prefix 'fs-' to the" 17 | echo "utterance and speaker names. See also utils/data/shift_and_combine_feats.sh" 18 | exit 1 19 | fi 20 | 21 | frame_shift=$1 22 | srcdir=$2 23 | destdir=$3 24 | 25 | 26 | if [ "$destdir" == "$srcdir" ]; then 27 | echo "$0: this script requires and to be different." 28 | exit 1 29 | fi 30 | 31 | if [ ! -f $srcdir/feats.scp ]; then 32 | echo "$0: no such file $srcdir/feats.scp" 33 | exit 1; 34 | fi 35 | 36 | utt_prefix="fs$frame_shift-" 37 | spk_prefix="fs$frame_shift-" 38 | 39 | mkdir -p $destdir 40 | utils/copy_data_dir.sh --utt-prefix $utt_prefix --spk-prefix $spk_prefix \ 41 | $srcdir $destdir 42 | 43 | if grep --quiet "'" $srcdir/feats.scp; then 44 | echo "$0: the input features already use single quotes. Can't proceed." 
45 | exit 1; 46 | fi 47 | 48 | awk -v shift=$frame_shift 'NF == 2 {uttid=$1; feat=$2; qt="";} \ 49 | NF > 2 {idx=index($0, " "); uttid=$1; feat=substr($0, idx + 1); qt="\x27";} \ 50 | NF {print uttid " shift-feats --print-args=false --shift=" shift, qt feat qt " - |";}' \ 51 | $destdir/feats.scp >$destdir/feats_shifted.scp 52 | mv -f $destdir/feats_shifted.scp $destdir/feats.scp 53 | 54 | echo "$0: Done" 55 | 56 | -------------------------------------------------------------------------------- /utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | 5 | from __future__ import print_function 6 | import sys 7 | 8 | vocab=set() 9 | with open(sys.argv[1]) as vocabfile: 10 | for line in vocabfile: 11 | vocab.add(line.strip()) 12 | 13 | with open(sys.argv[2]) as textfile: 14 | for line in textfile: 15 | print(" ".join([word if word in vocab else '' for word in line.strip().split()])) 16 | -------------------------------------------------------------------------------- /utils/find_arpa_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | if ( @ARGV < 1 && @ARGV > 2) { 19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n"; 20 | # This program finds words in the arpa file that are not symbols 21 | # in the OpenFst-format symbol table words.txt. It prints them 22 | # on the standard output, one per line. 
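  # Illustrative example (the word "FOO" is hypothetical): if lm.arpa contains
  # a 1-gram entry for FOO but words.txt has no line for FOO, then "FOO" is
  # printed, one OOV word per line.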
23 | } 24 | 25 | $symtab = shift @ARGV; 26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 2 || die "Bad line in symbol table file: $_"; 30 | $seen{$A[0]} = 1; 31 | } 32 | 33 | $found_data=0; 34 | $curgram=0; 35 | while(<>) { # Find the \data\ marker. 36 | if(m:^\\data\\\s*$:) { $found_data=1; last; } 37 | } 38 | 39 | if ($found_data==0) { 40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n"; 41 | exit(1); 42 | } 43 | 44 | while(<>) { 45 | if(m/^\\(\d+)\-grams:\s*$/) { 46 | $curgram = $1; 47 | if($curgram > 1) { 48 | last; # This is an optimization as we can get the vocab from the 1-grams 49 | } 50 | } elsif($curgram > 0) { 51 | @A = split(" ", $_); 52 | if(@A > 1) { 53 | shift @A; 54 | for($n=0;$n<$curgram;$n++) { 55 | $word = $A[$n]; 56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; } 57 | $in_arpa{$word} = 1; 58 | } 59 | } else { 60 | if(@A > 0 && $A[0] !~ m:\\end\\:) { 61 | print STDERR "Unusual line $_ (line $.) in arpa file\n"; 62 | } 63 | } 64 | } 65 | } 66 | 67 | foreach $w (keys %in_arpa) { 68 | if(!defined $seen{$w} && $w ne "" && $w ne "") { 69 | print "$w\n"; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/int2sym.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0. 4 | 5 | undef $field_begin; 6 | undef $field_end; 7 | 8 | 9 | if ($ARGV[0] eq "-f") { 10 | shift @ARGV; 11 | $field_spec = shift @ARGV; 12 | if ($field_spec =~ m/^\d+$/) { 13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 14 | } 15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10) 16 | if ($1 ne "") { 17 | $field_begin = $1 - 1; # Change to zero-based indexing. 18 | } 19 | if ($2 ne "") { 20 | $field_end = $2 - 1; # Change to zero-based indexing. 21 | } 22 | } 23 | if (!defined $field_begin && !defined $field_end) { 24 | die "Bad argument to -f option: $field_spec"; 25 | } 26 | } 27 | $symtab = shift @ARGV; 28 | if(!defined $symtab) { 29 | print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" . 30 | "options: [-f (|-)]\n" . 
31 | "e.g.: -f 2, or -f 3-4\n"; 32 | exit(1); 33 | } 34 | 35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 36 | while() { 37 | @A = split(" ", $_); 38 | @A == 2 || die "bad line in symbol table file: $_"; 39 | $int2sym{$A[1]} = $A[0]; 40 | } 41 | 42 | sub int2sym { 43 | my $a = shift @_; 44 | my $pos = shift @_; 45 | if($a !~ m:^\d+$:) { # not all digits.. 46 | $pos1 = $pos+1; # make it one-based. 47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; 48 | } 49 | $s = $int2sym{$a}; 50 | if(!defined ($s)) { 51 | die "int2sym.pl: integer $a not in symbol table $symtab."; 52 | } 53 | return $s; 54 | } 55 | 56 | $error = 0; 57 | while (<>) { 58 | @A = split(" ", $_); 59 | for ($pos = 0; $pos <= $#A; $pos++) { 60 | $a = $A[$pos]; 61 | if ( (!defined $field_begin || $pos >= $field_begin) 62 | && (!defined $field_end || $pos <= $field_end)) { 63 | $a = int2sym($a, $pos); 64 | } 65 | print $a . " "; 66 | } 67 | print "\n"; 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/lang/add_unigrams_arpa.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2018 Xiaohui Zhang 4 | # Apache 2.0. 5 | # 6 | use strict; 7 | use warnings; 8 | use Getopt::Long; 9 | 10 | my $Usage = < output-arpa 13 | contains a list of words and their probabilities, e.g. "jack 0.2". All probs will be 14 | scaled by a positive scalar and then be used as the unigram prob. of the added word. 15 | The scale should approximiately relect the OOV rate of the language in concern. 16 | EOU 17 | 18 | my @F; 19 | my @OOVS; 20 | 21 | if (@ARGV != 2) { 22 | die $Usage; 23 | } 24 | 25 | # Gets parameters. 26 | my $oov_prob_file = shift @ARGV; 27 | my $scale = shift @ARGV; 28 | my $arpa_in = shift @ARGV; 29 | my $arpa_out = shift @ARGV; 30 | 31 | # Opens files. 32 | open(F, "<$oov_prob_file") || die "$0: Fail to open $oov_prob_file\n"; 33 | while () { push @OOVS, $_; } 34 | my $num_oovs = @OOVS; 35 | 36 | $scale > 0.0 || die "Bad scale"; 37 | print STDERR "$0: Creating LM file with additional unigrams, using $oov_prob_file\n"; 38 | 39 | my %vocab; 40 | my $unigram = 0; 41 | my $num_unigrams = 0; 42 | my @lines; 43 | 44 | # Parse and record the head and unigrams in the ARPA LM. 45 | while() { 46 | if (m/^ngram 1=(\d+)/) { $num_unigrams = $1; } 47 | 48 | if (m/^\\2-grams:$/) { last; } 49 | if (m/^\\1-grams:$/) { $unigram = 1; push(@lines, $_); next; } 50 | if (m/^\\2-grams:$/) { $unigram = 0; } 51 | 52 | my @col = split(" ", $_); 53 | if ( $unigram == 1 ) { 54 | # Record in-vocab words into a map. 55 | if ( @col > 0 ) { 56 | my $word = $col[1]; 57 | $vocab{$word} = 1; 58 | push(@lines, $_); 59 | } else { 60 | # Insert out-of-vocab words and their probs into the unigram list. 61 | foreach my $l (@OOVS) { 62 | my @A = split(" ", $l); 63 | @A == 2 || die "bad line in oov2prob: $_;"; 64 | my $word = $A[0]; 65 | my $prob = $A[1]; 66 | if (exists($vocab{$word})) { next; } 67 | $num_unigrams ++; 68 | my $log10prob = (log($prob * $scale) / log(10.0)); 69 | $vocab{$word} = 1; 70 | my $line = sprintf("%.6f\t$word\n", $log10prob); 71 | push(@lines, $line); 72 | } 73 | } 74 | } else { push(@lines, $_); } 75 | } 76 | 77 | # Print the head and unigrams, with the updated # unigrams in the head. 78 | foreach my $l (@lines) { 79 | if ($l =~ m/ngram 1=/) { 80 | print "ngram 1=$num_unigrams\n"; 81 | } else { 82 | print $l; 83 | } 84 | } 85 | 86 | # Print the left fields. 
87 | print "\n\\2-grams:\n"; 88 | while() { 89 | print; 90 | } 91 | 92 | close(F); 93 | exit 0 94 | -------------------------------------------------------------------------------- /utils/lang/adjust_unk_arpa.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2018 Xiaohui Zhang 4 | # Apache 2.0. 5 | # 6 | use strict; 7 | use warnings; 8 | use Getopt::Long; 9 | 10 | my $Usage = < output-arpa 13 | 14 | Allowed options: 15 | --fixed-value (true|false) : If true, interpret the unk-scale as a fixed value we'll set to 16 | the unigram prob of the OOV dict entry, rather than using it to 17 | scale the probs. In this case higher order n-grams containing 18 | the OOV dict entry remain untouched. This is useful when the OOV 19 | dict entry doesn't appear in n-grams (n>1) as the predicted word. 20 | EOU 21 | 22 | my $fixed_value = "false"; 23 | GetOptions('fixed-value=s' => \$fixed_value); 24 | 25 | ($fixed_value eq "true" || $fixed_value eq "false") || 26 | die "$0: Bad value for option --fixed-value\n"; 27 | 28 | if (@ARGV != 2) { 29 | die $Usage; 30 | } 31 | 32 | # Gets parameters. 33 | my $unk_word = shift @ARGV; 34 | my $unk_scale = shift @ARGV; 35 | my $arpa_in = shift @ARGV; 36 | my $arpa_out = shift @ARGV; 37 | 38 | $unk_scale > 0.0 || die "Bad unk_scale"; # this must be positive 39 | if ( $fixed_value eq "true" ) { 40 | print STDERR "$0: Setting the unigram prob of $unk_word in LM file as $unk_scale.\n"; 41 | } else { 42 | print STDERR "$0: Scaling the probs of ngrams where $unk_word is the predicted word in LM file by $unk_scale.\n"; 43 | } 44 | 45 | my $ngram = 0; # the order of ngram we are visiting 46 | 47 | # Change the unigram prob of the unk-word in the ARPA LM. 48 | while() { 49 | if (m/^\\1-grams:$/) { $ngram = 1; } 50 | if (m/^\\2-grams:$/) { $ngram = 2; } 51 | if (m/^\\3-grams:$/) { $ngram = 3; } 52 | if (m/^\\4-grams:$/) { $ngram = 4; } 53 | if (m/^\\5-grams:$/) { $ngram = 5; } 54 | my @col = split(" ", $_); 55 | if ( @col > 1 && $ngram > 0 && $col[$ngram] eq $unk_word ) { 56 | if ( $fixed_value eq "true" && $ngram == 1 ) { 57 | $col[0] = (log($unk_scale) / log(10.0)); 58 | } elsif ($fixed_value eq "false" ) { 59 | $col[0] += (log($unk_scale) / log(10.0)); 60 | } 61 | my $line = join("\t", @col); 62 | print "$line\n"; 63 | } else { 64 | print; 65 | } 66 | } 67 | 68 | exit 0 69 | -------------------------------------------------------------------------------- /utils/lang/adjust_unk_graph.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2018 Xiaohui Zhang 3 | # Apache 2.0 4 | 5 | # This script copies a fully expanded decoding graph (HCLG.fst) and scales the scores 6 | # of all arcs whose output symbol is a user-specified OOV symbol (or any other word). 7 | # This achieves an equivalent effect of utils/lang/adjust_unk_arpa.pl, which scales 8 | # the LM prob of all ngrams predicting an OOV symbol, while avoiding re-creating the graph. 9 | 10 | set -o pipefail 11 | 12 | if [ $# != 4 ]; then 13 | echo "Usage: utils/adjust_unk_graph.sh " 14 | echo "e.g.: utils/adjust_unk_graph.sh \"\" 0.1 exp/tri1/graph exp/tri1/graph_unk_scale_0.1" 15 | exit 1; 16 | fi 17 | 18 | if [ -f path.sh ]; then . 
./path.sh; fi 19 | 20 | oov_word=$1 21 | unk_scale=$2 22 | graphdir_in=$3 23 | graphdir_out=$4 24 | 25 | mkdir -p $graphdir_out 26 | 27 | required="HCLG.fst words.txt disambig_tid.int num_pdfs phones phones.txt words.txt" 28 | for f in $required; do 29 | [ ! -e $graphdir_in/$f ] && echo "adjust_unk_graph.sh: expected $graphdir_in/$f to exist" && exit 1; 30 | cp -r $graphdir_in/$f $graphdir_out 31 | done 32 | 33 | cp -r $graphdir_in/{disambig_tid.int,num_pdfs,phones,phones.txt,words.txt} $graphdir_out 34 | 35 | oov_id=`echo $oov_word | utils/sym2int.pl $graphdir_in/words.txt` 36 | [ -z $oov_id ] && echo "adjust_unk_graph.sh: the specified oov symbol $oov_word is out of the vocabulary." && exit 1; 37 | fstprint $graphdir_in/HCLG.fst | awk -v oov=$oov_id -v unk_scale=$unk_scale '{if($4==oov) $5=$5-log(unk_scale);print $0}' | \ 38 | fstcompile | fstconvert --fst_type=const > $graphdir_out/HCLG.fst || exit 1; 39 | -------------------------------------------------------------------------------- /utils/lang/bpe/add_final_optional_silence.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | . ./path.sh 3 | 4 | final_sil_prob=0.5 5 | 6 | echo "$0 $@" # Print the command line for logging 7 | 8 | . ./utils/parse_options.sh 9 | 10 | if [ $# -ne 1 ]; then 11 | echo "Usage: $0 " 12 | echo " Add final optional silence to lexicon FSTs (L.fst and L_disambig.fst) in" 13 | echo " lang/ directory ." 14 | echo " This can be useful in systems with byte-pair encoded (BPE) lexicons, in which" 15 | echo " the word-initial silence is part of the lexicon, so we turn off the standard" 16 | echo " optional silence in the lexicon" 17 | echo "options:" 18 | echo " --final-sil-prob # default 0.5" 19 | exit 1; 20 | fi 21 | 22 | lang=$1 23 | 24 | if [ $lang/phones/final_sil_prob -nt $lang/phones/nonsilence.txt ]; then 25 | echo "$0 $lang/phones/final_sil_prob exists. Exiting..." 26 | exit 1; 27 | fi 28 | 29 | silphone=$(cat $lang/phones/optional_silence.int) 30 | 31 | sil_eq_zero=$(echo $(perl -e "if ( $final_sil_prob == 0.0) {print 'true';} else {print 'false';}")) 32 | sil_eq_one=$(echo $(perl -e "if ( $final_sil_prob == 1.0) {print 'true';} else {print 'false';}")) 33 | sil_lt_zero=$(echo $(perl -e "if ( $final_sil_prob < 0.0) {print 'true';} else {print 'false';}")) 34 | sil_gt_one=$(echo $(perl -e "if ( $final_sil_prob > 1.0) {print 'true';} else {print 'false';}")) 35 | 36 | if $sil_lt_zero || $sil_gt_one; then 37 | echo "$0 final-sil-prob should be between 0.0 and 1.0. Final silence was not added." 38 | exit 1; 39 | else 40 | if $sil_eq_zero; then 41 | echo "$0 final-sil-prob = 0 => Final silence was not added." 
42 | exit 0; 43 | elif $sil_eq_one; then 44 | ( echo "0 1 $silphone 0"; 45 | echo "1" ) | fstcompile > $lang/final_sil.fst 46 | else 47 | log_silprob=$(echo $(perl -e "print log $final_sil_prob")) 48 | ( echo "0 1 $silphone 0 $log_silprob"; 49 | echo "0 $log_silprob"; 50 | echo "1" ) | fstcompile > $lang/final_sil.fst 51 | fi 52 | mv $lang/L.fst $lang/L.fst.orig 53 | mv $lang/L_disambig.fst $lang/L_disambig.fst.orig 54 | fstconcat $lang/L.fst.orig $lang/final_sil.fst | fstarcsort --sort_type=olabel > $lang/L.fst 55 | fstconcat $lang/L_disambig.fst.orig $lang/final_sil.fst | fstarcsort --sort_type=olabel > $lang/L_disambig.fst 56 | echo "$final_sil_prob" > $lang/phones/final_sil_prob 57 | fi 58 | -------------------------------------------------------------------------------- /utils/lang/bpe/bidi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2018 Chun-Chieh Chang 3 | 4 | # This script is largely written by Stephen Rawls 5 | # and uses the python package https://pypi.org/project/PyICU_BiDi/ 6 | # The code leaves right to left text alone and reverses left to right text. 7 | 8 | import icu_bidi 9 | import io 10 | import sys 11 | import unicodedata 12 | # R=strong right-to-left; AL=strong arabic right-to-left 13 | rtl_set = set(chr(i) for i in range(sys.maxunicode) 14 | if unicodedata.bidirectional(chr(i)) in ['R','AL']) 15 | def determine_text_direction(text): 16 | # Easy case first 17 | for char in text: 18 | if char in rtl_set: 19 | return icu_bidi.UBiDiLevel.UBIDI_RTL 20 | # If we made it here we did not encounter any strongly rtl char 21 | return icu_bidi.UBiDiLevel.UBIDI_LTR 22 | 23 | def utf8_visual_to_logical(text): 24 | text_dir = determine_text_direction(text) 25 | 26 | bidi = icu_bidi.Bidi() 27 | bidi.inverse = True 28 | bidi.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_INVERSE_LIKE_DIRECT 29 | bidi.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT # icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS 30 | 31 | bidi.set_para(text, text_dir, None) 32 | 33 | res = bidi.get_reordered(0 | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING) 34 | 35 | return res 36 | 37 | def utf8_logical_to_visual(text): 38 | text_dir = determine_text_direction(text) 39 | 40 | bidi = icu_bidi.Bidi() 41 | 42 | bidi.reordering_mode = icu_bidi.UBiDiReorderingMode.UBIDI_REORDER_DEFAULT 43 | bidi.reordering_options = icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_DEFAULT #icu_bidi.UBiDiReorderingOption.UBIDI_OPTION_INSERT_MARKS 44 | 45 | bidi.set_para(text, text_dir, None) 46 | 47 | res = bidi.get_reordered(0 | icu_bidi.UBidiWriteReorderedOpt.UBIDI_DO_MIRRORING | icu_bidi.UBidiWriteReorderedOpt.UBIDI_KEEP_BASE_COMBINING) 48 | 49 | return res 50 | 51 | 52 | ##main## 53 | sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding="utf8") 54 | sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf8") 55 | for line in sys.stdin: 56 | line = line.strip() 57 | line = utf8_logical_to_visual(line)[::-1] 58 | sys.stdout.write(line + '\n') 59 | -------------------------------------------------------------------------------- /utils/lang/bpe/prepend_words.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This script, prepend '|' to every words in the transcript to mark 4 | # the beginning of the words for finding the initial-space of every word 5 | # after decoding. 
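# A minimal illustrative example (the input text is hypothetical):
#   echo "hello world" | utils/lang/bpe/prepend_words.py
# prints:
#   |hello |world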
6 | 7 | import sys 8 | import io 9 | import re 10 | 11 | whitespace = re.compile("[ \t]+") 12 | infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1') 13 | output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1') 14 | for line in infile: 15 | words = whitespace.split(line.strip(" \t\r\n")) 16 | output.write(' '.join([ "|"+word for word in words]) + '\n') 17 | -------------------------------------------------------------------------------- /utils/lang/bpe/reverse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # This script, reverse all latin and digits sequences 5 | # (including words like MP3) to put them in the right order in the images. 6 | 7 | import re, os, sys, io 8 | 9 | in_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') 10 | out_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') 11 | for line in in_stream: 12 | out_stream.write(re.sub(r'[a-zA-Z0-9][a-zA-Z0-9\s\.\:]*[a-zA-Z0-9]', 13 | lambda m:m.group(0)[::-1], line)) 14 | -------------------------------------------------------------------------------- /utils/lang/check_phones_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2016 Hang Lyu 3 | 4 | # Licensed udner the Apache License, Version 2.0 (the "Lincense"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OF IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script exits with status zero if the phone symbols tables are the same 18 | # except for possible differences in disambiguation symbols (meaning that all 19 | # symbols except those beginning with a # are mapped to the same values). 20 | # Otherwise it prints a warning and exits with status 1. 21 | # For the sake of compatibility with other scripts that did not write the 22 | # phones.txt to model directories, this script exits silently with status 0 23 | # if one of the phone symbol tables does not exist. 24 | 25 | . utils/parse_options.sh || exit 1; 26 | 27 | if [ $# -ne 2 ]; then 28 | echo "Usage: utils/lang/check_phones_compatible.sh " 29 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt" 30 | exit 1; 31 | fi 32 | 33 | table_first=$1 34 | table_second=$2 35 | 36 | # check if the files exist or not 37 | if [ ! -f $table_first ]; then 38 | if [ ! -f $table_second ]; then 39 | echo "$0: Error! Both of the two phones-symbol tables are absent." 40 | echo "Please check your command" 41 | exit 1; 42 | else 43 | # The phones-symbol-table1 is absent. The model directory maybe created by old script. 44 | # For back compatibility, this script exits silently with status 0. 45 | exit 0; 46 | fi 47 | elif [ ! -f $table_second ]; then 48 | # The phones-symbol-table2 is absent. The model directory maybe created by old script. 49 | # For back compatibility, this script exits silently with status 0. 
50 | exit 0; 51 | fi 52 | 53 | # Check if the two tables are the same (except for possible difference in disambiguation symbols). 54 | if ! cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then 55 | echo "$0: phone symbol tables $table_first and $table_second are not compatible." 56 | exit 1; 57 | fi 58 | 59 | exit 0; 60 | -------------------------------------------------------------------------------- /utils/ln.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use File::Spec; 3 | 4 | if ( @ARGV < 2 ) { 5 | print STDERR "usage: ln.pl input1 input2 dest-dir\n" . 6 | "This script does a soft link of input1, input2, etc." . 7 | "to dest-dir, using relative links where possible\n" . 8 | "Note: input-n and dest-dir may both be absolute pathnames,\n" . 9 | "or relative pathnames, relative to the current directlory.\n"; 10 | exit(1); 11 | } 12 | 13 | $dir = pop @ARGV; 14 | if ( ! -d $dir ) { 15 | print STDERR "ln.pl: last argument must be a directory ($dir is not a directory)\n"; 16 | exit(1); 17 | } 18 | 19 | $ans = 1; # true. 20 | 21 | $absdir = File::Spec->rel2abs($dir); # Get $dir as abs path. 22 | defined $absdir || die "No such directory $dir"; 23 | foreach $file (@ARGV) { 24 | $absfile = File::Spec->rel2abs($file); # Get $file as abs path. 25 | defined $absfile || die "No such file or directory: $file"; 26 | @absdir_split = split("/", $absdir); 27 | @absfile_split = split("/", $absfile); 28 | 29 | $newfile = $absdir . "/" . $absfile_split[$#absfile_split]; # we'll use this 30 | # as the destination in the link command. 31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /utils/make_absolute.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script replaces the command readlink -f (which is not portable). 4 | # It turns a pathname into an absolute pathname, including following soft links. 5 | target_file=$1 6 | 7 | cd $(dirname $target_file) 8 | target_file=$(basename "$target_file") 9 | 10 | # Iterate down a (possible) chain of symlinks 11 | while [ -L "$target_file" ]; do 12 | target_file=$(readlink $target_file) 13 | cd $(dirname $target_file) 14 | target_file=$(basename $target_file) 15 | done 16 | 17 | # Compute the canonicalized name by finding the physical path 18 | # for the directory we're in and appending the target file. 
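# e.g. (illustrative; the resolved absolute prefix below is hypothetical):
#   utils/make_absolute.sh data/train   ->   /abs/path/to/egs/foo/data/train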
19 | phys_dir=$(pwd -P) 20 | result=$phys_dir/$target_file 21 | echo $result 22 | -------------------------------------------------------------------------------- /utils/make_unigram_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script is used in discriminative training. 18 | # This script makes a simple unigram-loop version of G.fst 19 | # using a unigram grammar estimated from some training transcripts. 20 | # This is for MMI training. 21 | # We don't have any silences in G.fst; these are supplied by the 22 | # optional silences in the lexicon. 23 | 24 | # Note: the symbols in the transcripts become the input and output 25 | # symbols of G.txt; these can be numeric or not. 26 | 27 | if(@ARGV != 0) { 28 | die "Usage: make_unigram_grammar.pl < text-transcripts > G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 43 | } 44 | 45 | foreach $a (keys %count) { 46 | $prob = $count{$a} / $totcount; 47 | $cost = -log($prob); # Negated natural-log probs. 48 | print "0\t0\t$a\t$a\t$cost\n"; 49 | } 50 | # Zero final-cost. 51 | $final_prob = $nl / $totcount; 52 | $final_cost = -log($final_prob); 53 | print "0\t$final_cost\n"; 54 | 55 | -------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_dct_mat.py 19 | # script generates matrix with DCT transform, which is sparse 20 | # and takes into account that data-layout is along frequency axis, 21 | # while DCT is done along temporal axis. 
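# Illustrative invocation (the option values are only an example):
#   utils/nnet/gen_dct_mat.py --fea-dim=23 --splice=5 --dct-basis=6 > dct.mat
# This writes a sparse (dct_basis*fea_dim) x ((2*splice+1)*fea_dim) matrix
# in Kaldi text-matrix format.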
22 | 23 | from __future__ import division 24 | from __future__ import print_function 25 | from math import * 26 | import sys 27 | 28 | 29 | from optparse import OptionParser 30 | 31 | def print_on_same_line(text): 32 | print(text, end=' ') 33 | 34 | parser = OptionParser() 35 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 36 | parser.add_option('--splice', dest='splice', help='applied splice value') 37 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis') 38 | (options, args) = parser.parse_args() 39 | 40 | if(options.dim == None): 41 | parser.print_help() 42 | sys.exit(1) 43 | 44 | dim=int(options.dim) 45 | splice=int(options.splice) 46 | dct_basis=int(options.dct_basis) 47 | 48 | timeContext=2*splice+1 49 | 50 | 51 | #generate the DCT matrix 52 | M_PI = 3.1415926535897932384626433832795 53 | M_SQRT2 = 1.4142135623730950488016887 54 | 55 | 56 | #generate sparse DCT matrix 57 | print('[') 58 | for k in range(dct_basis): 59 | for m in range(dim): 60 | for n in range(timeContext): 61 | if(n==0): 62 | print_on_same_line(m*'0 ') 63 | else: 64 | print_on_same_line((dim-1)*'0 ') 65 | print_on_same_line(str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5)))) 66 | if(n==timeContext-1): 67 | print_on_same_line((dim-m-1)*'0 ') 68 | print() 69 | print() 70 | 71 | print(']') 72 | 73 | -------------------------------------------------------------------------------- /utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # ./gen_hamm_mat.py 19 | # script generates diagonal matrix with hamming window values 20 | 21 | from __future__ import division 22 | from __future__ import print_function 23 | from math import * 24 | import sys 25 | 26 | 27 | from optparse import OptionParser 28 | 29 | def print_on_same_line(text): 30 | print(text, end=' ') 31 | 32 | parser = OptionParser() 33 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 34 | parser.add_option('--splice', dest='splice', help='applied splice value') 35 | (options, args) = parser.parse_args() 36 | 37 | if(options.dim == None): 38 | parser.print_help() 39 | sys.exit(1) 40 | 41 | dim=int(options.dim) 42 | splice=int(options.splice) 43 | 44 | 45 | #generate the diagonal matrix with hammings 46 | M_2PI = 6.283185307179586476925286766559005 47 | 48 | dim_mat=(2*splice+1)*dim 49 | timeContext=2*splice+1 50 | print('[') 51 | for row in range(dim_mat): 52 | for col in range(dim_mat): 53 | if col!=row: 54 | print_on_same_line('0') 55 | else: 56 | i=int(row/dim) 57 | print_on_same_line(str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1)))) 58 | print() 59 | 60 | print(']') 61 | 62 | 63 | -------------------------------------------------------------------------------- /utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # ./gen_splice.py 19 | # generates Component 20 | 21 | from __future__ import print_function 22 | from math import * 23 | import sys 24 | 25 | 26 | from optparse import OptionParser 27 | 28 | def print_on_same_line(text): 29 | print(text, end=' ') 30 | 31 | parser = OptionParser() 32 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') 33 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') 34 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' ) 35 | (options, args) = parser.parse_args() 36 | 37 | if(options.dim_in == None): 38 | parser.print_help() 39 | sys.exit(1) 40 | 41 | dim_in=int(options.dim_in) 42 | splice=int(options.splice) 43 | splice_step=int(options.splice_step) 44 | 45 | dim_out=(2*splice+1)*dim_in 46 | 47 | print(' {0} {1}'.format(dim_out, dim_in)) 48 | print_on_same_line('[') 49 | 50 | splice_vec = list(range(-splice*splice_step, splice*splice_step+1, splice_step)) 51 | for idx in range(len(splice_vec)): 52 | print_on_same_line(splice_vec[idx]) 53 | 54 | print(']') 55 | 56 | -------------------------------------------------------------------------------- /utils/nnet/subset_data_tr_cv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2017 Brno University of Technology (Author: Karel Vesely); 4 | # Apache 2.0 5 | 6 | # This scripts splits 'data' directory into two parts: 7 | # - training set with 90% of speakers 8 | # - held-out set with 10% of speakers (cv) 9 | # (to be used in frame cross-entropy training of 'nnet1' models), 10 | 11 | # The script also accepts a list of held-out set speakers by '--cv-spk-list' 12 | # (with perturbed data, we pass the list of speakers externally). 13 | # The remaining set of speakers is the the training set. 14 | 15 | cv_spk_percent=10 16 | cv_spk_list= # To be used with perturbed data, 17 | seed=777 18 | cv_utt_percent= # ignored (compatibility), 19 | . utils/parse_options.sh 20 | 21 | if [ $# != 3 ]; then 22 | echo "Usage: $0 [opts] " 23 | echo " --cv-spk-percent N (default 10)" 24 | echo " --cv-spk-list (a pre-defined list with cv speakers)" 25 | exit 1; 26 | fi 27 | 28 | set -euo pipefail 29 | 30 | src_data=$1 31 | trn_data=$2 32 | cv_data=$3 33 | 34 | [ ! -r $src_data/spk2utt ] && echo "Missing '$src_data/spk2utt'. Error!" && exit 1 35 | 36 | tmp=$(mktemp -d /tmp/${USER}_XXXXX) 37 | 38 | if [ -z "$cv_spk_list" ]; then 39 | # Select 'cv_spk_percent' speakers randomly, 40 | cat $src_data/spk2utt | awk '{ print $1; }' | utils/shuffle_list.pl --srand $seed >$tmp/speakers 41 | n_spk=$(wc -l <$tmp/speakers) 42 | n_spk_cv=$(perl -e "print int($cv_spk_percent * $n_spk / 100); ") 43 | # 44 | head -n $n_spk_cv $tmp/speakers >$tmp/speakers_cv 45 | tail -n+$((n_spk_cv+1)) $tmp/speakers >$tmp/speakers_trn 46 | else 47 | # Use pre-defined list of speakers, 48 | cp $cv_spk_list $tmp/speakers_cv 49 | join -v2 <(sort $cv_spk_list) <(awk '{ print $1; }' <$src_data/spk2utt | sort) >$tmp/speakers_trn 50 | fi 51 | 52 | # Sanity checks, 53 | n_spk=$(wc -l <$src_data/spk2utt) 54 | echo "Speakers, src=$n_spk, trn=$(wc -l <$tmp/speakers_trn), cv=$(wc -l $tmp/speakers_cv)" 55 | overlap=$(join <(sort $tmp/speakers_trn) <(sort $tmp/speakers_cv) | wc -l) 56 | [ $overlap != 0 ] && \ 57 | echo "WARNING, speaker overlap detected!" 
&& \ 58 | join <(sort $tmp/speakers_trn) <(sort $tmp/speakers_cv) | head && \ 59 | echo '...' 60 | 61 | # Create new data dirs, 62 | utils/data/subset_data_dir.sh --spk-list $tmp/speakers_trn $src_data $trn_data 63 | utils/data/subset_data_dir.sh --spk-list $tmp/speakers_cv $src_data $cv_data 64 | 65 | -------------------------------------------------------------------------------- /utils/parallel/limit_num_gpus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This script functions as a wrapper of a bash command that uses GPUs. 4 | # 5 | # It sets the CUDA_VISIBLE_DEVICES variable so that it limits the number of GPUs 6 | # used for programs. It is neccesary for running a job on the grid if the job 7 | # would automatically grabs all resources available on the system, e.g. a 8 | # TensorFlow program. 9 | 10 | num_gpus=1 # this variable indicates how many GPUs we will allow the command 11 | # passed to this script will run on. We achieve this by setting the 12 | # CUDA_VISIBLE_DEVICES variable 13 | set -e 14 | 15 | if [ "$1" == "--num-gpus" ]; then 16 | num_gpus=$2 17 | shift 18 | shift 19 | fi 20 | 21 | if ! printf "%d" "$num_gpus" >/dev/null || [ $num_gpus -le -1 ]; then 22 | echo $0: Must pass a positive interger or 0 after --num-gpus 23 | echo e.g. $0 --num-gpus 2 local/tfrnnlm/run_lstm.sh 24 | exit 1 25 | fi 26 | 27 | if [ $# -eq 0 ]; then 28 | echo "Usage: $0 [--num-gpus ] [...]" 29 | echo "Runs with args after setting CUDA_VISIBLE_DEVICES to " 30 | echo "make sure exactly GPUs are visible (default: 1)." 31 | exit 1 32 | fi 33 | 34 | CUDA_VISIBLE_DEVICES= 35 | num_total_gpus=`nvidia-smi -L | wc -l` 36 | num_gpus_assigned=0 37 | 38 | if [ $num_gpus -eq 0 ] ; then 39 | echo "$0: Running the job on CPU. Disabling submitting to gpu" 40 | export CUDA_VISIBLE_DEVICES="" 41 | else 42 | for i in `seq 0 $[$num_total_gpus-1]`; do 43 | # going over all GPUs and check if it is idle, and add to the list if yes 44 | if nvidia-smi -i $i | grep "No running processes found" >/dev/null; then 45 | CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}$i, && num_gpus_assigned=$[$num_gpus_assigned+1] 46 | fi 47 | # once we have enough GPUs, break out of the loop 48 | [ $num_gpus_assigned -eq $num_gpus ] && break 49 | done 50 | 51 | [ $num_gpus_assigned -ne $num_gpus ] && echo Could not find enough idle GPUs && exit 1 52 | 53 | export CUDA_VISIBLE_DEVICES=$(echo $CUDA_VISIBLE_DEVICES | sed "s=,$==g") 54 | 55 | echo "$0: Running the job on GPU(s) $CUDA_VISIBLE_DEVICES" 56 | fi 57 | 58 | "$@" 59 | -------------------------------------------------------------------------------- /utils/prepare_online_nnet_dist_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti) 4 | # Guoguo Chen 5 | # Apache 2.0 6 | # Script to prepare the distribution from the online-nnet build 7 | 8 | other_files= #other files to be included in the build 9 | other_dirs= 10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf" 11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts" 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | [ -f path.sh ] && . ./path.sh; 15 | . 
parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: $0 " 19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz" 20 | exit 1; 21 | fi 22 | 23 | lang=$1 24 | modeldir=$2 25 | tgzfile=$3 26 | 27 | for f in $lang/phones.txt $other_files; do 28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 29 | done 30 | 31 | build_files= 32 | for d in $modeldir/conf $modeldir/ivector_extractor; do 33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1; 34 | done 35 | 36 | for f in $ivec_extractor_files; do 37 | f=$modeldir/ivector_extractor/$f 38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 39 | build_files="$build_files $f" 40 | done 41 | 42 | # Makes a copy of the original config files, as we will change the absolute path 43 | # to relative. 44 | rm -rf $modeldir/conf_abs_path 45 | mkdir -p $modeldir/conf_abs_path 46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path 47 | 48 | for f in $conf_files; do 49 | [ ! -f $modeldir/conf/$f ] && \ 50 | echo "$0: no such file $modeldir/conf/$f" && exit 1; 51 | # Changes absolute path to relative path. The path entries in the config file 52 | # are generated by scripts and it is safe to assume that they have structure: 53 | # variable=path 54 | cat $modeldir/conf_abs_path/$f | perl -e ' 55 | use File::Spec; 56 | while() { 57 | chomp; 58 | @col = split("=", $_); 59 | if (@col == 2 && (-f $col[1])) { 60 | $col[1] = File::Spec->abs2rel($col[1]); 61 | print "$col[0]=$col[1]\n"; 62 | } else { 63 | print "$_\n"; 64 | } 65 | } 66 | ' > $modeldir/conf/$f 67 | build_files="$build_files $modeldir/conf/$f" 68 | done 69 | 70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \ 71 | $modeldir/final.mdl $modeldir/tree >/dev/null 72 | 73 | # Changes back to absolute path. 74 | rm -rf $modeldir/conf 75 | mv $modeldir/conf_abs_path $modeldir/conf 76 | -------------------------------------------------------------------------------- /utils/remove_data_links.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This program searches within a directory for soft links that 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory, 5 | # and it removes both the soft links and the things they point to. 6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] " 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | fi 29 | 30 | for dir in $*; do 31 | if [ ! 
-d $dir ]; then 32 | echo "$0: not a directory: $dir" 33 | ret=1 34 | else 35 | for subdir in $(find $dir -type d); do 36 | if [ -d $subdir/storage ]; then 37 | for x in $(ls $subdir); do 38 | f=$subdir/$x 39 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 40 | target=$subdir/$(readlink $f) 41 | if $dry_run; then 42 | echo rm $f $target 43 | else 44 | rm $f $target 45 | fi 46 | fi 47 | done 48 | fi 49 | done 50 | fi 51 | done 52 | 53 | exit $ret 54 | -------------------------------------------------------------------------------- /utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 20 | 21 | if ( @ARGV < 1 && @ARGV > 2) { 22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; 23 | } 24 | 25 | $unklist = shift @ARGV; 26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 1 || die "Bad line in unknown-symbol list: $_"; 30 | $unk{$A[0]} = 1; 31 | } 32 | 33 | $num_removed = 0; 34 | while(<>){ 35 | @A = split(" ", $_); 36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) { 37 | $num_removed++; 38 | } else { 39 | print; 40 | } 41 | } 42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; 43 | 44 | -------------------------------------------------------------------------------- /utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces and with (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } 24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } 25 | } 26 | print join("\t", @A) . 
"\n"; 27 | } 28 | -------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal ) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # This script reads per-utt table generated for example during scoring 19 | # and outpus the WER similar to the format the compute-wer utility 20 | # or the utils/best_wer.pl produces 21 | # i.e. from table containing lines in this format 22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463 23 | # produces something output like this 24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 25 | # NB: if the STDIN stream will contain more of the SUM raw entries, 26 | # the best one will be found and printed 27 | # 28 | # If the script is called with parameters, it uses them pro provide 29 | # a description of the output 30 | # i.e. 31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set 32 | # the following output will be produced 33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set 34 | 35 | 36 | while () { 37 | if ( m:SUM\s+raw:) { 38 | @F = split; 39 | if ((!defined $wer) || ($wer > $F[8])) { 40 | $corr=$F[4]; 41 | $sub=$F[5]; 42 | $ins=$F[6]; 43 | $del=$F[7]; 44 | $wer=$F[8]; 45 | $words=$F[3]; 46 | } 47 | } 48 | } 49 | 50 | if (defined $wer) { 51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words); 52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]"; 53 | print " " . join(" ", @ARGV) if @ARGV > 0; 54 | print "\n"; 55 | } 56 | -------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] " 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . ./path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! 
-------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] <utterance-id> <lattice-ark> <word-list>" 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . ./path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! [ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1; 28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Cannot automatically open the file on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 |
-------------------------------------------------------------------------------- /utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines; 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] cmp $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 |
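A small sketch of typical use, with hypothetical paths: drawing a reproducible random subset of speakers, which is essentially what subset_data_dir_tr_cv.sh below does internally:
# pick 10 random speakers, reproducibly (seed 777)
awk '{print $1}' data/train/spk2utt | utils/shuffle_list.pl --srand 777 | head -n 10 > heldout_speakers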
-------------------------------------------------------------------------------- /utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 |
-------------------------------------------------------------------------------- /utils/subset_data_dir_tr_cv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2017 Brno University of Technology (Author: Karel Vesely); 4 | # Apache 2.0 5 | 6 | # This script splits the 'data' directory into two parts: 7 | # - training set with 90% of speakers 8 | # - held-out set with 10% of speakers (cv) 9 | # (to be used in frame cross-entropy training of 'nnet1' models), 10 | 11 | # The script also accepts a list of held-out set speakers by '--cv-spk-list' 12 | # (with perturbed data, we pass the list of speakers externally). 13 | # The remaining set of speakers is the training set. 14 | 15 | cv_spk_percent=10 16 | cv_spk_list= # To be used with perturbed data, 17 | seed=777 18 | cv_utt_percent= # ignored (compatibility), 19 | . utils/parse_options.sh 20 | 21 | if [ $# != 3 ]; then 22 | echo "Usage: $0 [opts] <src-data> <trn-data> <cv-data>" 23 | echo " --cv-spk-percent N (default 10)" 24 | echo " --cv-spk-list <file> (a pre-defined list with cv speakers)" 25 | exit 1; 26 | fi 27 | 28 | set -euo pipefail 29 | 30 | src_data=$1 31 | trn_data=$2 32 | cv_data=$3 33 | 34 | [ ! -r $src_data/spk2utt ] && echo "Missing '$src_data/spk2utt'. Error!" && exit 1 35 | 36 | tmp=$(mktemp -d /tmp/${USER}_XXXXX) 37 | 38 | if [ -z "$cv_spk_list" ]; then 39 | # Select 'cv_spk_percent' speakers randomly, 40 | cat $src_data/spk2utt | awk '{ print $1; }' | utils/shuffle_list.pl --srand $seed >$tmp/speakers 41 | n_spk=$(wc -l <$tmp/speakers) 42 | n_spk_cv=$(perl -e "print int($cv_spk_percent * $n_spk / 100); ") 43 | # 44 | head -n $n_spk_cv $tmp/speakers >$tmp/speakers_cv 45 | tail -n+$((n_spk_cv+1)) $tmp/speakers >$tmp/speakers_trn 46 | else 47 | # Use pre-defined list of speakers, 48 | cp $cv_spk_list $tmp/speakers_cv 49 | join -v2 <(sort $cv_spk_list) <(awk '{ print $1; }' <$src_data/spk2utt | sort) >$tmp/speakers_trn 50 | fi 51 | 52 | # Sanity checks, 53 | n_spk=$(wc -l <$src_data/spk2utt) 54 | echo "Speakers, src=$n_spk, trn=$(wc -l <$tmp/speakers_trn), cv=$(wc -l <$tmp/speakers_cv)" 55 | overlap=$(join <(sort $tmp/speakers_trn) <(sort $tmp/speakers_cv) | wc -l) 56 | [ $overlap != 0 ] && \ 57 | echo "WARNING, speaker overlap detected!" && \ 58 | join <(sort $tmp/speakers_trn) <(sort $tmp/speakers_cv) | head && \ 59 | echo '...' 60 | 61 | # Create new data dirs, 62 | utils/data/subset_data_dir.sh --spk-list $tmp/speakers_trn $src_data $trn_data 63 | utils/data/subset_data_dir.sh --spk-list $tmp/speakers_cv $src_data $cv_data 64 | 65 |
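A minimal invocation sketch with the conventional nnet1-style output directory names (the directory names are hypothetical):
# hold out 10% of speakers for cross-validation, keep 90% for training
utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 data/train data/train_tr90 data/train_cv10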
-------------------------------------------------------------------------------- /utils/subword/prepare_subword_text.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 2019 Dongji Gao 4 | 5 | # This script generates subword text from word text. 6 | # For example, international -> inter@@ nation@@ al 7 | # @@ here is the separator indicating the position of a subword within a word. 8 | # A subword directly followed by the separator can only appear at the beginning or middle of a word. 9 | # Special words (e.g. noise markers) can be kept unsplit by adding them to the option "--glossaries" 10 | 11 | # Begin configuration section 12 | separator="@@" 13 | glossaries= 14 | # End configuration section 15 | 16 | . utils/parse_options.sh 17 | 18 | echo "$0 $@" 19 | 20 | if [ $# -ne 3 ]; then 21 | echo "Usage: utils/subword/prepare_subword_text.sh <word-text> <pair-code> <subword-text>" 22 | echo "e.g.: utils/subword/prepare_subword_text.sh data/train/text data/local/pair_code.txt data/train/text_subword" 23 | echo " --separator <separator> # default: @@" 24 | echo " --glossaries <glossary-words> # words reserved from subword splitting" 25 | exit 1; 26 | fi 27 | 28 | word_text=$1 29 | pair_code=$2 30 | subword_text=$3 31 | 32 | [ ! -f $word_text ] && echo "Word text $word_text does not exist." && exit 1; 33 | 34 | grep -q $separator $word_text && echo "$0: Error, word text file contains separator $separator. This might be a subword text file or you need to choose a different separator" && exit 1; 35 | 36 | glossaries_opt= 37 | [ -z "$glossaries" ] || glossaries_opt="--glossaries $glossaries" 38 | cut -d ' ' -f2- $word_text | \ 39 | utils/lang/bpe/apply_bpe.py -c $pair_code --separator $separator $glossaries_opt > ${word_text}.sub 40 | if [ $word_text == $subword_text ]; then 41 | mv $word_text ${word_text}.old 42 | cut -d ' ' -f1 ${word_text}.old | paste -d ' ' - ${word_text}.sub > $subword_text 43 | else 44 | cut -d ' ' -f1 $word_text | paste -d ' ' - ${word_text}.sub > $subword_text 45 | fi 46 | 47 | rm ${word_text}.sub 48 | echo "Subword text created." 49 |
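A sketch of the intended effect, assuming a BPE pair-code file learned beforehand (for instance with utils/lang/bpe/learn_bpe.py); the paths are hypothetical:
utils/subword/prepare_subword_text.sh --separator "@@" \
  data/train/text data/local/pair_code.txt data/train/text_subword
# a transcript line such as "utt1 international trade" might come out as
# "utt1 inter@@ nation@@ al trade", depending on the learned merges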
"-o 4 -S 50% --prune 0 5 7 7" 16 | 17 | if [ $# != 4 ]; then 18 | echo "$0 " 19 | echo "e.g. $0 train.txt words.txt wdir 4gram" 20 | exit 1 21 | fi 22 | 23 | text=$1 24 | symbol_table=$2 25 | dir=$3 26 | arpa_name=$4 27 | 28 | if ! which lmplz >& /dev/null ; then 29 | echo "$0: cannot find training tool *lmplz*." 30 | echo "tools/extras/install_kenlm_query_only.sh installs kenlm at tools/kenlm" 31 | echo "it only supports runtime mode, to actually train an arpa using KenLM," 32 | echo "you need a complete KenLM installation(depends on EIGEN and BOOST)," 33 | echo "follow KenLM's building instructions at (https://github.com/kpu/kenlm)" 34 | exit 1 35 | fi 36 | 37 | # the text should be properly pre-processed, e.g: 38 | # cleand, normalized and possibly word-segmented 39 | 40 | # get rid off irrelavent symbols 41 | grep -v '' $symbol_table \ 42 | | grep -v '#0' \ 43 | | grep -v '' | grep -v '' \ 44 | | grep -v '' | grep -v '' \ 45 | | awk '{print $1}' \ 46 | > $dir/ngram.vocab 47 | 48 | # To make sure that kenlm & kaldi have strictly the same vocabulary: 49 | # 1. feed vocabulary into kenlm via --limit_vocab_file 50 | # 2. cat vocabulary to training text, so each word at least appear once 51 | # 52 | # TL;DR reason: 53 | # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option 54 | # spcifies a *valid* set of vocabulary, whereas *valid but unseen* 55 | # words are discarded in final arpa. 56 | # So the trick is, 57 | # we explicitly add kaldi's vocab(one word per line) to training text, 58 | # making each word appear at least once. 59 | # kenlm never prunes unigram, 60 | # so this always generates consistent kenlm vocabuary as kaldi has. 61 | # The effect of this is like add-one smoothing to unigram counts, 62 | # shouldn't have significant impacts in practice. 63 | cat $dir/ngram.vocab $text \ 64 | | lmplz $kenlm_opts --limit_vocab_file $dir/ngram.vocab \ 65 | > $dir/${arpa_name}.arpa 66 | 67 | echo "$0: Done training arpa to: $dir/${arpa_name}.arpa" 68 | -------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 
-------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 |
--------------------------------------------------------------------------------