├── .gitignore
├── INSTALL
├── LICENSE
├── README.md
├── asr_egs
├── hkust
│ └── v1
│ │ ├── README.md
│ │ ├── RESULTS
│ │ ├── cmd.sh
│ │ ├── conf
│ │ ├── cmu2pinyin
│ │ ├── fbank.conf
│ │ ├── mfcc.conf
│ │ ├── pinyin2cmu
│ │ └── pitch.conf
│ │ ├── local
│ │ ├── hkust_data_prep.sh
│ │ ├── hkust_decode_graph.sh
│ │ ├── hkust_normalize.pl
│ │ ├── hkust_prepare_char_dict.sh
│ │ ├── hkust_prepare_dict.sh
│ │ ├── hkust_prepare_phn_dict.sh
│ │ ├── hkust_segment.py
│ │ ├── hkust_train_lms.sh
│ │ └── score.sh
│ │ ├── path.sh
│ │ ├── run_ctc_char.sh
│ │ ├── run_ctc_phn.sh
│ │ ├── steps
│ │ └── utils
├── librispeech
│ ├── README.md
│ ├── RESULTS
│ ├── cmd.sh
│ ├── config
│ │ ├── README.reduced_dict
│ │ ├── fbconf-10
│ │ ├── fbconf-11
│ │ ├── fbconf-8
│ │ ├── librispeech_phn_reduced_dict.txt
│ │ ├── nnet.proto.nml_seq_fw_seq_tw
│ │ ├── nnet.proto.nml_seq_fw_seq_tw_cascade
│ │ └── nnet.proto.nml_seq_fw_step_2_nml_step_fw_seq_cascade
│ ├── local
│ │ ├── data_prep.sh
│ │ ├── download_and_untar.sh
│ │ ├── download_lm.sh
│ │ ├── ls_decode_graph.sh
│ │ ├── ls_prepare_phoneme_dict.sh
│ │ └── score.sh
│ ├── path.sh
│ ├── run_nml_seq_fw_seq_tw.sh
│ ├── run_nml_seq_fw_step_2_nml_step_fw_seq_cascade.sh
│ ├── steps
│ │ ├── compute_cmvn_stats.sh
│ │ ├── compute_cmvn_stats_mult.sh
│ │ ├── decode_ctc_lat_splicefeat.sh
│ │ ├── make_fbank.sh
│ │ ├── make_fbank_mult.sh
│ │ └── train_ctc_parallel_mult.sh
│ └── utils
│ │ ├── add_lex_disambig.pl
│ │ ├── best_wer.sh
│ │ ├── create_data_link.pl
│ │ ├── ctc_compile_dict_token.sh
│ │ ├── ctc_token_fst.py
│ │ ├── eps2disambig.pl
│ │ ├── filter_scp.pl
│ │ ├── find_arpa_oovs.pl
│ │ ├── fix_data_dir.sh
│ │ ├── int2sym.pl
│ │ ├── make_lexicon_fst.pl
│ │ ├── parse_options.sh
│ │ ├── prep_ctc_trans.py
│ │ ├── remove_oovs.pl
│ │ ├── run.pl
│ │ ├── s2eps.pl
│ │ ├── shuffle_list.pl
│ │ ├── spk2utt_to_utt2spk.pl
│ │ ├── split_data.sh
│ │ ├── split_scp.pl
│ │ ├── subset_data_dir.sh
│ │ ├── subset_data_dir_tr_cv.sh
│ │ ├── sym2int.pl
│ │ ├── utt2spk_to_spk2utt.pl
│ │ └── validate_data_dir.sh
├── swbd
│ └── v1
│ │ ├── README.md
│ │ ├── RESULTS
│ │ ├── cmd.sh
│ │ ├── conf
│ │ ├── char_l5_c640.proto
│ │ ├── fbank.conf
│ │ ├── mfcc.conf
│ │ └── phn_l5_c640.proto
│ │ ├── local
│ │ ├── eval2000_data_prep.sh
│ │ ├── extend_segments.pl
│ │ ├── fisher_map_words.pl
│ │ ├── remove_dup_utts.sh
│ │ ├── score_sclite.sh
│ │ ├── score_sclite_conf.sh
│ │ ├── swbd1_data_prep.sh
│ │ ├── swbd1_decode_graph.sh
│ │ ├── swbd1_map_words.pl
│ │ ├── swbd1_prepare_char_dict.sh
│ │ ├── swbd1_prepare_phn_dict.sh
│ │ └── swbd1_train_lms.sh
│ │ ├── path.sh
│ │ ├── run_ctc_char.sh
│ │ ├── run_ctc_phn.sh
│ │ ├── steps
│ │ └── utils
├── tedlium
│ ├── v1
│ │ ├── RESULTS
│ │ ├── cmd.sh
│ │ ├── conf
│ │ │ ├── fbank.conf
│ │ │ ├── mfcc.conf
│ │ │ └── pitch.conf
│ │ ├── local
│ │ │ ├── join_suffix.py
│ │ │ ├── score_sclite.sh
│ │ │ ├── tedlium_decode_graph.sh
│ │ │ ├── tedlium_download_data.sh
│ │ │ ├── tedlium_prepare_char_dict.sh
│ │ │ ├── tedlium_prepare_data.sh
│ │ │ └── tedlium_prepare_phn_dict.sh
│ │ ├── path.sh
│ │ ├── run_ctc_char.sh
│ │ ├── run_ctc_phn.sh
│ │ ├── steps
│ │ └── utils
│ └── v2-30ms
│ │ ├── RESULTS
│ │ ├── cmd.sh
│ │ ├── conf
│ │ ├── local
│ │ ├── path.sh
│ │ ├── run_ctc_phn.sh
│ │ ├── steps
│ │ └── utils
└── wsj
│ ├── README.md
│ ├── RESULTS
│ ├── cmd.sh
│ ├── conf
│ ├── fbank.conf
│ └── mfcc.conf
│ ├── local
│ ├── find_transcripts.pl
│ ├── flist2scp.pl
│ ├── ndx2flist.pl
│ ├── normalize_transcript.pl
│ ├── score.sh
│ ├── wsj_data_prep.sh
│ ├── wsj_decode_graph.sh
│ ├── wsj_decode_graph_local_lms.sh
│ ├── wsj_expand_vocab.sh
│ ├── wsj_prepare_char_dict.sh
│ ├── wsj_prepare_phn_dict.sh
│ └── wsj_train_lms.sh
│ ├── path.sh
│ ├── run_ctc_char.sh
│ ├── run_ctc_phn.sh
│ ├── steps
│ ├── align_ctc_single_utt.sh
│ ├── compute_cmvn_stats.sh
│ ├── decode_ctc.sh
│ ├── decode_ctc_lat.sh
│ ├── make_fbank.sh
│ ├── make_fbank_pitch.sh
│ ├── train_ctc_parallel.sh
│ ├── train_ctc_parallel_h.sh
│ └── train_ctc_parallel_x3.sh
│ └── utils
│ ├── add_lex_disambig.pl
│ ├── best_wer.sh
│ ├── build_const_arpa_lm.sh
│ ├── convert_ctm.pl
│ ├── create_data_link.pl
│ ├── ctc_compile_dict_token.sh
│ ├── ctc_token_fst.py
│ ├── distribute_scp.pl
│ ├── eps2disambig.pl
│ ├── filter_scp.pl
│ ├── find_arpa_oovs.pl
│ ├── fix_data_dir.sh
│ ├── format_lm_sri.sh
│ ├── int2sym.pl
│ ├── make_lexicon_fst.pl
│ ├── model_topo.py
│ ├── parse_options.sh
│ ├── pinyin_map.pl
│ ├── prep_ctc_trans.py
│ ├── prep_ctc_trans_bkup.py
│ ├── prep_scps.sh
│ ├── queue.pl
│ ├── remove_oovs.pl
│ ├── run.pl
│ ├── run_rocks.pl
│ ├── s2eps.pl
│ ├── shuffle_list.pl
│ ├── slurm.pl
│ ├── slurm_comet.pl
│ ├── spk2utt_to_utt2spk.pl
│ ├── split_data.sh
│ ├── split_scp.pl
│ ├── subset_data_dir.sh
│ ├── subset_data_dir_tr_cv.sh
│ ├── subset_scp.pl
│ ├── sym2int.pl
│ ├── training_trans_fst.py
│ ├── utt2spk_to_spk2utt.pl
│ └── validate_data_dir.sh
├── src
├── Makefile
├── base
│ ├── Makefile
│ ├── io-funcs-inl.h
│ ├── io-funcs-test.cc
│ ├── io-funcs.cc
│ ├── io-funcs.h
│ ├── kaldi-common.h
│ ├── kaldi-error-test.cc
│ ├── kaldi-error.cc
│ ├── kaldi-error.h
│ ├── kaldi-math-test.cc
│ ├── kaldi-math.cc
│ ├── kaldi-math.h
│ ├── kaldi-types.h
│ ├── kaldi-utils.cc
│ ├── kaldi-utils.h
│ ├── timer-test.cc
│ └── timer.h
├── configure
├── cpucompute
│ ├── Makefile
│ ├── Matrix.vcxproj
│ ├── blas.h
│ ├── cblas-wrappers.h
│ ├── compressed-matrix.cc
│ ├── compressed-matrix.h
│ ├── matrix-common.h
│ ├── matrix-functions-inl.h
│ ├── matrix-functions.cc
│ ├── matrix-functions.h
│ ├── matrix-inl.h
│ ├── matrix-lib.h
│ ├── matrix.cc
│ ├── matrix.h
│ ├── vector-inl.h
│ ├── vector.cc
│ └── vector.h
├── decoder
│ ├── Makefile
│ ├── decodable-itf.h
│ ├── decodable-matrix.h
│ ├── decoder-wrappers.cc
│ ├── decoder-wrappers.h
│ ├── faster-decoder.cc
│ ├── faster-decoder.h
│ ├── lattice-faster-decoder.cc
│ └── lattice-faster-decoder.h
├── decoderbin
│ ├── Makefile
│ ├── analyze-counts.cc
│ ├── arpa2fst.cc
│ ├── compute-wer.cc
│ ├── decode-faster.cc
│ ├── latgen-faster.cc
│ ├── lattice-1best.cc
│ ├── lattice-add-penalty.cc
│ ├── lattice-best-path.cc
│ ├── lattice-prune.cc
│ ├── lattice-scale.cc
│ ├── lattice-to-ctm-conf.cc
│ ├── lattice-to-nbest.cc
│ └── nbest-to-ctm.cc
├── feat
│ ├── Makefile
│ ├── cmvn.cc
│ ├── cmvn.h
│ ├── feature-fbank-test.cc
│ ├── feature-fbank.cc
│ ├── feature-fbank.h
│ ├── feature-functions-test.cc
│ ├── feature-functions.cc
│ ├── feature-functions.h
│ ├── feature-mfcc-test.cc
│ ├── feature-mfcc.cc
│ ├── feature-mfcc.h
│ ├── feature-plp-test.cc
│ ├── feature-plp.cc
│ ├── feature-plp.h
│ ├── feature-sdc-test.cc
│ ├── feature-spectrogram.cc
│ ├── feature-spectrogram.h
│ ├── mel-computations.cc
│ ├── mel-computations.h
│ ├── online-feature-itf.h
│ ├── online-feature.cc
│ ├── online-feature.h
│ ├── pitch-functions-test.cc
│ ├── pitch-functions.cc
│ ├── pitch-functions.h
│ ├── resample-test.cc
│ ├── resample.cc
│ ├── resample.h
│ ├── srfft.cc
│ ├── srfft.h
│ ├── wave-reader.cc
│ └── wave-reader.h
├── featbin
│ ├── Makefile
│ ├── add-deltas.cc
│ ├── apply-cmvn.cc
│ ├── compute-cmvn-stats.cc
│ ├── compute-fbank-feats.cc
│ ├── compute-kaldi-pitch-feats.cc
│ ├── compute-mfcc-feats.cc
│ ├── compute-plp-feats.cc
│ ├── copy-feats.cc
│ ├── extract-segments.cc
│ ├── feat-to-dim.cc
│ ├── feat-to-len.cc
│ ├── paste-feats.cc
│ ├── process-kaldi-pitch-feats.cc
│ ├── splice-feats.cc
│ └── subsample-feats.cc
├── fstbin
│ ├── Makefile
│ ├── fstaddselfloops.cc
│ ├── fstaddsubsequentialloop.cc
│ ├── fstcomposecontext.cc
│ ├── fstcopy.cc
│ ├── fstdeterminizelog.cc
│ ├── fstdeterminizestar.cc
│ ├── fstfactor.cc
│ ├── fstisstochastic.cc
│ ├── fstminimizeencoded.cc
│ ├── fstphicompose.cc
│ ├── fstpropfinal.cc
│ ├── fstpushspecial.cc
│ ├── fstrand.cc
│ ├── fstrhocompose.cc
│ ├── fstrmepslocal.cc
│ ├── fstrmsymbols.cc
│ ├── fsts-to-transcripts.cc
│ └── fsttablecompose.cc
├── fstext
│ ├── Makefile
│ ├── context-dep-itf.h
│ ├── context-fst-inl.h
│ ├── context-fst.h
│ ├── deterministic-fst-inl.h
│ ├── deterministic-fst-test.cc
│ ├── deterministic-fst.h
│ ├── determinize-lattice-inl.h
│ ├── determinize-lattice-test.cc
│ ├── determinize-lattice.h
│ ├── determinize-star-inl.h
│ ├── determinize-star-test.cc
│ ├── determinize-star.h
│ ├── epsilon-property-inl.h
│ ├── epsilon-property-test.cc
│ ├── epsilon-property.h
│ ├── factor-inl.h
│ ├── factor-test.cc
│ ├── factor.h
│ ├── fst-test-utils.h
│ ├── fstext-lib.h
│ ├── fstext-utils-inl.h
│ ├── fstext-utils-test.cc
│ ├── fstext-utils.h
│ ├── lattice-utils-inl.h
│ ├── lattice-utils-test.cc
│ ├── lattice-utils.h
│ ├── lattice-weight-test.cc
│ ├── lattice-weight.h
│ ├── pre-determinize-inl.h
│ ├── pre-determinize-test.cc
│ ├── pre-determinize.h
│ ├── prune-special-inl.h
│ ├── prune-special-test.cc
│ ├── prune-special.h
│ ├── push-special-test.cc
│ ├── push-special.cc
│ ├── push-special.h
│ ├── rand-fst.h
│ ├── ref-counter.h
│ ├── remap-leaves.h
│ ├── remove-eps-local-inl.h
│ ├── remove-eps-local-test.cc
│ ├── remove-eps-local.h
│ ├── rescale-inl.h
│ ├── rescale-test.cc
│ ├── rescale.h
│ ├── table-matcher-test.cc
│ ├── table-matcher.h
│ ├── trivial-factor-weight-test.cc
│ └── trivial-factor-weight.h
├── gpucompute
│ ├── Makefile
│ ├── ctc-utils.h
│ ├── cuPrintf.cu
│ ├── cuPrintf.cuh
│ ├── cublas-wrappers.h
│ ├── cuda-array-inl.h
│ ├── cuda-array.h
│ ├── cuda-common.cc
│ ├── cuda-common.h
│ ├── cuda-device.cc
│ ├── cuda-device.h
│ ├── cuda-kernels-wrappers.h
│ ├── cuda-kernels.cu
│ ├── cuda-kernels.h
│ ├── cuda-math.cc
│ ├── cuda-math.h
│ ├── cuda-matrix-inl.h
│ ├── cuda-matrix.cc
│ ├── cuda-matrix.h
│ ├── cuda-matrixdim.h
│ ├── cuda-rand.cc
│ ├── cuda-rand.h
│ ├── cuda-randkernels-wrappers.h
│ ├── cuda-randkernels.cu
│ ├── cuda-randkernels.h
│ ├── cuda-value.h
│ ├── cuda-vector.cc
│ └── cuda-vector.h
├── lat
│ ├── Makefile
│ ├── arctic-weight.h
│ ├── confidence.cc
│ ├── confidence.h
│ ├── determinize-lattice-pruned-test.cc
│ ├── determinize-lattice-pruned.cc
│ ├── determinize-lattice-pruned.h
│ ├── kaldi-lattice-test.cc
│ ├── kaldi-lattice.cc
│ ├── kaldi-lattice.h
│ ├── lattice-functions.cc
│ ├── lattice-functions.h
│ ├── minimize-lattice-test.cc
│ ├── minimize-lattice.cc
│ ├── minimize-lattice.h
│ ├── push-lattice-test.cc
│ ├── push-lattice.cc
│ ├── push-lattice.h
│ ├── sausages.cc
│ └── sausages.h
├── lm
│ ├── Makefile
│ ├── README
│ ├── const-arpa-lm.cc
│ ├── const-arpa-lm.h
│ ├── irstlm.mk
│ ├── kaldi-lm.cc
│ ├── kaldi-lm.h
│ ├── kaldi-lmtable.cc
│ └── kaldi-lmtable.h
├── makefiles
│ ├── common.mk
│ ├── cygwin.mk
│ ├── darwin_10_10.mk
│ ├── darwin_10_11.mk
│ ├── darwin_10_5.mk
│ ├── darwin_10_6.mk
│ ├── darwin_10_7.mk
│ ├── darwin_10_8.mk
│ ├── darwin_10_9.mk
│ ├── default_rules.mk
│ ├── linux_atlas.mk
│ ├── linux_clapack.mk
│ ├── linux_cuda.mk
│ ├── linux_openblas.mk
│ ├── linux_x86_64_cuda.mk
│ └── linux_x86_64_mkl.mk
├── net
│ ├── Makefile
│ ├── affine-trans-layer.h
│ ├── bilstm-layer.h
│ ├── bilstm-parallel-layer.h
│ ├── ce-loss.cc
│ ├── ce-loss.h
│ ├── class-prior.cc
│ ├── class-prior.h
│ ├── communicator.h
│ ├── ctc-loss.cc
│ ├── ctc-loss.h
│ ├── layer.cc
│ ├── layer.h
│ ├── lstm-layer.h
│ ├── lstm-parallel-layer.h
│ ├── net.cc
│ ├── net.h
│ ├── sigmoid-layer.h
│ ├── softmax-layer.h
│ ├── tanh-layer.h
│ ├── train-opts.h
│ ├── trainable-layer.h
│ └── utils-functions.h
├── netbin
│ ├── Makefile
│ ├── format-to-nonparallel.cc
│ ├── net-average.cc
│ ├── net-change-model.cc
│ ├── net-copy.cc
│ ├── net-initialize.cc
│ ├── net-model-info.cc
│ ├── net-output-extract.cc
│ ├── train-ce-parallel.cc
│ ├── train-ce.cc
│ ├── train-ctc-parallel.cc
│ └── train-ctc.cc
└── util
│ ├── Makefile
│ ├── basic-filebuf.h
│ ├── common-utils.h
│ ├── const-integer-set-inl.h
│ ├── const-integer-set-test.cc
│ ├── const-integer-set.h
│ ├── edit-distance-inl.h
│ ├── edit-distance-test.cc
│ ├── edit-distance.h
│ ├── hash-list-inl.h
│ ├── hash-list-test.cc
│ ├── hash-list.h
│ ├── kaldi-holder-inl.h
│ ├── kaldi-holder.h
│ ├── kaldi-io-inl.h
│ ├── kaldi-io-test.cc
│ ├── kaldi-io.cc
│ ├── kaldi-io.h
│ ├── kaldi-pipebuf.h
│ ├── kaldi-table-inl.h
│ ├── kaldi-table-test.cc
│ ├── kaldi-table.cc
│ ├── kaldi-table.h
│ ├── options-itf.h
│ ├── parse-options-test.cc
│ ├── parse-options.cc
│ ├── parse-options.h
│ ├── simple-io-funcs.cc
│ ├── simple-io-funcs.h
│ ├── simple-options-test.cc
│ ├── simple-options.cc
│ ├── simple-options.h
│ ├── stl-utils-test.cc
│ ├── stl-utils.h
│ ├── table-types.h
│ ├── text-utils-test.cc
│ ├── text-utils.cc
│ ├── text-utils.h
│ └── timer.h
└── tools
├── .gitignore
├── CLAPACK
├── README.txt
├── cblas.h
├── clapack.h
└── f2c.h
├── Makefile
├── extras
├── README.txt
├── check_dependencies.sh
├── install_atlas.sh
├── install_irstlm.sh
├── install_openblas.sh
├── install_sctk_patched.sh
├── install_srilm.sh
├── irstlm.patch
├── openfst-1.3.4.patch
├── openfst-1.4.1.patch
├── openfst-1.5.1.patch
├── openfst_gcc41up.patch
├── travis_install_bindeps.sh
├── travis_script.sh
└── travis_show_failures.sh
├── install_atlas.sh
└── install_srilm.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled extensionless executable files in /src/*/
2 | /src/*/*
3 | !/src/*/*.*
4 | !/src/doc/*
5 | !/src/*/Makefile
6 | !/src/*/README
7 |
8 | # Compiled Object files
9 | *.o
10 |
11 | # Compiled Static and Dynamic libraries
12 | *.a
13 | *.so
14 | *.dylib
15 | *.dSYM
16 |
17 | # Make dependencies
18 | .depend.mk
19 |
20 | # /src/
21 | /src/config.mk
22 |
23 | # backups
24 | *~
25 | *.bak
26 | project.xcworkspace
27 |
28 | # swp files
29 | *.swp
30 |
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 |
2 | These instructions are valid for UNIX-like systems. Installation on Windows is not supported currently.
3 |
4 | 1. Requirements:
5 |
6 | > cd tools/
7 | > make (this will install most of the requirements)
8 |
9 | Additionally, run install_atlas.sh and install_srilm.sh to install ATLAS and SRILM respectively.
10 |
11 | 2. Installation
12 |
13 | The CUDA library should be installed before installing Eesen. Then Eesen can be installed as:
14 |
15 | > cd src
16 | > ./configure --shared --cudatk-dir=/path/to/cuda_library
17 | > make depend
18 | > make
19 |
20 | EESEN can be built without CUDA (for decoding only, for example)
21 | by omitting the cudatk-dir argument and specifying --use-cuda=no
22 |
23 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/README.md:
--------------------------------------------------------------------------------
1 |
2 | This recipe builds Chinese Mandarin systems with the HKUST Mandarin Telephone Speech corpus.
3 |
4 | You need to obtain the following 2 LDC datasets:
5 |
6 | LDC2005S15 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005S15
7 |
8 | LDC2005T32 : http://www.ldc.upenn.edu/Catalog/catalogEntry.jsp?catalogId=LDC2005T32
9 |
10 | The recipe run_ctc_char.sh models **3600+ Mandarin characters** directly. Dictionaries are not required.
11 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/RESULTS:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Character Error Rate (CER)
4 |
5 | for x in exp/*/decode*; do [ -d $x ] && grep WER $x/cer_* | utils/best_wer.sh; done
6 | exit 0
7 |
8 | # CTC Characters, with FBank features
9 | %WER 39.70 [ 22295 / 56154, 4074 ins, 1926 del, 16295 sub ] exp/train_char_l5_c320/decode_dev/cer_8
10 | # CTC Characters, with FBank+Pitch features
11 | %WER 38.67 [ 21714 / 56154, 3849 ins, 1974 del, 15891 sub ] exp/train_char_l5_c320_pitch/decode_dev/cer_8
12 |
13 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine.
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/conf/cmu2pinyin:
--------------------------------------------------------------------------------
1 | AA A
2 | AE A
3 | AH A
4 | AO UO
5 | AW U
6 | AY AI
7 | B B
8 | CH CH
9 | D D
10 | DH S I
11 | EH AI
12 | ER E
13 | EY AI
14 | F F
15 | G G
16 | HH H
17 | IH I
18 | IY I
19 | JH ZH
20 | K K
21 | L L
22 | M M
23 | N N
24 | NG N
25 | OW UO
26 | OY UO
27 | P P
28 | R R
29 | S S
30 | SH SH
31 | T T
32 | TH S
33 | UH U
34 | UW U
35 | V W
36 | W W
37 | Y Y
38 | Z Z
39 | ZH X
40 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 | --sample-frequency=8000
3 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false # only non-default option.
2 | --sample-frequency=8000 # Switchboard is sampled at 8kHz
3 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/conf/pinyin2cmu:
--------------------------------------------------------------------------------
1 | A AA
2 | AI AY
3 | AN AE N
4 | ANG AE NG
5 | AO AW
6 | B B
7 | CH CH
8 | C T S
9 | D D
10 | E ER
11 | EI EY
12 | EN AH N
13 | ENG AH NG
14 | ER AA R
15 | F F
16 | G G
17 | H HH
18 | IA IY AA
19 | IANG IY AE NG
20 | IAN IY AE N
21 | IAO IY AW
22 | IE IY EH
23 | I IY
24 | ING IY NG
25 | IN IY N
26 | IONG IY UH NG
27 | IU IY UH
28 | J J
29 | K K
30 | L L
31 | M M
32 | N N
33 | O AO
34 | ONG UH NG
35 | OU OW
36 | P P
37 | Q Q
38 | R R
39 | SH SH
40 | S S
41 | T T
42 | UAI UW AY
43 | UANG UW AE NG
44 | UAN UW AE N
45 | UA UW AA
46 | UI UW IY
47 | UN UW AH N
48 | UO UW AO
49 | U UW
50 | UE IY EH
51 | VE IY EH
52 | V IY UW
53 | VN IY N
54 | W W
55 | X X
56 | Y Y
57 | ZH JH
58 | Z Z
59 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/conf/pitch.conf:
--------------------------------------------------------------------------------
1 | --sample-frequency=8000
2 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/local/hkust_decode_graph.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 |
4 | if [ -f path.sh ]; then . path.sh; fi
5 |
6 | lm_dir=$1
7 | src_lang=$2
8 | tgt_lang=$3
9 |
10 | arpa_lm=${lm_dir}/3gram-mincount/lm_unpruned.gz
11 | [ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
12 |
13 | rm -rf $tgt_lang
14 | cp -r $src_lang $tgt_lang
15 |
16 | # Compose the language model to FST
17 | gunzip -c "$arpa_lm" | \
18 | grep -v '&lt;s&gt; &lt;s&gt;' | \
19 | grep -v '&lt;/s&gt; &lt;s&gt;' | \
20 | grep -v '&lt;/s&gt; &lt;/s&gt;' | \
21 | arpa2fst - | fstprint | \
22 | utils/remove_oovs.pl /dev/null | \
23 | utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$tgt_lang/words.txt \
24 | --osymbols=$tgt_lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
25 | fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G.fst
26 |
27 |
28 | echo "Checking how stochastic G is (the first of these numbers should be small):"
29 | fstisstochastic $tgt_lang/G.fst
30 |
31 | # Compose the token, lexicon and language-model FST into the final decoding graph
32 | fsttablecompose $tgt_lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
33 | fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
34 | fsttablecompose $tgt_lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
35 |
36 | echo "Composing decoding graph TLG.fst succeeded"
37 | rm -r $tgt_lang/LG.fst # We don't need to keep this intermediate FST
38 |
39 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/local/hkust_normalize.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 | use warnings; #sed replacement for -w perl parameter
3 | # Copyright Chao Weng
4 |
5 | # normalizations for hkust transcript
6 | # see the docs/trans-guidelines.pdf for details
7 |
8 | while (&lt;STDIN&gt;) {
9 | @A = split(" ", $_);
10 | print "$A[0] ";
11 | for ($n = 1; $n < @A; $n++) {
12 | $a = $A[$n];
13 | if (($a eq "{breath}")||($a eq "{cough}")||($a eq "{sneeze}")
14 | || ($a eq "{lipsmack}")) {print "[VOCALIZED-NOISE] "; next;}
15 | if (($a eq "{laugh}")) {print "[LAUGHTER] "; next;}
16 | if (($a eq "&lt;noise&gt;")) {print "[NOISE] "; next;}
17 | $tmp = $a;
18 | if ($tmp =~ /[^.,?+-]{0,}[.,?+-]+/) { $tmp =~ s:([^.,?+-]{0,})[.,?+-]+:$1:; }
19 | if ($tmp =~ /\~[A-Z]/) { $tmp =~ s:\~([A-Z]):$1:; }
20 | if ($tmp =~ /%\S/) { $tmp =~ s:%(\S):$1:; }
21 | if ($tmp =~ /[a-zA-Z]/) {$tmp=uc($tmp);}
22 | print "$tmp ";
23 | }
24 | print "\n";
25 | }
26 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/local/hkust_prepare_char_dict.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Creates a lexicon in which each word is represented by the sequence of its characters (spelling).
4 |
5 | srcdict=data/local/dict/lexicon.txt
6 | dir=data/local/dict_char
7 | mkdir -p $dir
8 |
9 | [ -f path.sh ] && . ./path.sh
10 |
11 | cat $srcdict | grep -v "!SIL" | grep -v "\[VOCALIZED-NOISE\]" | grep -v "\[NOISE\]" | \
12 | grep -v "\[LAUGHTER\]" | grep -v "&lt;UNK&gt;" | \
13 | perl -e 'while(<>){@A = split; if(! $seen{$A[0]}) {$seen{$A[0]} = 1; print $_;}}' \
14 | > $dir/lexicon1.txt || exit 1;
15 |
16 | #cat $phndir/lexicon.txt | grep -v "\[VOCALIZED-NOISE\]" | grep -v "\[NOISE\]" | \
17 | # grep -v "\[LAUGHTER\]" | grep -v "\" \
18 | # > $dir/lexicon1.txt
19 |
20 | unset LC_ALL
21 | cat $dir/lexicon1.txt | awk '{print $1}' | \
22 | perl -e 'use encoding utf8; while(<>){ chop; $str="$_"; foreach $p (split("", $_)) {$str="$str $p"}; print "$str\n";}' \
23 | > $dir/lexicon2.txt
24 |
25 | # Get the set of lexicon units without noises
26 | cut -d' ' -f2- $dir/lexicon2.txt | tr ' ' '\n' | sort -u > $dir/units_nosil.txt
27 |
28 | # Add special noises words & characters into the lexicon.
29 | (echo '[VOCALIZED-NOISE] [VOCALIZED-NOISE]'; echo '[NOISE] [NOISE]'; echo '[LAUGHTER] [LAUGHTER]'; echo ' '; echo ' ';) | \
30 | cat - $dir/lexicon2.txt | sort | uniq > $dir/lexicon3.txt || exit 1;
31 |
32 | cat $dir/lexicon3.txt | sort -u > $dir/lexicon.txt || exit 1;
33 |
34 | # The complete set of lexicon units, indexed by numbers starting from 1
35 | (echo '[VOCALIZED-NOISE]'; echo '[NOISE]'; echo '[LAUGHTER]'; echo ''; echo '';) | cat - $dir/units_nosil.txt | awk '{print $1 " " NR}' > $dir/units.txt
36 |
37 | # Convert phoneme sequences into the corresponding sequences of units indices, encoded by units.txt
38 | utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt
39 |
40 | echo "Character-based dictionary preparation succeeded"
41 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/local/hkust_prepare_phn_dict.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script prepares the phoneme-based lexicon. It also generates the list of lexicon units
4 | # and represents the lexicon using the indices of the units.
5 |
6 | srcdir=data/local/train
7 | dir=data/local/dict_phn
8 | mkdir -p $dir
9 | srcdict=data/local/dict/lexicon.txt
10 |
11 | [ -f path.sh ] && . ./path.sh
12 |
13 | [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1;
14 |
15 | # Raw dictionary preparation
16 | cat $srcdict | grep -v "!SIL" | \
17 | perl -e 'while(<>){@A = split; if(! $seen{$A[0]}) {$seen{$A[0]} = 1; print $_;}}' \
18 | > $dir/lexicon.txt || exit 1;
19 | #awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \
20 | # $srcdict | sort | awk '($0 !~ /^[[:space:]]*$/) {print}' | \
21 | # perl -e 'while(<>){ chop; $_=~ s/ +/ /; $_=~ s/\s*$//; print "$_\n";}' \
22 | # > $dir/lexicon1.txt || exit 1;
23 |
24 | # Get the set of lexicon units without noises
25 | cut -d' ' -f2- $dir/lexicon.txt | tr ' ' '\n' | sort -u | awk '{print $1 " " NR}' > $dir/units.txt
26 |
27 | # Convert phoneme sequences into the corresponding sequences of units indices, encoded by units.txt
28 | utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt
29 |
30 | echo "Phoneme-based dictionary preparation succeeded"
31 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/local/hkust_segment.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #coding:utf-8
3 | #!/usr/bin/env python
4 | import sys
5 | from mmseg import seg_txt
6 | for line in sys.stdin:
7 | blks = str.split(line)
8 | out_line = blks[0]
9 | for i in range(1, len(blks)):
10 | if blks[i] == "[VOCALIZED-NOISE]" or blks[i] == "[NOISE]" or blks[i] == "[LAUGHTER]":
11 | out_line += " " + blks[i]
12 | continue
13 | for j in seg_txt(blks[i]):
14 | out_line += " " + j
15 | print out_line
16 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/path.sh:
--------------------------------------------------------------------------------
1 | export EESEN_ROOT=`pwd`/../../..
2 | export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$EESEN_ROOT/tools/irstlm/bin/:$PWD:$PATH
3 | export LC_ALL=C
4 |
5 | if [[ `uname -n` =~ comet-* ]]; then
6 | # SDSC Comet cluster
7 | :
8 |
9 | elif [[ `uname -n` =~ bridges ]]; then
10 | # PSC Bridges cluster
11 | :
12 |
13 | elif [[ `uname -n` =~ compute-* ]]; then
14 | # CMU Rocks cluster
15 |
16 | module load gcc-4.9.2
17 | module load cuda-8.0
18 | module load python27
19 |
20 | [ -n "$PBS_JOBID" ] && export CUDA_VISIBLE_DEVICES=`qstat -n $PBS_JOBID|awk 'END {split ($NF, a, "/"); printf ("%s\n", a[2])}'`
21 |
22 | export TMPDIR=/scratch
23 | export LD_LIBRARY_PATH=/data/ASR1/tools/sox-14.4.2/install/lib:$LD_LIBRARY_PATH
24 |
25 | else
26 | echo "Which cluster is this?"
27 | exit 1;
28 | fi
29 |
30 | [ -f ${EESEN_ROOT}/tools/env.sh ] && . ${EESEN_ROOT}/tools/env.sh
31 | [ -f ./local.sh ] && . ./local.sh
32 |
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/steps:
--------------------------------------------------------------------------------
1 | ../../wsj/steps
--------------------------------------------------------------------------------
/asr_egs/hkust/v1/utils:
--------------------------------------------------------------------------------
1 | ../../wsj/utils
--------------------------------------------------------------------------------
/asr_egs/librispeech/README.md:
--------------------------------------------------------------------------------
1 |
2 | This dir contains fully fledged recipes to build end-to-end ASR systems using
3 | the Librispeech 100hr corpus. These scripts will start by downloading
4 | the relevant training data and language models, then start/complete training
5 | and decoding.
6 |
7 | There are two recipes, illustrating max perturbation combined with stochastic
8 | and cascade dropout, for a phoneme-based system
9 |
10 | run_nml_seq_fw_seq_tw.sh - max perturbation + stochastic dropout combo
11 |
12 | run_nml_seq_fw_step_2_nml_step_fw_seq_cascade.sh - max perturbation + cascade
13 | dropout combo
14 |
15 | NOTE:
16 |
17 | - please create/link exp and tmp directories prior to running scripts.
18 | - these take a *long* time to run.
19 |
20 | For dropout and max perturbation, please cite:
21 | - "Improving LSTM-CTC based ASR performance in domains with limited training data", Jayadev Billa (https://arxiv.org/pdf/1707.00722.pdf)
22 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/RESULTS:
--------------------------------------------------------------------------------
1 | ## Results for NML-sequence + Forward-sequence stochastic combination with max perturbation
2 | $ for x in exp/nml_seq_fw_seq_tw/train_lstm/*decode_*; do [ -d $x ] && grep WER $x/wer_* |utils/best_wer.sh ;done
3 |
4 | # dev_clean
5 | %WER 7.44 [ 4045 / 54402, 490 ins, 383 del, 3172 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_dev_clean_tgmed/wer_12_0.5
6 | %WER 7.93 [ 4315 / 54402, 486 ins, 444 del, 3385 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_dev_clean_tgsmall/wer_14_1.0
7 | # dev_other
8 | %WER 24.00 [ 12229 / 50948, 1280 ins, 1609 del, 9340 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_dev_other_tgmed/wer_10_1.0
9 | %WER 25.05 [ 12762 / 50948, 1345 ins, 1664 del, 9753 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_dev_other_tgsmall/wer_9_0.5
10 | # test_clean
11 | %WER 8.15 [ 4286 / 52576, 552 ins, 422 del, 3312 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_test_clean_tgmed/wer_11_0.5
12 | %WER 8.67 [ 4557 / 52576, 543 ins, 506 del, 3508 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_test_clean_tgsmall/wer_12_1.0
13 | # test_other
14 | %WER 25.08 [ 13130 / 52343, 1319 ins, 1733 del, 10078 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_test_other_tgmed/wer_10_1.0
15 | %WER 26.12 [ 13674 / 52343, 1396 ins, 1701 del, 10577 sub ] exp/nml_seq_fw_seq_tw/train_lstm/decode_test_other_tgsmall/wer_10_0.5
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine.
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 | # Comet cluster
20 | #export cuda_cmd="slurm_comet.pl -p gpu-shared -t 48:00:00 --gpu 1"
21 |
22 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/README.reduced_dict:
--------------------------------------------------------------------------------
1 | The dictionary librispeech_phn_reduced_dict.txt is generated by removing the phoneme stress markers in the original librispeech lexicon. Specific
2 | command to generate this files is below:
3 |
4 | $ perl -nae '$w = shift @F; if(! $seen{$w}) {$seen{$w} = 1; $prn=join(" ", @F); $prn =~ s/(\D)\d(\s*)/$1$2/g; print "$w $prn\n";}' librispeech-lexicon.txt
5 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/fbconf-10:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 | --frame-shift=10
3 | --frame-length=25
4 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/fbconf-11:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 | --frame-shift=11
3 | --frame-length=25
4 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/fbconf-8:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 | --frame-shift=8
3 | --frame-length=25
4 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/nnet.proto.nml_seq_fw_seq_tw:
--------------------------------------------------------------------------------
1 |
2 | 360 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T T
3 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T T
4 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T T
5 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T T
6 | 640 44 0.1
7 | 44 44
8 |
9 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/nnet.proto.nml_seq_fw_seq_tw_cascade:
--------------------------------------------------------------------------------
1 |
2 | 360 640 0.1 1.0 50.0 1.0 0.2 T T
3 | 640 640 0.1 1.0 50.0 1.0 0.2 T T
4 | 640 640 0.1 1.0 50.0 1.0 0.2 T T
5 | 640 640 0.1 1.0 50.0 1.0 0.2 T T
6 | 640 44 0.1
7 | 44 44
8 |
9 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/config/nnet.proto.nml_seq_fw_step_2_nml_step_fw_seq_cascade:
--------------------------------------------------------------------------------
1 |
2 | 360 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T
3 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T
4 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T
5 | 640 640 0.1 1.0 50.0 1.0 0.2 T 0.2 T T
6 | 640 44 0.1
7 | 44 44
8 |
9 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/path.sh:
--------------------------------------------------------------------------------
1 | export EESEN_ROOT=`pwd`/../..
2 | export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$EESEN_ROOT/tools/irstlm/bin/:$PWD:$PATH
3 | export LC_ALL=C
4 |
5 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/best_wer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2010-2011 Microsoft Corporation
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # To be run from one directory above this script.
19 |
20 | perl -e 'while(<>){
21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g;
22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite.
25 | if (defined $bestline){ print $bestline; } ' | \
26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \
27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \
28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \
29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||'
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/ctc_token_fst.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Apache 2.0
4 |
5 | import sys
6 |
7 | fread = open(sys.argv[1], 'r')
8 |
9 | print '0 1 <eps> <eps>'
10 | print '1 1 <blk> <eps>'
11 | print '2 2 <blk> <eps>'
12 | print '2 0 <eps> <eps>'
13 |
14 | nodeX = 3
15 | for entry in fread.readlines():
16 | entry = entry.replace('\n','').strip()
17 | fields = entry.split(' ')
18 | phone = fields[0]
19 | if phone == '<eps>' or phone == '<blk>':
20 | continue
21 |
22 | if '#' in phone:
23 | print str(0) + ' ' + str(0) + ' ' + '<eps>' + ' ' + phone;
24 | else:
25 | print str(1) + ' ' + str(nodeX) + ' ' + phone + ' ' + phone;
26 | print str(nodeX) + ' ' + str(nodeX) + ' ' + phone + ' <eps>';
27 | print str(nodeX) + ' ' + str(2) + ' ' + '<eps>' + ' <eps>';
28 | nodeX += 1
29 | print '0'
30 |
31 | fread.close()
32 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script replaces epsilon with #0 on the input side only, of the G.fst
18 | # acceptor.
19 |
20 | while(<>){
21 | s:^(\d+\s+\d+\s+)\<eps\>(\s+):$1#0$2:;
22 | print;
23 | }
24 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/find_arpa_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | if ( @ARGV < 1 || @ARGV > 2) {
19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
20 | # This program finds words in the arpa file that are not symbols
21 | # in the OpenFst-format symbol table words.txt. It prints them
22 | # on the standard output, one per line.
23 | }
24 |
25 | $symtab = shift @ARGV;
26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
27 | while(<S>){
28 | @A = split(" ", $_);
29 | @A == 2 || die "Bad line in symbol table file: $_";
30 | $seen{$A[0]} = 1;
31 | }
32 |
33 | $curgram=0;
34 | while(<>) { # Find the \data\ marker.
35 | if(m:^\\data\\$:) { last; }
36 | }
37 | while(<>) {
38 | if(m/^\\(\d+)\-grams:\s*$/) {
39 | $curgram = $1;
40 | if($curgram > 1) {
41 | last; # This is an optimization as we can get the vocab from the 1-grams
42 | }
43 | } elsif($curgram > 0) {
44 | @A = split(" ", $_);
45 | if(@A > 1) {
46 | shift @A;
47 | for($n=0;$n<$curgram;$n++) {
48 | $word = $A[$n];
49 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
50 | $in_arpa{$word} = 1;
51 | }
52 | } else {
53 | if(@A > 0 && $A[0] !~ m:\\end\\:) {
54 | print STDERR "Unusual line $_ (line $.) in arpa file\n";
55 | }
56 | }
57 | }
58 | }
59 |
60 | foreach $w (keys %in_arpa) {
61 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
62 | print "$w\n";
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/int2sym.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 | # Apache 2.0.
4 |
5 | undef $field_begin;
6 | undef $field_end;
7 |
8 |
9 | if ($ARGV[0] eq "-f") {
10 | shift @ARGV;
11 | $field_spec = shift @ARGV;
12 | if ($field_spec =~ m/^\d+$/) {
13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
14 | }
15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
16 | if ($1 ne "") {
17 | $field_begin = $1 - 1; # Change to zero-based indexing.
18 | }
19 | if ($2 ne "") {
20 | $field_end = $2 - 1; # Change to zero-based indexing.
21 | }
22 | }
23 | if (!defined $field_begin && !defined $field_end) {
24 | die "Bad argument to -f option: $field_spec";
25 | }
26 | }
27 | $symtab = shift @ARGV;
28 | if(!defined $symtab) {
29 | print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
30 | "options: [-f (<field>|<field_start>-<field_end>)]\n" .
31 | "e.g.: -f 2, or -f 3-4\n";
32 | exit(1);
33 | }
34 |
35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab";
36 | while(<F>) {
37 | @A = split(" ", $_);
38 | @A == 2 || die "bad line in symbol table file: $_";
39 | $int2sym{$A[1]} = $A[0];
40 | }
41 |
42 | sub int2sym {
43 | my $a = shift @_;
44 | my $pos = shift @_;
45 | if($a !~ m:^\d+$:) { # not all digits..
46 | $pos1 = $pos+1; # make it one-based.
47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n";
48 | }
49 | $s = $int2sym{$a};
50 | if(!defined ($s)) {
51 | die "int2sym.pl: integer $a not in symbol table $symtab.";
52 | }
53 | return $s;
54 | }
55 |
56 | $error = 0;
57 | while (<>) {
58 | @A = split(" ", $_);
59 | for ($pos = 0; $pos <= $#A; $pos++) {
60 | $a = $A[$pos];
61 | if ( (!defined $field_begin || $pos >= $field_begin)
62 | && (!defined $field_end || $pos <= $field_end)) {
63 | $a = int2sym($a, $pos);
64 | }
65 | print $a . " ";
66 | }
67 | print "\n";
68 | }
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/remove_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script removes lines that contain these OOVs on either the
18 | # third or fourth fields of the line. It is intended to remove arcs
19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
20 |
21 | if ( @ARGV < 1 || @ARGV > 2) {
22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
23 | }
24 |
25 | $unklist = shift @ARGV;
26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
27 | while(<S>){
28 | @A = split(" ", $_);
29 | @A == 1 || die "Bad line in unknown-symbol list: $_";
30 | $unk{$A[0]} = 1;
31 | }
32 |
33 | $num_removed = 0;
34 | while(<>){
35 | @A = split(" ", $_);
36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
37 | $num_removed++;
38 | } else {
39 | print;
40 | }
41 | }
42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
43 |
44 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/s2eps.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script replaces <s> and </s> with <eps> (on both input and output sides),
18 | # for the G.fst acceptor.
19 |
20 | while(<>){
21 | @A = split(" ", $_);
22 | if ( @A >= 4 ) {
23 | if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
24 | if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
25 | }
26 | print join("\t", @A) . "\n";
27 | }
28 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
19 | if ($ARGV[0] eq "--srand") {
20 | $n = $ARGV[1];
21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\"";
22 | srand($ARGV[1]);
23 | shift;
24 | shift;
25 | } else {
26 | srand(0); # Gives inconsistent behavior if we don't seed.
27 | }
28 |
29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we
30 | # don't understand.
31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n";
32 | print "randomizes the order of lines of input.\n";
33 | exit(1);
34 | }
35 |
36 | @lines = <>;
37 | @lines = sort { rand() <=> rand() } @lines;
38 | print @lines;
39 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | while(<>){
19 | @A = split(" ", $_);
20 | @A > 1 || die "Invalid line in spk2utt file: $_";
21 | $s = shift @A;
22 | foreach $u ( @A ) {
23 | print "$u $s\n";
24 | }
25 | }
26 |
27 |
28 |
--------------------------------------------------------------------------------
/asr_egs/librispeech/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 |
21 | if ( @ARGV > 1 ) {
22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
23 | }
24 |
25 | while(<>){
26 | @A = split(" ", $_);
27 | @A == 2 || die "Invalid line in utt2spk file: $_";
28 | ($u,$s) = @A;
29 | if(!$seen_spk{$s}) {
30 | $seen_spk{$s} = 1;
31 | push @spklist, $s;
32 | }
33 | push (@{$spk_hash{$s}}, "$u");
34 | }
35 | foreach $s (@spklist) {
36 | $l = join(' ',@{$spk_hash{$s}});
37 | print "$s $l\n";
38 | }
39 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/README.md:
--------------------------------------------------------------------------------
1 |
2 | This dir contains recipes to build Eesen systems on Switchboard.
3 |
4 | At the minimum, you need to obtain 3 LDC datasets:
5 |
6 | LDC97S62 LDC2002S09 LDC2002T43
7 |
8 | [Optional] To build LMs with the Fisher transcripts, you need 2 additional datasets:
9 |
10 | LDC2004T19 LDC2005T19
11 |
12 | There are two recipes, demonstrating different types of CTC labels
13 |
14 | run_ctc_phn.sh - phonemes as CTC labels
15 | run_ctc_char.sh - characters (letters) as CTC labels
16 |
17 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine).
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/conf/char_l5_c640.proto:
--------------------------------------------------------------------------------
1 |
2 | 120 640 0.1 1.0 50.0 1.0
3 | 640 640 0.1 1.0 50.0 1.0
4 | 640 640 0.1 1.0 50.0 1.0
5 | 640 640 0.1 1.0 50.0 1.0
6 | 640 640 0.1 1.0 50.0 1.0
7 | 640 47 0.1
8 | 47 47
9 |
10 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 | --sample-frequency=8000
3 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false # only non-default option.
2 | --sample-frequency=8000 # Switchboard is sampled at 8kHz
3 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/conf/phn_l5_c640.proto:
--------------------------------------------------------------------------------
1 |
2 | 120 640 0.1 1.0 50.0 1.0
3 | 640 640 0.1 1.0 50.0 1.0
4 | 640 640 0.1 1.0 50.0 1.0
5 | 640 640 0.1 1.0 50.0 1.0
6 | 640 640 0.1 1.0 50.0 1.0
7 | 640 46 0.1
8 | 46 46
9 |
10 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/local/remove_dup_utts.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Remove excess utterances once they appear more than a specified
4 | # number of times with the same transcription, in a data set.
5 | # E.g. useful for removing excess "uh-huh" from training.
6 |
7 | if [ $# != 3 ]; then
8 | echo "Usage: remove_dup_utts.sh max-count src-data-dir dest-data-dir"
9 | exit 1;
10 | fi
11 |
12 | maxcount=$1
13 | srcdir=$2
14 | destdir=$3
15 | mkdir -p $destdir
16 |
17 | [ ! -f $srcdir/text ] && echo "Invalid input directory $srcdir" && exit 1;
18 |
19 | cp $srcdir/* $destdir
20 | cat $srcdir/text | \
21 | perl -e '
22 | $maxcount = shift @ARGV;
23 | @all = ();
24 | $p1 = 103349; $p2 = 71147; $k = 0;
25 | sub random { # our own random number generator: predictable.
26 | $k = ($k + $p1) % $p2;
27 | return ($k / $p2);
28 | }
29 | while(<>) {
30 | push @all, $_;
31 | @A = split(" ", $_);
32 | shift @A;
33 | $text = join(" ", @A);
34 | $count{$text} ++;
35 | }
36 | foreach $line (@all) {
37 | @A = split(" ", $line);
38 | shift @A;
39 | $text = join(" ", @A);
40 | $n = $count{$text};
41 | if ($n < $maxcount || random() < ($maxcount / $n)) {
42 | print $line;
43 | }
44 | }' $maxcount >$destdir/text
45 |
46 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`"
47 |
48 | echo "Using fix_data_dir.sh to reconcile the other files."
49 | utils/fix_data_dir.sh $destdir
50 | rm -r $destdir/.backup
51 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/local/swbd1_decode_graph.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script compiles the ARPA-formatted language models into FSTs. Finally it composes the LM, lexicon
4 | # and token FSTs together into the decoding graph.
5 |
6 | . ./path.sh || exit 1;
7 |
8 | langdir=$1
9 | lexicon=$2
10 |
11 | order=3
12 | lm_suffix="tg" # We only use the trigram LMs. You can compile the 4gram ones similarly
13 | srilm_opts="-subset -prune-lowprobs -unk -tolower -order $order"
14 |
15 | # The SWBD LM
16 | LM=data/local/lm/sw1.o${order}g.kn.gz
17 | outlangdir=${langdir}_sw1_$lm_suffix
18 | utils/format_lm_sri.sh --srilm-opts "$srilm_opts" $langdir $LM $lexicon $outlangdir
19 |
20 | # Compose the final decoding graph. The composition of L.fst and G.fst is determinized and
21 | # minimized.
22 | fsttablecompose ${langdir}/L.fst $outlangdir/G.fst | fstdeterminizestar --use-log=true | \
23 | fstminimizeencoded | fstarcsort --sort_type=ilabel > $outlangdir/LG.fst || exit 1;
24 | fsttablecompose ${langdir}/T.fst $outlangdir/LG.fst > $outlangdir/TLG.fst || exit 1;
25 | rm -rf $outlangdir/LG.fst
26 |
27 | # The SWBD+Fisher LM
28 | outlangdir=${langdir}_sw1_fsh_${lm_suffix}pr
29 | # Prune the LM using IRSTLM
30 | export PATH=$PATH:../../../tools/irstlm/bin/
31 | prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o${order}g.kn.gz /dev/stdout \
32 | | gzip -c > data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz || exit 1
33 |
34 | LM=data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz
35 | utils/format_lm_sri.sh --srilm-opts "$srilm_opts" $langdir $LM $lexicon $outlangdir
36 |
37 | fsttablecompose ${langdir}/L.fst $outlangdir/G.fst | fstdeterminizestar --use-log=true | \
38 | fstminimizeencoded | fstarcsort --sort_type=ilabel > $outlangdir/LG.fst || exit 1;
39 | fsttablecompose ${langdir}/T.fst $outlangdir/LG.fst > $outlangdir/TLG.fst || exit 1;
40 | rm -rf $outlangdir/LG.fst
41 |
42 | echo "Composing decoding graph TLG.fst succeeded"
43 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/local/swbd1_map_words.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
4 | # matches case-insensitive --Arnab (Jan 2013)
5 |
6 | if ($ARGV[0] eq "-f") {
7 | shift @ARGV;
8 | $field_spec = shift @ARGV;
9 | if ($field_spec =~ m/^\d+$/) {
10 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
11 | }
12 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10)
13 | if ($1 ne "") {
14 | $field_begin = $1 - 1; # Change to zero-based indexing.
15 | }
16 | if ($2 ne "") {
17 | $field_end = $2 - 1; # Change to zero-based indexing.
18 | }
19 | }
20 | if (!defined $field_begin && !defined $field_end) {
21 | die "Bad argument to -f option: $field_spec";
22 | }
23 | }
24 |
25 |
26 | while (<>) {
27 | @A = split(" ", $_);
28 | for ($n = 0; $n < @A; $n++) {
29 | $a = $A[$n];
30 | if ( (!defined $field_begin || $n >= $field_begin)
31 | && (!defined $field_end || $n <= $field_end)) {
32 | # e.g. [LAUGHTER-STORY] -> STORY;
33 | $a =~ s:(|\-)^\[LAUGHTER-(.+)\](|\-)$:$1$2$3:i;
34 | # $1 and $3 relate to preserving trailing "-"
35 | $a =~ s:^\[(.+)/.+\](|\-)$:$1$2:; # e.g. [IT'N/ISN'T] -> IT'N ... note,
36 | # 1st part may include partial-word stuff, which we process further below,
37 | # e.g. [LEM[GUINI]-/LINGUINI]
38 | # the (|\_) at the end is to accept and preserve trailing -'s.
39 | $a =~ s:^(|\-)\[[^][]+\](.+)$:-$2:; # e.g. -[AN]Y , note \047 is quote;
40 | # let the leading - be optional on input, as sometimes omitted.
41 | $a =~ s:^(.+)\[[^][]+\](|\-)$:$1-:; # e.g. AB[SOLUTE]- -> AB-;
42 | # let the trailing - be optional on input, as sometimes omitted.
43 | $a =~ s:([^][]+)\[.+\]$:$1:; # e.g. EX[SPECIALLY]-/ESPECIALLY] -> EX-
44 | # which is a mistake in the input.
45 | $a =~ s:^\{(.+)\}$:$1:; # e.g. {YUPPIEDOM} -> YUPPIEDOM
46 | $a =~ s:[A-Z]\[([^][])+\][A-Z]:$1-$3:i; # e.g. AMMU[N]IT- -> AMMU-IT-
47 | $a =~ s:_\d$::; # e.g. THEM_1 -> THEM
48 | }
49 | $A[$n] = $a;
50 | }
51 | print join(" ", @A) . "\n";
52 | }
53 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/local/swbd1_prepare_char_dict.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Creates a lexicon in which each word is represented by the sequence of its characters (spelling).
4 |
5 | phndir=data/local/dict_phn
6 | dir=data/local/dict_char
7 | mkdir -p $dir
8 |
9 | [ -f path.sh ] && . ./path.sh
10 |
11 | # Use the word list of the phoneme-based lexicon. Create the lexicon using characters.
12 | local/swbd1_map_words.pl -f 1 $phndir/lexicon1.txt | awk '{print $1}' | \
13 | perl -e 'while(<>){ chop; $str="$_"; foreach $p (split("", $_)) {$str="$str $p"}; print "$str\n";}' \
14 | > $dir/lexicon1.txt
15 |
16 | # Get the set of lexicon units without noises
17 | cut -d' ' -f2- $dir/lexicon1.txt | tr ' ' '\n' | sort -u > $dir/units_nosil.txt
18 |
19 | # Add special noises words & characters into the lexicon.
20 | (echo '[vocalized-noise] [vocalized-noise]'; echo '[noise] [noise]'; echo '[laughter] [laughter]'; echo '<unk> <unk>'; echo '<space> <space>';) | \
21 | cat - $dir/lexicon1.txt | sort | uniq > $dir/lexicon2.txt || exit 1;
22 |
23 | cat $dir/lexicon2.txt | sort -u > $dir/lexicon.txt || exit 1;
24 |
25 | # The complete set of lexicon units, indexed by numbers starting from 1
26 | (echo '[vocalized-noise]'; echo '[noise]'; echo '[laughter]'; echo '<unk>'; echo '<space>';) | cat - $dir/units_nosil.txt | awk '{print $1 " " NR}' > $dir/units.txt
27 |
28 | # Convert phoneme sequences into the corresponding sequences of units indices, encoded by units.txt
29 | utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt
30 |
31 | echo "Character-based dictionary (word spelling) preparation succeeded"
32 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/local/swbd1_prepare_phn_dict.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This script prepares the phoneme-based lexicon. It also generates the list of lexicon units
4 | # and represents the lexicon using the indices of the units.
5 |
6 | srcdir=data/local/train
7 | dir=data/local/dict_phn
8 | mkdir -p $dir
9 | srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text
10 |
11 | [ -f path.sh ] && . ./path.sh
12 |
13 | . utils/parse_options.sh
14 |
15 | [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1;
16 |
17 | # Raw dictionary preparation (lower-case, remove comments)
18 | awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \
19 | $srcdict | sort | awk '($0 !~ /^[[:space:]]*$/) {print}' | \
20 | perl -e 'while(<>){ chop; $_=~ s/ +/ /; $_=~ s/\s*$//; print "$_\n";}' \
21 | > $dir/lexicon1.txt || exit 1;
22 |
23 | # Get the set of lexicon units without noises
24 | cut -d' ' -f2- $dir/lexicon1.txt | tr ' ' '\n' | sort -u > $dir/units_nosil.txt
25 |
26 | # Add the noises etc. to the lexicon. No silence is added.
27 | (echo '[vocalized-noise] spn'; echo '[noise] nsn'; echo '[laughter] lau'; echo '<unk> spn'; ) | \
28 | cat - $dir/lexicon1.txt | sort | uniq > $dir/lexicon2.txt || exit 1;
29 |
30 | local/swbd1_map_words.pl -f 1 $dir/lexicon2.txt | sort -u > $dir/lexicon.txt || exit 1;
31 |
32 | # The complete set of lexicon units, indexed by numbers starting from 1
33 | (echo 'spn'; echo 'nsn'; echo 'lau';) | cat - $dir/units_nosil.txt | awk '{print $1 " " NR}' > $dir/units.txt
34 |
35 | # Convert phoneme sequences into the corresponding sequences of units indices, encoded by units.txt
36 | utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt
37 |
38 | echo "Phoneme-based dictionary preparation succeeded"
39 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/path.sh:
--------------------------------------------------------------------------------
1 | export EESEN_ROOT=`pwd`/../../..
2 | export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$PWD:$PATH
3 | export LC_ALL=C
4 | export LD_LIBRARY_PATH=$EESEN_ROOT/tools/openfst/lib:$LD_LIBRARY_PATH
5 |
6 | . $EESEN_ROOT/tools/env.sh
7 |
8 | if [[ `uname -n` =~ comet-* ]]; then
9 | # SDSC Comet cluster
10 | export TMPDIR=/scratch/$USER/$SLURM_JOBID
11 |
12 | elif [[ `uname -n` =~ br0* ]]; then
13 | # PSC Bridges cluster
14 | export TMPDIR=$LOCAL
15 |
16 | elif [[ `uname -n` =~ compute-* ]]; then
17 | # CMU Rocks cluster
18 | module load python27
19 | module load gcc-4.9.2
20 | export TMPDIR=/scratch
21 | fi
22 |
23 | if [[ ! -z ${acwt+x} ]]; then
24 | # let's assume we're decoding
25 | export PATH=$EESEN_ROOT/src-nogpu/netbin:$PATH
26 | echo "Preferring non-gpu netbin code"
27 | fi
28 |
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/steps:
--------------------------------------------------------------------------------
1 | ../../wsj/steps
--------------------------------------------------------------------------------
/asr_egs/swbd/v1/utils:
--------------------------------------------------------------------------------
1 | ../../wsj/utils
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/RESULTS:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for x in exp/*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
4 |
5 | %WER 19.6 | 507 17792 | 84.8 12.4 2.8 4.3 19.6 94.7 | exp/train_phn_l5_c320_s20/decode_dev/score_7/dev.ctm.filt.sys
6 | %WER 19.2 | 1155 27512 | 85.6 11.6 2.8 4.8 19.2 92.1 | exp/train_phn_l5_c320_s20/decode_test/score_8/test.ctm.filt.sys
7 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine).
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false # only non-default option.
2 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/conf/pitch.conf:
--------------------------------------------------------------------------------
1 | --sample-frequency=16000
2 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/local/join_suffix.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright 2014 Nickolay V. Shmyrev
4 | # Apache 2.0
5 |
6 |
7 | import sys
8 |
9 | words = set()
10 | for line in open(sys.argv[1]):
11 | items = line.split()
12 | words.add(items[0])
13 |
14 | for line in sys.stdin:
15 | items = line.split()
16 | new_items = []
17 | i = 1
18 | while i < len(items):
19 | if i < len(items) - 1 and items[i+1][0] == '\'' and items[i] + items[i+1] in words:
20 | new_items.append(items[i] + items[i+1])
21 | i = i + 1
22 | else:
23 | new_items.append(items[i])
24 | i = i + 1
25 |
26 | print items[0], " ".join(new_items)
27 |
28 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/local/tedlium_decode_graph.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# This script compiles the ARPA-formatted language models into FSTs. Finally it composes the LM, lexicon
# and token FSTs together into the decoding graph.

. ./path.sh || exit 1;

arpa_lm=db/cantab-TEDLIUM/cantab-TEDLIUM-pruned.lm3.gz
oov_list=/dev/null

. parse_options.sh || exit 1;

langdir=$1

[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;

outlangdir=${langdir}_test

rm -rf $outlangdir
cp -r $langdir $outlangdir

# Compile the ARPA LM into G.fst.  The grep -v filters drop illegal n-grams
# over the sentence-boundary symbols (<s> as a successor, </s> as a
# predecessor), which cannot be represented in the compiled FST.
gunzip -c "$arpa_lm" | \
  grep -v '<s> <s>' | \
  grep -v '</s> <s>' | \
  grep -v '</s> </s>' | \
  arpa2fst - | fstprint | \
  utils/remove_oovs.pl $oov_list | \
  utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$outlangdir/words.txt \
  --osymbols=$outlangdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
  fstrmepsilon | fstarcsort --sort_type=ilabel > $outlangdir/G.fst

# Compose the final decoding graph. The composition of L.fst and G.fst is determinized and
# minimized.
fsttablecompose $outlangdir/L.fst $outlangdir/G.fst | fstdeterminizestar --use-log=true | \
  fstminimizeencoded | fstarcsort --sort_type=ilabel > $outlangdir/LG.fst || exit 1;
fsttablecompose $outlangdir/T.fst $outlangdir/LG.fst > $outlangdir/TLG.fst || exit 1;
rm -rf $outlangdir/LG.fst

echo "Composing decoding graph TLG.fst succeeded"
40 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/local/tedlium_download_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Nickolay V. Shmyrev
# 2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0

# Fetches the TED-LIUM corpus and the Cantab Research language models into
# ./db — either by symlinking shared storage on known clusters, or by
# downloading from the public servers.

mkdir -p db
pushd db

# TED-LIUM database:
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
# JHU grid: corpus already on shared storage.
ln -s /export/corpora5/TEDLIUM_release1
elif [[ $(hostname -f) == compute-*.local ]]; then
# CMU Rocks cluster: corpus already on shared storage.
ln -s /data/MM1/corpora/TEDLIUM_release* .
else
# Otherwise download from OpenSLR.
# NOTE(review): tar only runs when the archive was freshly downloaded, so a
# previously interrupted extraction is not retried — confirm this is intended.
if [ ! -f TEDLIUM_release1.tar.gz ]; then
wget -q http://www.openslr.org/resources/7/TEDLIUM_release1.tar.gz || exit 1
tar xf TEDLIUM_release1.tar.gz
fi
if [ ! -f TEDLIUM_release2.tar.gz ]; then
wget -q http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz || exit 1;
tar xf TEDLIUM_release2.tar.gz
fi
fi

# Language models (Cantab Research):
if [ ! -d cantab-TEDLIUM ]; then
echo "Downloading \"http://cantabresearch.com/cantab-TEDLIUM.tar.bz2\". "
wget --no-verbose --output-document=- http://cantabresearch.com/cantab-TEDLIUM.tar.bz2 | bzcat | tar --extract --file=- || exit 1
# Compress the LMs in place; tedlium_decode_graph.sh expects the .gz names.
gzip cantab-TEDLIUM/cantab-TEDLIUM-pruned.lm3
gzip cantab-TEDLIUM/cantab-TEDLIUM-unpruned.lm4
fi

popd
35 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/local/tedlium_prepare_char_dict.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Yajie Miao
# Apache 2.0

# Creates a lexicon in which each word is represented by the sequence of its characters (spelling).

phndir=data/local/dict_phn
dir=data/local/dict_char
mkdir -p $dir

[ -f path.sh ] && . ./path.sh

# Use the word list of the phoneme-based lexicon. Create the lexicon using characters.
cat $phndir/lexicon_words.txt | awk '{print $1}' | \
perl -e 'while(<>){ chop; $str="$_"; foreach $p (split("", $_)) {$str="$str $p"}; print "$str\n";}' \
> $dir/lexicon_words.txt

# Get the set of lexicon units without noises
cut -d' ' -f2- $dir/lexicon_words.txt | tr ' ' '\n' | sort -u > $dir/units_nosil.txt

# Add special noises words & characters into the lexicon.
# NOTE(review): the <NOISE>/<UNK> entries were reconstructed from the
# phoneme-dict analog; confirm the exact symbols against the LM vocabulary.
( echo '[BREATH] [BREATH]'; echo '[NOISE] [NOISE]'; echo '[COUGH] [COUGH]';
echo '[SMACK] [SMACK]'; echo '[UM] [UM]'; echo '[UH] [UH]'; echo '<NOISE> <NOISE>'; echo '<UNK> <UNK>';) | \
cat - $dir/lexicon_words.txt | sort | uniq > $dir/lexicon.txt || exit 1;

# The complete set of lexicon units, indexed by numbers starting from 1
(echo '[BREATH]'; echo '[NOISE]'; echo '[COUGH]'; echo '[SMACK]';
echo '[UM]'; echo '[UH]'; echo '<NOISE>'; echo '<UNK>'; ) | cat - $dir/units_nosil.txt | awk '{print $1 " " NR}' > $dir/units.txt

# Convert character sequences into the corresponding sequences of units indices, encoded by units.txt
utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt

echo "Character-based dictionary (word spelling) preparation succeeded"
35 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/local/tedlium_prepare_phn_dict.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Copyright 2015 Yajie Miao
# Apache 2.0
#

# Prepares the phoneme-based CTC lexicon from the Cantab TEDLIUM dictionary.

dir=data/local/dict_phn
mkdir -p $dir

srcdict=db/cantab-TEDLIUM/cantab-TEDLIUM.dct

. parse_options.sh

[ ! -r $srcdict ] && echo "Missing $srcdict" && exit 1

# Join dicts and fix some troubles: drop the sentence-boundary entries <s>
# and </s>, and strip pronunciation-variant markers like "(1)".
cat $srcdict | grep -v "<s>" | grep -v "</s>" | LANG= LC_ALL= sort | sed 's:([0-9])::g' > $dir/lexicon_words.txt

# Get the set of lexicon units without noises
cut -d' ' -f2- $dir/lexicon_words.txt | tr ' ' '\n' | sort -u > $dir/units_nosil.txt

# Add to the lexicon the silences, noises etc.
# NOTE(review): the <UNK> entry was reconstructed; confirm its spelling/case
# matches the unknown-word symbol used by the LM vocabulary.
(echo '[BREATH] BRH'; echo '[NOISE] NSN'; echo '[COUGH] CGH';
echo '[SMACK] SMK'; echo '[UM] UM'; echo '[UH] UHH'
echo '<UNK> NSN' ) | \
cat - $dir/lexicon_words.txt | sort | uniq > $dir/lexicon.txt

# The complete set of lexicon units, indexed by numbers starting from 1
( echo BRH; echo CGH; echo NSN ; echo SMK; echo UM; echo UHH ) | cat - $dir/units_nosil.txt | awk '{print $1 " " NR}' > $dir/units.txt

# Convert phoneme sequences into the corresponding sequences of units indices, encoded by units.txt
utils/sym2int.pl -f 2- $dir/units.txt < $dir/lexicon.txt > $dir/lexicon_numbers.txt

echo "Phoneme-based dictionary preparation succeeded"
33 |
34 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/path.sh:
--------------------------------------------------------------------------------
# Environment setup for the TEDLIUM v1 recipe.
export EESEN_ROOT=`pwd`/../../..
# EESEN binaries, OpenFst and IRSTLM tools, plus this recipe's utils/ dir.
export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$EESEN_ROOT/tools/irstlm/bin/:$PWD:$PATH
# Byte-wise sorting order, required by Kaldi-style table utilities.
export LC_ALL=C

if [[ `uname -n` =~ compute- ]]; then
# CMU Rocks cluster
module load python27
module load gcc-4.9.2
fi

# If $acwt is set (the decoding scripts define it), prefer the CPU-only
# binaries so decoding jobs do not require a GPU.
if [[ ! -z ${acwt+x} ]]; then
# let's assume we're decoding
export PATH=$EESEN_ROOT/src-nogpu/netbin:$PATH
echo "Preferring non-gpu netbin code"
fi
16 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/steps:
--------------------------------------------------------------------------------
1 | ../../wsj/steps
--------------------------------------------------------------------------------
/asr_egs/tedlium/v1/utils:
--------------------------------------------------------------------------------
1 | ../../wsj/utils
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/RESULTS:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for x in exp/*/decode*; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
4 |
5 | %WER 19.6 | 507 17792 | 84.8 12.4 2.8 4.3 19.6 94.7 | exp/train_phn_l5_c320_s20/decode_dev/score_7/dev.ctm.filt.sys
6 | %WER 19.2 | 1155 27512 | 85.6 11.6 2.8 4.8 19.2 92.1 | exp/train_phn_l5_c320_s20/decode_test/score_8/test.ctm.filt.sys
7 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine).
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/conf:
--------------------------------------------------------------------------------
1 | ../v1/conf
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/local:
--------------------------------------------------------------------------------
1 | ../v1/local
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/path.sh:
--------------------------------------------------------------------------------
# Environment setup for the TEDLIUM v2-30ms recipe.
export EESEN_ROOT=`pwd`/../../..
# EESEN binaries, OpenFst and IRSTLM tools, plus this recipe's utils/ dir.
# (Fix: $EESEN_ROOT/src/netbin was listed twice; the duplicate is removed.)
export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$EESEN_ROOT/tools/irstlm/bin/:$PWD:$PATH
# Byte-wise sorting order, required by Kaldi-style table utilities.
export LC_ALL=C

# Cluster-specific settings, selected by hostname.
if [[ `uname -n` =~ ip-* ]]; then
# AWS instance
export KALDI_ROOT=/home/fmetze/tools/kaldi
export TMPDIR=/tmp

elif [[ `uname -n` =~ comet* ]]; then
# comet cluster
module load atlas
module load lapack

export TMPDIR=/scratch/${USER}/${SLURM_JOBID}

elif [[ `uname -n` =~ compute- ]]; then
# CMU Rocks cluster
module load python27
module load gcc-4.9.2

export TMPDIR=/scratch

# just in case we're running on a GPU node
export CUDA_VISIBLE_DEVICES=`qstat -n $PBS_JOBID|awk ' END { split ($NF, a, "/"); printf ("%s\n", a[2]) } '`

else
# continue running on local node or VM
echo "Running locally"
fi

# If $acwt is set (the decoding scripts define it), prefer the CPU-only
# binaries so decoding jobs do not require a GPU.
if [[ ! -z ${acwt+x} ]]; then
# let's assume we're decoding
export PATH=$EESEN_ROOT/src-nogpu/netbin:$PATH
echo "Preferring non-gpu netbin code"
fi
37 |
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/steps:
--------------------------------------------------------------------------------
1 | ../../wsj/steps
--------------------------------------------------------------------------------
/asr_egs/tedlium/v2-30ms/utils:
--------------------------------------------------------------------------------
1 | ../../wsj/utils
--------------------------------------------------------------------------------
/asr_egs/wsj/README.md:
--------------------------------------------------------------------------------
1 |
2 | This dir contains fully fledged recipes to build end-to-end ASR systems using
3 | the Wall Street Journal (WSJ) corpus.
4 |
5 | You need to obtain the WSJ dataset from LDC to run this example. The LDC catalog
6 | numbers are LDC93S6B and LDC94S13B.
7 |
8 | There are two recipes, demonstrating different types of CTC labels
9 |
10 | run_ctc_phn.sh - phonemes as CTC labels
11 | run_ctc_char.sh - characters (letters) as CTC labels
12 |
13 |
--------------------------------------------------------------------------------
/asr_egs/wsj/RESULTS:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
4 | exit 0
5 |
6 | # CTC Phoneme
7 | %WER 11.39 [ 938 / 8234, 119 ins, 127 del, 692 sub ] exp/train_phn_l4_c320/decode_dev93_tgpr/wer_0.8
8 | %WER 7.87 [ 444 / 5643, 77 ins, 29 del, 338 sub ] exp/train_phn_l4_c320/decode_eval92_tgpr/wer_0.9
9 |
10 | %WER 10.87 [ 895 / 8234, 126 ins, 115 del, 654 sub ] exp/train_phn_l4_c320/decode_dev93_tg/wer_0.8
11 | %WER 7.28 [ 411 / 5643, 58 ins, 38 del, 315 sub ] exp/train_phn_l4_c320/decode_eval92_tg/wer_0.7
12 |
13 | # CTC Character - Basic Vocabulary
14 | %WER 14.23 [ 1172 / 8234, 151 ins, 138 del, 883 sub ] exp/train_char_l4_c320/decode_dev93_tgpr/wer_0.6
15 | %WER 9.07 [ 512 / 5643, 73 ins, 37 del, 402 sub ] exp/train_char_l4_c320/decode_eval92_tgpr/wer_0.6
16 |
17 | %WER 13.59 [ 1119 / 8234, 128 ins, 157 del, 834 sub ] exp/train_char_l4_c320/decode_dev93_tg/wer_0.5
18 | %WER 8.49 [ 479 / 5643, 63 ins, 34 del, 382 sub ] exp/train_char_l4_c320/decode_eval92_tg/wer_0.6
19 |
20 | # CTC Character - Expanded Vocabulary
21 | %WER 12.12 [ 998 / 8234, 89 ins, 138 del, 771 sub ] exp/train_char_l4_c320/decode_dev93_tgpr_larger/wer_0.6
22 | %WER 7.34 [ 414 / 5643, 44 ins, 31 del, 339 sub ] exp/train_char_l4_c320/decode_eval92_tgpr_larger/wer_0.6
23 |
24 | %WER 10.98 [ 904 / 8234, 82 ins, 122 del, 700 sub ] exp/train_char_l4_c320/decode_dev93_tg_larger/wer_0.6
25 | %WER 6.70 [ 378 / 5643, 38 ins, 30 del, 310 sub ] exp/train_char_l4_c320/decode_eval92_tg_larger/wer_0.6
26 |
27 |
28 | # Results of hybrid DNN models for comparisons.
29 | # Note that it is not fair to compare these CTC results with the Kaldi released hybrid systems because:
30 | # 1) the Kaldi models use FMLLR features, whereas our CTC models use FBANK features
31 | # 2) the Kaldi DNN numbers are reported over an expanded dictionary, while our models use the original dict.
32 | # To rule out all these variances, we train a hybrid DNN model using the FBANK features, and decode the model
33 | # with the original dict. DNN input features are 11 frames of 40-dim FBANKs.
34 | # DNN model has 6 hidden layers, each has 1024 units. Pre-trained with RBM. This hybrid DNN has approximately
35 | # the same number of parameters (9.2 million) as our CTC models (8.5 million).
36 | %WER 9.91 [ 816 / 8234, 160 ins, 80 del, 576 sub ] exp/dnn4_pretrain-dbn-fbank_dnn_V2/decode_tgpr_dev93/wer_11
37 | %WER 7.14 [ 403 / 5643, 100 ins, 17 del, 286 sub ] exp/dnn4_pretrain-dbn-fbank_dnn_V2/decode_tgpr_eval92/wer_10
38 |
39 |
--------------------------------------------------------------------------------
/asr_egs/wsj/cmd.sh:
--------------------------------------------------------------------------------
1 | # "queue.pl" uses qsub. The options to it are
2 | # options to qsub. If you have GridEngine installed,
3 | # change this to a queue you have access to.
4 | # Otherwise, use "run.pl", which will run jobs locally
5 | # (make sure your --num-jobs options are no more than
6 | # the number of cpus on your machine).
7 |
8 | #a) JHU cluster options
9 | #export train_cmd="queue.pl -l arch=*64"
10 | #export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
11 | #export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
12 | #export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
13 | #export cuda_cmd="queue.pl -l gpu=1"
14 |
15 | #c) run it locally... works for CMU rocks cluster
16 | export train_cmd=run.pl
17 | export decode_cmd=run.pl
18 | export cuda_cmd=run.pl
19 | # Comet cluster
20 | #export cuda_cmd="slurm_comet.pl -p gpu-shared -t 48:00:00 --gpu 1"
21 |
22 |
--------------------------------------------------------------------------------
/asr_egs/wsj/conf/fbank.conf:
--------------------------------------------------------------------------------
1 | --num-mel-bins=40
2 |
--------------------------------------------------------------------------------
/asr_egs/wsj/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --use-energy=false # only non-default option.
2 |
--------------------------------------------------------------------------------
/asr_egs/wsj/local/find_transcripts.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This program takes on its standard input a list of utterance
# id's, one for each line. (e.g. 4k0c030a is an utterance id).
# It takes as its single argument a file list of WSJ "dot" files
# (transcript files, one per speaker) and prints, for each utterance
# id, a line "<utt-id> <transcript>".

@ARGV == 1 || die "find_transcripts.pl dot_files_flist < utterance_ids > transcripts";
$dot_flist = shift @ARGV;

# Map each speaker id (first 6 characters of the utterance id) to that
# speaker's dot file.
open(L, "<$dot_flist") || die "Opening file list of dot files: $dot_flist\n";
while(<L>){
chop;
m:\S+/(\w{6})00\.dot: || die "Bad line in dot file list: $_";  # note: "." before "dot" is now escaped
$spk = $1;
$spk2dot{$spk} = $_;
}
close(L);


while(<STDIN>){
chop;
$uttid = $_;
$uttid =~ m:(\w{6})\w\w: || die "Bad utterance id $_";
$spk = $1;
if($spk ne $curspk) {
# New speaker: load only this speaker's transcripts, so we
# don't keep all the transcripts in memory.
%utt2trans = ();  # fix: was "= { }", which stored a stray hash-ref key
$curspk = $spk;
$dotfile = $spk2dot{$spk};
defined $dotfile || die "No dot file for speaker $spk\n";
open(F, "<$dotfile") || die "Error opening dot file $dotfile\n";
while(<F>) {
# Dot-file lines look like: "some transcript words (4k0c030a)"
$_ =~ m:(.+)\((\w{8})\)\s*$: || die "Bad line $_ in dot file $dotfile (line $.)\n";
$trans = $1;
$utt = $2;
$utt2trans{$utt} = $trans;
}
close(F);
}
if(!defined $utt2trans{$uttid}) {
print STDERR "No transcript for utterance $uttid (current dot file is $dotfile)\n";
} else {
print "$uttid $utt2trans{$uttid}\n";
}
}
63 |
64 |
65 |
--------------------------------------------------------------------------------
/asr_egs/wsj/local/flist2scp.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Reads a file list with lines like
#   /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# and writes a Kaldi-format scp with lines like
#   4k0c030a /mnt/matylda2/data/WSJ1/13-16.1/wsj1/si_dt_20/4k0/4k0c030a.wv1
# i.e. "<utterance-id> <path>", where the utterance id is the lowercased
# basename of the file.

while (my $line = <>) {
  $line =~ m{^\S+/(\w+)\.[wW][vV]1$}
    or die "Bad line $line";
  # Lowercasing is necessary because of weirdness on disk 13-16.1
  # (uppercase filenames).
  my $utt_id = lc $1;
  print "$utt_id $line";
}
31 |
32 |
--------------------------------------------------------------------------------
/asr_egs/wsj/local/score.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Apache 2.0

# Scores CTC decoding lattices: extracts the best path at a range of
# acoustic scales and computes WER against filtered reference transcripts.

[ -f ./path.sh ] && . ./path.sh

# begin configuration section.
cmd=run.pl
stage=0
min_acwt=5
max_acwt=10
acwt_factor=0.1  # the scaling factor for the acoustic scale. The scaling factor for acoustic likelihoods
                 # needs to be 0.5 ~1.0. However, the job submission script can only take integers as the
                 # job marker. That's why we set the acwt to be integers (5 ~ 10), but scale them with 0.1
                 # when they are actually used.
#end configuration section.

. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
  echo "    --min_acwt <int>                # minumum LM-weight for lattice rescoring "
  echo "    --max_acwt <int>                # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

symtab=$lang_or_graph/words.txt

for f in $symtab $dir/lat.1.gz $data/text; do
  [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1;
done

mkdir -p $dir/scoring/log

# Remove noise/unknown-word markers from the reference transcripts.
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' | sed 's:<UNK>::g' > $dir/scoring/test_filt.txt

# Best path through the lattices at each acoustic scale.
$cmd ACWT=$min_acwt:$max_acwt $dir/scoring/log/best_path.ACWT.log \
  lattice-scale --acoustic-scale=ACWT --ascale-factor=$acwt_factor "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
  lattice-best-path --word-symbol-table=$symtab ark:- ark,t:$dir/scoring/ACWT.tra || exit 1;

# NOTE(review): this duplicates test_filt.txt above under a second name;
# both are kept in case downstream tools read either file.
cat $data/text | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' | sed 's:<UNK>::g' > $dir/scoring/text_filt

for acwt in `seq $min_acwt $max_acwt`; do
  cat $dir/scoring/${acwt}.tra | utils/int2sym.pl -f 2- $symtab | \
    sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' | sed 's:<UNK>::g' | \
    compute-wer --text --mode=present ark:$dir/scoring/text_filt ark,p:- >& $dir/wer_$acwt || exit 1;
done

exit 0;
56 |
--------------------------------------------------------------------------------
/asr_egs/wsj/path.sh:
--------------------------------------------------------------------------------
1 | export EESEN_ROOT=`pwd`/../..
2 | export PATH=$PWD/utils/:$EESEN_ROOT/src/netbin:$EESEN_ROOT/src/featbin:$EESEN_ROOT/src/decoderbin:$EESEN_ROOT/src/fstbin:$EESEN_ROOT/tools/openfst/bin:$EESEN_ROOT/tools/extras/irstlm/bin/:$PWD:$PATH
3 | export LC_ALL=C
4 |
5 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/best_wer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Copyright 2010-2011 Microsoft Corporation
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | # To be run from one directory above this script.
19 |
20 | perl -e 'while(<>){
21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g;
22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool.
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|:
24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite.
25 | if (defined $bestline){ print $bestline; } ' | \
26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \
27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \
28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \
29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||'
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/build_const_arpa_lm.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2014 Guoguo Chen
# Apache 2.0

# This script reads in an Arpa format language model, and converts it into the
# ConstArpaLm format language model.

# begin configuration section
# end configuration section

[ -f path.sh ] && . ./path.sh;

. utils/parse_options.sh

if [ $# != 3 ]; then
  echo "Usage: "
  echo "  $0 [options] <arpa-lm-path> <old-lang-dir> <new-lang-dir>"
  echo "e.g.:"
  echo "  $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed"
  echo "Options"
  exit 1;
fi

export LC_ALL=C

arpa_lm=$1
old_lang=$2
new_lang=$3

# Start the new lang directory as a copy of the old one.
# (Fix: the original ran "mkdir -p $new_lang" twice.)
mkdir -p $new_lang
cp -r $old_lang/* $new_lang

# Integer ids of the OOV, begin- and end-of-sentence symbols.
unk=`cat $new_lang/oov.int`
bos=`grep "<s>" $new_lang/words.txt | awk '{print $2}'`
eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'`
if [[ -z $bos || -z $eos ]]; then
  echo "$0: <s> and </s> symbols are not in $new_lang/words.txt"
  exit 1
fi


arpa-to-const-arpa --bos-symbol=$bos \
  --eos-symbol=$eos --unk-symbol=$unk \
  "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1;

exit 0;
51 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/ctc_token_fst.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Apache 2.0
4 |
5 | import sys
6 |
7 | fread = open(sys.argv[1], 'r')
8 |
9 | print '0 1 '
10 | print '1 1 '
11 | print '2 2 '
12 | print '2 0 '
13 |
14 | nodeX = 3
15 | for entry in fread.readlines():
16 | entry = entry.replace('\n','').strip()
17 | fields = entry.split(' ')
18 | phone = fields[0]
19 | if phone == '' or phone == '':
20 | continue
21 |
22 | if '#' in phone:
23 | print str(0) + ' ' + str(0) + ' ' + '' + ' ' + phone;
24 | else:
25 | print str(1) + ' ' + str(nodeX) + ' ' + phone + ' ' + phone;
26 | print str(nodeX) + ' ' + str(nodeX) + ' ' + phone + ' ';
27 | print str(nodeX) + ' ' + str(2) + ' ' + ' ';
28 | nodeX += 1
29 | print '0'
30 |
31 | fread.close()
32 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/distribute_scp.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w

# Copyright 2015 Hang Su. Apache 2.0.

# This script split an scp list either by length of the frames or in round-robin manner
#
# Usage: distribute_scp.pl [--mode frame|utt] <num-jobs> <base-filename> [scp...]
# Writes <base-filename>.1.scp ... <base-filename>.N.scp.
# In "frame" mode each input line is expected to be "<entry> <num-frames>".

$mode = 'frame';
if ($ARGV[0] eq '--mode') {
shift @ARGV;
$mode = shift @ARGV;
}

$num_jobs = $ARGV[0]; shift;
$base_filename = $ARGV[0]; shift;

# Running total of frames assigned to each job (frame mode only).
@num_frames = (0) x $num_jobs;

# Open one output scp per job.
foreach $i (1..$num_jobs) {
local *FILE;
open(FILE, "> $base_filename.$i.scp") || die;
push(@file_handles, *FILE);
}

$count = 0;
while (<>) {
chomp;
if ($mode eq "utt") {
# Round-robin by line.
$id = ($count % $num_jobs) ;
print {$file_handles[$id]} $_,"\n";
} elsif ($mode eq "frame") {
# Greedy balancing: assign each entry to the currently least-loaded job,
# then charge that job with the entry's frame count ($A[1]).
@A = split /\s+/;
$id_min = 0;
$num_frames[$id_min] < $num_frames[$_] or $id_min = $_ for 1 .. $#num_frames; # find the smallest index
$id = $id_min;
$num_frames[$id_min] += $A[1];
print {$file_handles[$id]} $A[0],"\n";
} else {
die "Un-recognized mode $mode!";
}
$count += 1;
}

# Print the smallest per-job frame total to stdout.
# NOTE(review): presumably consumed by the calling script to size the
# per-job workload — confirm against callers.
$id_min = 0;
$num_frames[$id_min] < $num_frames[$_] or $id_min = $_ for 1 .. $#num_frames; # find the smallest index
print "$num_frames[$id_min]";
46 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/eps2disambig.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script replaces epsilon with #0 on the input side only, of the G.fst
18 | # acceptor.
19 |
# Replace an <eps> input label (third field of a printed FST arc, i.e. the
# text right after "<state> <state> ") with the disambiguation symbol #0.
while(<>){
  s:^(\d+\s+\d+\s+)<eps>(\s+):$1#0$2:;
  print;
}
24 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/find_arpa_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
# This program finds words in the arpa file that are not symbols
# in the OpenFst-format symbol table words.txt.  It prints them
# on the standard output, one per line.
if (@ARGV < 1 || @ARGV > 2) {
  die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n";
}

$symtab = shift @ARGV;
open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n";
while(<S>){
  @A = split(" ", $_);
  @A == 2 || die "Bad line in symbol table file: $_";
  $seen{$A[0]} = 1;
}

$curgram=0;
while(<>) { # Find the \data\ marker.
  if(m:^\\data\\$:) { last; }
}
while(<>) {
  if(m/^\\(\d+)\-grams:\s*$/) {
    $curgram = $1;
    if($curgram > 1) {
      last; # This is an optimization as we can get the vocab from the 1-grams
    }
  } elsif($curgram > 0) {
    @A = split(" ", $_);
    if(@A > 1) {
      shift @A; # Drop the log-probability field; the rest are words.
      for($n=0;$n<$curgram;$n++) {
        $word = $A[$n];
        if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; }
        $in_arpa{$word} = 1;
      }
    } else {
      if(@A > 0 && $A[0] !~ m:\\end\\:) {
        print STDERR "Unusual line $_ (line $.) in arpa file\n";
      }
    }
  }
}

# The sentence-boundary symbols <s> and </s> are special and are not
# expected in words.txt, so never report them as OOVs.
foreach $w (keys %in_arpa) {
  if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") {
    print "$w\n";
  }
}
65 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/int2sym.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
3 | # Apache 2.0.
4 |
undef $field_begin;
undef $field_end;

# Parse the optional "-f <field-range>" option, e.g. "-f 2" or "-f 3-4",
# selecting which whitespace-separated fields get mapped.
if ($ARGV[0] eq "-f") {
  shift @ARGV;
  $field_spec = shift @ARGV;
  if ($field_spec =~ m/^\d+$/) { # a single field number
    $field_begin = $field_spec - 1; $field_end = $field_spec - 1;
  }
  if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesy (properly, 1-10)
    if ($1 ne "") {
      $field_begin = $1 - 1; # Change to zero-based indexing.
    }
    if ($2 ne "") {
      $field_end = $2 - 1; # Change to zero-based indexing.
    }
  }
  if (!defined $field_begin && !defined $field_end) {
    die "Bad argument to -f option: $field_spec";
  }
}
$symtab = shift @ARGV;
if(!defined $symtab) {
  print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
    "options: [-f <field-range>]\n" .
    "e.g.: -f 2, or -f 3-4\n";
  exit(1);
}

# Load the symbol table; each line is "<symbol> <integer-id>".
open(F, "<$symtab") || die "Error opening symbol table file $symtab";
while(<F>) {
  @A = split(" ", $_);
  @A == 2 || die "bad line in symbol table file: $_";
  $int2sym{$A[1]} = $A[0];
}
41 |
# Map one integer token to its symbol via the global %int2sym table.
# $pos is the zero-based field position, used only for the error message.
sub int2sym {
  my ($token, $pos) = @_;
  if ($token !~ m/^\d+$/) { # must be all digits
    my $one_based = $pos + 1;
    die "int2sym.pl: found noninteger token $token [in position $one_based]\n";
  }
  my $sym = $int2sym{$token};
  if (!defined($sym)) {
    die "int2sym.pl: integer $token not in symbol table $symtab.";
  }
  return $sym;
}
55 |
# For each input line, map every token in the selected field range through
# the symbol table; tokens outside the range pass through unchanged.
# (A stray unused "$error = 0;" was removed.)
while (<>) {
  @A = split(" ", $_);
  for ($pos = 0; $pos <= $#A; $pos++) {
    $a = $A[$pos];
    if ( (!defined $field_begin || $pos >= $field_begin)
         && (!defined $field_end || $pos <= $field_end)) {
      $a = int2sym($a, $pos);
    }
    print $a . " ";
  }
  print "\n";
}
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/prep_scps.sh:
--------------------------------------------------------------------------------
#!/bin/bash
{
# Copyright 2015 Hang Su
# Apache 2.0

# This script prepares feature scp files for CTC training: utterances are
# sorted by length and packed into mini-batches, the batch list is shuffled,
# and the features are re-dumped with copy-feats so each batch is stored
# contiguously.

set -e
set -o pipefail

## Begin configuration section
clean_up=true   # NOTE(review): not referenced below; kept as a pass-through option
seed=           # random seed for batch shuffling (defaults to 777 below)
cmd=            # job dispatcher (e.g. run.pl); required when nj > 1
nj=1            # number of parallel copy-feats jobs
# End of configuration

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh;

. utils/parse_options.sh || exit 1;

if [ $# != 6 ]; then
  echo "Usage: $0 [options] <feat-tr-scp> <feat-cv-scp> <num-sequence> <frame-num-limit> <tmpdir> <dir>"
  echo " e.g.: $0 data/train_tr/feats.scp data/train_cv/feats.scp 10 25000 /tmp/prep exp/train_ctc"
  echo "options:"
  echo "  --nj <n>       # number of parallel jobs (default 1)"
  echo "  --cmd <cmd>    # command to run parallel jobs (needed when --nj > 1)"
  echo "  --seed <n>     # shuffling seed (default 777)"
  exit 1
fi

feat_tr=$1
feat_cv=$2
num_sequence=$3
frame_num_limit=$4
tmpdir=$5
dir=$6

for part in tr cv; do
  feat=$(eval echo "\$feat_${part}")

  # Sort utterances by frame count, then greedily group consecutive
  # utterances into one batch (one output line) until the batch reaches
  # num_sequence utterances or its padded size (num_utts * longest
  # utterance) would exceed frame_num_limit.  Finally shuffle the batches.
  feat-to-len scp:$feat ark,t:- | sort -k2 -n | \
  awk -v num_sequence=$num_sequence -v frame_num_limit=$frame_num_limit '
  BEGIN {max_frame_num = 0; num_utts = 0;}
  {
    printf("%s ",$1);
    num_utts++;
    if (max_frame_num < $2) {
      max_frame_num = $2;
    }
    if (num_utts >= num_sequence || num_utts * max_frame_num > frame_num_limit) {
      printf("\n");
      num_utts = 0;
      max_frame_num = 0;
    }
  }' | utils/shuffle_list.pl --srand ${seed:-777} > $dir/batch.$part.list

  # Split the shuffled batch list across the nj jobs.
  split_batches=""
  for n in $(seq $nj); do
    split_batches="$split_batches $tmpdir/batch.$part.$n.list"
  done
  utils/split_scp.pl $dir/batch.$part.list $split_batches

  # Expand each batch line back into "<utt-id> <rspecifier>" scp entries,
  # looking the utterances up in the original feats scp.
  for n in $(seq $nj); do
    awk '
    NR==FNR {a[$1]=$2;next}
    {
      for (i=1; i<=NF; i++) {
        printf("%s %s\n", $i, a[$i]);
      }
    }' $feat $tmpdir/batch.$part.$n.list > $tmpdir/batch.$part.$n.scp
  done

  # Re-dump the features in batch order so training can read them linearly.
  if [ $nj -ne 1 ]; then
    $cmd JOB=1:$nj $dir/log/prepare_feats_$part.JOB.log \
      copy-feats scp:$tmpdir/batch.$part.JOB.scp ark,scp:$tmpdir/feats_$part.JOB.ark,$dir/feats_$part.JOB.scp
  else
    copy-feats scp:$tmpdir/batch.$part.1.scp ark,scp:$tmpdir/feats_$part.1.ark,$dir/feats_$part.1.scp
  fi

done

}
81 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/remove_oovs.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # This script removes lines that contain these OOVs on either the
18 | # third or fourth fields of the line. It is intended to remove arcs
19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in).
20 |
if (@ARGV < 1 || @ARGV > 2) {
  die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n";
}

$unklist = shift @ARGV;
open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n";
while(<S>){
  @A = split(" ", $_);
  @A == 1 || die "Bad line in unknown-symbol list: $_";
  $unk{$A[0]} = 1;
}

$num_removed = 0;
# Fields 2 and 3 (zero-based) of a printed-FST arc line are the input and
# output labels; drop any arc whose label is in the unknown-symbol list.
while(<>){
  @A = split(" ", $_);
  if(defined $unk{$A[2]} || defined $unk{$A[3]}) {
    $num_removed++;
  } else {
    print;
  }
}
print STDERR "remove_oovs.pl: removed $num_removed lines.\n";
43 |
44 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/s2eps.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
# This script replaces <s> and </s> with <eps> (on both the input and
# output sides), for the G.fst acceptor.

while(<>){
  @A = split(" ", $_);
  if ( @A >= 4 ) {
    if ($A[2] eq "<s>" || $A[2] eq "</s>") { $A[2] = "<eps>"; }
    if ($A[3] eq "<s>" || $A[3] eq "</s>") { $A[3] = "<eps>"; }
  }
  print join("\t", @A) . "\n";
}
28 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/shuffle_list.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 |
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
14 | # MERCHANTABLITY OR NON-INFRINGEMENT.
15 | # See the Apache 2 License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
use List::Util qw(shuffle);  # core module: proper Fisher-Yates shuffle

# Optional --srand N seeds the RNG so the shuffle is reproducible.
if ($ARGV[0] eq "--srand") {
  $n = $ARGV[1];
  $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\"";
  srand($ARGV[1]);
  shift;
  shift;
} else {
  srand(0); # Gives inconsistent behavior if we don't seed.
}

if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we
  # don't understand.
  print "Usage: shuffle_list.pl [--srand N] [input file] > output\n";
  print "randomizes the order of lines of input.\n";
  exit(1);
}

# The old "sort { rand() <=> rand() }" used an inconsistent comparator,
# which perldoc -f sort documents as giving unspecified results and which
# does not produce a uniform shuffle.  List::Util::shuffle is a true
# random permutation and still honors the srand() seed above.
@lines = <>;
print shuffle(@lines);
38 | print @lines;
39 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/spk2utt_to_utt2spk.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
# Each spk2utt line is "<speaker> <utt1> <utt2> ...": emit one
# "<utt> <speaker>" line per utterance.
while (<>) {
  my @fields = split(" ", $_);
  @fields > 1 || die "Invalid line in spk2utt file: $_";
  my $spk = shift @fields;
  foreach my $utt (@fields) {
    print "$utt $spk\n";
  }
}
26 |
27 |
28 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/training_trans_fst.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Apache 2.0
4 |
import sys


def fst_lines(fields):
    """Return the text-FST lines for one utterance.

    fields: list of word ids/symbols (everything after the utterance id).
    Builds a linear chain 0 -> 1 -> ... -> n with each word as both input
    and output label, then an arc from the last state back to state 0 with
    label 0 (assumed to be <eps> in words.txt), and finally marks state 0
    as final.
    """
    lines = []
    n = 0  # last state; stays 0 for an utterance with no words
    for n, word in enumerate(fields, start=1):
        lines.append('%d %d %s %s' % (n - 1, n, word, word))
    lines.append('%d 0 0 0' % n)  # assume that <eps> is 0 in words.txt
    lines.append('0')             # state 0 is final
    return lines


if __name__ == '__main__':
    # Each input line is "uttid word1 word2 ..."; the uttid itself is not
    # part of the FST.
    with open(sys.argv[1], 'r') as fread:
        for entry in fread:
            fields = entry.split()  # robust to repeated whitespace
            if not fields:
                continue  # skip blank lines (old code referenced a stale 'n')
            for line in fst_lines(fields[1:]):
                print(line)
22 |
--------------------------------------------------------------------------------
/asr_egs/wsj/utils/utt2spk_to_spk2utt.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Copyright 2010-2011 Microsoft Corporation
3 |
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
13 | # MERCHANTABLITY OR NON-INFRINGEMENT.
14 | # See the Apache 2 License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # converts an utt2spk file to a spk2utt file.
18 | # Takes input from the stdin or from a file argument;
19 | # output goes to the standard out.
20 |
if (@ARGV > 1) {
  die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
}

# Group utterances under their speaker, remembering the order in which
# each speaker was first seen so the output order is stable.
while (<>) {
  my ($utt, $spk, @extra) = split(" ", $_);
  (defined $spk && !@extra) || die "Invalid line in utt2spk file: $_";
  unless ($seen_spk{$spk}) {
    $seen_spk{$spk} = 1;
    push @spklist, $spk;
  }
  push @{$spk_hash{$spk}}, $utt;
}

# One output line per speaker: "<speaker> <utt1> <utt2> ...".
foreach my $spk (@spklist) {
  my $utts = join(' ', @{$spk_hash{$spk}});
  print "$spk $utts\n";
}
39 |
--------------------------------------------------------------------------------
/src/base/Makefile:
--------------------------------------------------------------------------------
1 |
# Makefile for src/base.  This file only declares what to build; the
# actual rules come from ../makefiles/default_rules.mk.

all:

include ../config.mk

# Unit-test binaries.
TESTFILES = kaldi-math-test io-funcs-test kaldi-error-test

# Object files compiled into this library.
OBJFILES = kaldi-math.o kaldi-error.o io-funcs.o kaldi-utils.o

LIBNAME = base

# No additional libraries are needed to link this directory's tests.
ADDLIBS =

include ../makefiles/default_rules.mk
15 |
16 |
--------------------------------------------------------------------------------
/src/base/kaldi-common.h:
--------------------------------------------------------------------------------
1 | // base/kaldi-common.h
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef KALDI_BASE_KALDI_COMMON_H_
21 | #define KALDI_BASE_KALDI_COMMON_H_ 1
22 |
#include <cstddef>
#include <cstdlib>
#include <cstring>  // C string stuff like strcpy
#include <string>
#include <sstream>
#include <stdexcept>
#include <cassert>
#include <vector>
#include <iostream>
#include <fstream>
33 |
34 | #include "base/kaldi-utils.h"
35 | #include "base/kaldi-error.h"
36 | #include "base/kaldi-types.h"
37 | #include "base/io-funcs.h"
38 | #include "base/kaldi-math.h"
39 |
40 | #endif // KALDI_BASE_KALDI_COMMON_H_
41 |
42 |
--------------------------------------------------------------------------------
/src/base/kaldi-error-test.cc:
--------------------------------------------------------------------------------
1 | // base/kaldi-error-test.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 |
23 | // testing that we get the stack trace.
namespace eesen {

// Innermost frame of the deliberate error: logs and throws via KALDI_ERR.
void MyFunction2() {
  KALDI_ERR << "Ignore this error";
}

// Adds one extra stack frame above MyFunction2, so the reported stack
// trace has something to show.
void MyFunction1() {
  MyFunction2();
}

// Triggers the error and lets it propagate to main(), which catches the
// std::runtime_error produced by KALDI_ERR.
void UnitTestError() {
  {
    std::cerr << "Ignore next error:\n";
    MyFunction1();
  }
}


} // end namespace eesen.
43 |
int main() {
  // Give the error-reporting code a program name to print in the trace.
  eesen::g_program_name = "/foo/bar/kaldi-error-test";
  try {
    eesen::UnitTestError();
    KALDI_ASSERT(0); // should not happen: UnitTestError must throw.
  } catch (std::runtime_error &r) {
    std::cout << "UnitTestError: the error we generated was: " << r.what();
  }
}
53 |
54 |
--------------------------------------------------------------------------------
/src/base/kaldi-types.h:
--------------------------------------------------------------------------------
1 | // base/kaldi-types.h
2 |
3 | // Copyright 2009-2011 Microsoft Corporation; Saarland University;
4 | // Jan Silovsky; Yanmin Qian
5 |
6 | // See ../../COPYING for clarification regarding multiple authors
7 | //
8 | // Licensed under the Apache License, Version 2.0 (the "License");
9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at
11 | //
12 | // http://www.apache.org/licenses/LICENSE-2.0
13 | //
14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 | // MERCHANTABLITY OR NON-INFRINGEMENT.
18 | // See the Apache 2 License for the specific language governing permissions and
19 | // limitations under the License.
20 |
21 | #ifndef KALDI_BASE_KALDI_TYPES_H_
22 | #define KALDI_BASE_KALDI_TYPES_H_ 1
23 |
namespace eesen {
// TYPEDEFS ..................................................................
// BaseFloat is double when compiled with KALDI_DOUBLEPRECISION nonzero,
// float otherwise.
#if (KALDI_DOUBLEPRECISION != 0)
typedef double BaseFloat;
#else
typedef float BaseFloat;
#endif
}
32 |
33 | #ifdef _MSC_VER
namespace eesen {
// Fixed-width typedefs built on MSVC's __intN types (historically MSVC
// lacked stdint.h; see the comment in the #else branch below).
typedef unsigned __int16 uint16;
typedef unsigned __int32 uint32;
typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int64 uint64;
typedef float float32;
typedef double double64;
}
#include <basetyps.h>
45 | #define ssize_t SSIZE_T
46 |
47 | #else
48 | // we can do this a different way if some platform
49 | // we find in the future lacks stdint.h
#include <stdint.h>
51 |
namespace eesen {
// Fixed-width typedefs from stdint.h (non-MSVC platforms).
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef float float32;
typedef double double64;
} // end namespace eesen
62 | #endif
63 |
64 | #endif // KALDI_BASE_KALDI_TYPES_H_
65 |
--------------------------------------------------------------------------------
/src/base/kaldi-utils.cc:
--------------------------------------------------------------------------------
1 | // base/kaldi-utils.cc
2 | // Copyright 2009-2011 Karel Vesely; Yanmin Qian; Microsoft Corporation
3 | // 2015 Hang Su
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 |
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
#include <cctype>
#include <cstdio>
#include <fstream>
#include "base/kaldi-common.h"
24 |
namespace eesen {

// Render a char as a quoted printable character, e.g. 'a', or as
// "[character N]" for non-printable values.
std::string CharToString(const char &c) {
  std::ostringstream formatter;
  if (std::isprint(c)) {
    formatter << '\'' << c << '\'';
  } else {
    formatter << "[character " << static_cast<int>(c) << "]";
  }
  return formatter.str();
}

// Decimal string representation of an int.
std::string IntToString(const int &i) {
  std::ostringstream formatter;
  formatter << i;
  return formatter.str();
}

// True if the file can be opened for reading.
bool FileExist(const char *file_name) {
  std::ifstream probe(file_name);
  return probe.good();
}


} // end namespace eesen
49 |
--------------------------------------------------------------------------------
/src/base/timer-test.cc:
--------------------------------------------------------------------------------
1 | // base/timer-test.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 |
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #include "base/timer.h"
21 | #include "base/kaldi-common.h"
22 |
23 |
24 |
namespace eesen {

// Sleep for about one second and check that Timer::Elapsed() reports it,
// to within a 0.1-second tolerance.
void TimerTest() {

  Timer timer;
#if defined(_MSC_VER) || defined(MINGW)
  Sleep(1000);  // Windows Sleep() takes milliseconds.
#else
  sleep(1);     // POSIX sleep() takes seconds.
#endif
  BaseFloat f = timer.Elapsed();
  std::cout << "time is " << f;
  KALDI_ASSERT(fabs(1.0 - f) < 0.1);
}

}
41 |
42 |
int main() {
  // Run the single timer check; KALDI_ASSERT aborts on failure.
  eesen::TimerTest();
}
46 |
47 |
--------------------------------------------------------------------------------
/src/base/timer.h:
--------------------------------------------------------------------------------
1 | // base/timer.h
2 |
3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 |
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 | #ifndef KALDI_BASE_TIMER_H_
20 | #define KALDI_BASE_TIMER_H_
21 |
22 | #if defined(_MSC_VER) || defined(MINGW)
23 |
24 | #include "base/kaldi-utils.h"
25 |
namespace eesen
{

// Wall-clock timer for Windows, based on QueryPerformanceCounter.
class Timer {
public:
  Timer() { Reset(); }
  // Restart timing from now.
  void Reset() {
    QueryPerformanceCounter(&time_start_);
  }
  // Seconds elapsed since construction or the last Reset().
  double Elapsed() {
    LARGE_INTEGER time_end;
    LARGE_INTEGER freq;
    QueryPerformanceCounter(&time_end);
    if (QueryPerformanceFrequency(&freq) == 0) return 0.0; // Hardware does not support this.
    return ((double)time_end.QuadPart - (double)time_start_.QuadPart) /
      ((double)freq.QuadPart);
  }
private:
  LARGE_INTEGER time_start_;  // counter value at the last Reset()
};
}
47 |
48 | #else
49 |
# include <sys/time.h>
# include <unistd.h>
namespace eesen
{
// Wall-clock timer based on gettimeofday().
class Timer
{
 public:
  Timer() { Reset(); }

  // Restart timing from now.
  void Reset() { gettimeofday(&this->time_start_, &time_zone_); }

  /// Returns time in seconds.
  double Elapsed() {
    struct timeval now;
    gettimeofday(&now, &time_zone_);
    double started = static_cast<double>(time_start_.tv_sec) +
                     static_cast<double>(time_start_.tv_usec) / (1000 * 1000);
    double current = static_cast<double>(now.tv_sec) +
                     static_cast<double>(now.tv_usec) / (1000 * 1000);
    return current - started;
  }

 private:
  struct timeval time_start_;   // time recorded at the last Reset()
  struct timezone time_zone_;   // required by gettimeofday's signature
};
}
77 |
78 | #endif
79 |
80 |
81 | #endif
82 |
--------------------------------------------------------------------------------
/src/cpucompute/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
all:

# Overridden to empty — this directory appears not to need OpenFst
# (NOTE(review): confirm against ../config.mk defaults).
OPENFST_CXXFLAGS =
OPENFST_LDLIBS =

include ../config.mk


# you can uncomment matrix-lib-speed-test if you want to do the speed tests.

TESTFILES =

# Object files compiled into this library.
OBJFILES = matrix.o vector.o matrix-functions.o compressed-matrix.o

LIBNAME = cpucompute

# Links against the base library.
ADDLIBS = ../base/base.a

include ../makefiles/default_rules.mk
22 |
23 |
--------------------------------------------------------------------------------
/src/cpucompute/matrix-functions-inl.h:
--------------------------------------------------------------------------------
1 | // cpucompute/matrix-functions-inl.h
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 | //
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 | //
20 | // (*) incorporates, with permission, FFT code from his book
21 | // "Signal Processing with Lapped Transforms", Artech, 1992.
22 |
23 |
24 |
25 | #ifndef CPUCOMPUTE_MATRIX_FUNCTIONS_INL_H_
26 | #define CPUCOMPUTE_MATRIX_FUNCTIONS_INL_H_
27 |
namespace eesen {

// The template parameter lists below were missing (stripped to a bare
// "template"), which does not compile; restored as template<typename Real>.

//! ComplexMul implements, inline, the complex multiplication b *= a.
template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
                                               Real *b_re, Real *b_im) {
  Real tmp_re = (*b_re * a_re) - (*b_im * a_im);
  *b_im = *b_re * a_im + *b_im * a_re;
  *b_re = tmp_re;
}

//! ComplexAddProduct implements, inline, the statement c += a * b.
template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
                                                      const Real &b_re, const Real &b_im,
                                                      Real *c_re, Real *c_im) {
  *c_re += b_re*a_re - b_im*a_im;
  *c_im += b_re*a_im + b_im*a_re;
}

//! ComplexImExp implements, inline, a = exp(i x), i.e. the unit complex
//! number with phase x.
template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
  *a_re = std::cos(x);
  *a_im = std::sin(x);
}


} // end namespace eesen
53 |
54 |
55 | #endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
56 |
57 |
--------------------------------------------------------------------------------
/src/cpucompute/matrix-inl.h:
--------------------------------------------------------------------------------
1 | // cpucompute/matrix-inl.h
2 |
3 | // Copyright 2009-2011 Microsoft Corporation; Haihua Xu
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef CPUCOMPUTE_MATRIX_INL_H_
21 | #define CPUCOMPUTE_MATRIX_INL_H_ 1
22 |
23 | #include "cpucompute/vector.h"
24 |
25 | namespace eesen {
26 |
27 | /// Empty constructor
28 | template
29 | Matrix::Matrix(): MatrixBase(NULL, 0, 0, 0) { }
30 |
31 |
32 | template<>
33 | template<>
34 | void MatrixBase::AddVecVec(const float alpha, const VectorBase &ra, const VectorBase &rb);
35 |
36 | template<>
37 | template<>
38 | void MatrixBase::AddVecVec(const double alpha, const VectorBase &ra, const VectorBase &rb);
39 |
40 | template
41 | inline std::ostream & operator << (std::ostream & os, const MatrixBase & M) {
42 | M.Write(os, false);
43 | return os;
44 | }
45 |
46 | template
47 | inline std::istream & operator >> (std::istream & is, Matrix & M) {
48 | M.Read(is, false);
49 | return is;
50 | }
51 |
52 |
53 | template
54 | inline std::istream & operator >> (std::istream & is, MatrixBase & M) {
55 | M.Read(is, false);
56 | return is;
57 | }
58 |
59 | }// namespace eesen
60 |
61 |
62 | #endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_
63 |
--------------------------------------------------------------------------------
/src/cpucompute/matrix-lib.h:
--------------------------------------------------------------------------------
1 | // cpucompute/matrix-lib.h
2 |
3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 |
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | // Include everything from this directory.
21 | // These files include other stuff that we need.
22 | #ifndef CPUCOMPUTE_MATRIX_LIB_H_
23 | #define CPUCOMPUTE_MATRIX_LIB_H_
24 |
25 | #include "cpucompute/cblas-wrappers.h"
26 | #include "base/kaldi-common.h"
27 | #include "cpucompute/vector.h"
28 | #include "cpucompute/matrix.h"
29 | #include "cpucompute/matrix-functions.h"
30 | #include "cpucompute/compressed-matrix.h"
31 |
32 | #endif
33 |
34 |
--------------------------------------------------------------------------------
/src/cpucompute/vector-inl.h:
--------------------------------------------------------------------------------
1 | // cpucompute/vector-inl.h
2 |
3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation;
4 | // Haihua Xu
5 |
6 | // See ../../COPYING for clarification regarding multiple authors
7 | //
8 | // Licensed under the Apache License, Version 2.0 (the "License");
9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at
11 | //
12 | // http://www.apache.org/licenses/LICENSE-2.0
13 | //
14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 | // MERCHANTABLITY OR NON-INFRINGEMENT.
18 | // See the Apache 2 License for the specific language governing permissions and
19 | // limitations under the License.
20 |
21 | // This is an internal header file, included by other library headers.
22 | // You should not attempt to use it directly.
23 |
24 | #ifndef CPUCOMPUTE_VECTOR_INL_H_
25 | #define CPUCOMPUTE_VECTOR_INL_H_ 1
26 |
27 | namespace eesen {
28 |
29 | template
30 | std::ostream & operator << (std::ostream &os, const VectorBase &rv) {
31 | rv.Write(os, false);
32 | return os;
33 | }
34 |
35 | template
36 | std::istream &operator >> (std::istream &is, VectorBase &rv) {
37 | rv.Read(is, false);
38 | return is;
39 | }
40 |
41 | template
42 | std::istream &operator >> (std::istream &is, Vector &rv) {
43 | rv.Read(is, false);
44 | return is;
45 | }
46 |
47 | template<>
48 | template<>
49 | void VectorBase::AddVec(const float alpha, const VectorBase &rv);
50 |
51 | template<>
52 | template<>
53 | void VectorBase::AddVec(const double alpha,
54 | const VectorBase &rv);
55 |
56 | } // namespace eesen
57 |
58 | #endif // KALDI_MATRIX_KALDI_VECTOR_INL_H_
59 |
--------------------------------------------------------------------------------
/src/decoder/Makefile:
--------------------------------------------------------------------------------
# Build the decoder library; the actual rules come from
# ../makefiles/default_rules.mk, included at the bottom.
all:

EXTRA_CXXFLAGS = -Wno-sign-compare -O3
include ../config.mk

# No unit tests in this directory.
TESTFILES =

OBJFILES = lattice-faster-decoder.o faster-decoder.o decoder-wrappers.o

LIBNAME = decoder

# Static libraries this one's tests/binaries link against, in link order.
ADDLIBS = ../lat/lat.a ../util/util.a ../base/base.a ../cpucompute/cpucompute.a

include ../makefiles/default_rules.mk
15 |
16 |
--------------------------------------------------------------------------------
/src/decoder/decoder-wrappers.h:
--------------------------------------------------------------------------------
1 | // decoder/decoder-wrappers.h
2 |
3 | // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef KALDI_DECODER_DECODER_WRAPPERS_H_
21 | #define KALDI_DECODER_DECODER_WRAPPERS_H_
22 |
23 | #include "util/options-itf.h"
24 | #include "decoder/lattice-faster-decoder.h"
25 |
26 | // This header contains declarations from various convenience functions that are called
27 | // from binary-level programs such as gmm-decode-faster.cc, gmm-align-compiled.cc, and
28 | // so on.
29 |
30 | namespace eesen {
31 |
/// Decodes one utterance with a LatticeFasterDecoder and writes whichever
/// outputs have non-NULL writers (alignments, word sequences, compact or
/// non-compact lattices).  Returns true on (possibly partial) success.
/// NOTE(review): behavior inferred from the declaration only — the
/// definition is in decoder-wrappers.cc; confirm details (e.g. what
/// like_ptr receives) against it.
bool DecodeUtteranceLatticeFaster(
    LatticeFasterDecoder &decoder, // not const but is really an input.
    DecodableInterface &decodable, // not const but is really an input.
    const fst::SymbolTable *word_syms,
    std::string utt,
    double acoustic_scale,
    bool determinize,
    bool allow_partial,
    Int32VectorWriter *alignments_writer,
    Int32VectorWriter *words_writer,
    CompactLatticeWriter *compact_lattice_writer,
    LatticeWriter *lattice_writer,
    double *like_ptr);
45 |
46 | } // end namespace eesen.
47 |
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/src/decoderbin/Makefile:
--------------------------------------------------------------------------------
1 |
# Build the decoder command-line binaries; rules come from
# ../makefiles/default_rules.mk, included at the bottom.
all:
EXTRA_CXXFLAGS = -Wno-sign-compare
include ../config.mk

BINFILES = analyze-counts arpa2fst compute-wer decode-faster latgen-faster lattice-best-path lattice-1best lattice-to-nbest lattice-scale nbest-to-ctm lattice-prune lattice-to-ctm-conf lattice-add-penalty

OBJFILES =

# Static libraries the binaries link against, in link order.
ADDLIBS = ../lm/lm.a ../decoder/decoder.a ../lat/lat.a \
          ../cpucompute/cpucompute.a ../util/util.a ../base/base.a


TESTFILES =

include ../makefiles/default_rules.mk
17 |
18 |
--------------------------------------------------------------------------------
/src/decoderbin/arpa2fst.cc:
--------------------------------------------------------------------------------
1 | // bin/arpa2fst.cc
2 | //
3 | // Copyright 2009-2011 Gilles Boulianne.
4 | //
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 |
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 |
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | /// @addtogroup LanguageModel
21 | /// @{
22 |
23 | /**
24 | * @file arpa2fst.cc
25 | * @brief Example for converting an ARPA format language model into an FST.
26 | *
27 | */
28 |
29 | #include
30 | #include "lm/kaldi-lm.h"
31 | #include "util/parse-options.h"
32 |
33 | int main(int argc, char *argv[]) {
34 | try {
35 | const char *usage =
36 | "Converts an ARPA format language model into a FST\n"
37 | "Usage: arpa2fst [opts] (input_arpa|-) [output_fst|-]\n";
38 | eesen::ParseOptions po(usage);
39 |
40 | bool natural_base = true;
41 | po.Register("natural-base", &natural_base, "Use log-base e (not log-base 10)");
42 | po.Read(argc, argv);
43 |
44 | if (po.NumArgs() != 1 && po.NumArgs() != 2) {
45 | po.PrintUsage();
46 | exit(1);
47 | }
48 | std::string arpa_filename = po.GetArg(1),
49 | fst_filename = po.GetOptArg(2);
50 |
51 | eesen::LangModelFst lm;
52 | // read from standard input and write to standard output
53 | lm.Read(arpa_filename, eesen::kArpaLm, NULL, natural_base);
54 | lm.Write(fst_filename);
55 | exit(0);
56 | } catch(const std::exception &e) {
57 | std::cerr << e.what();
58 | return -1;
59 | }
60 | }
61 | /// @}
62 |
63 |
--------------------------------------------------------------------------------
/src/feat/Makefile:
--------------------------------------------------------------------------------
1 |
2 |
# Build the feature-extraction library; rules come from
# ../makefiles/default_rules.mk, included at the bottom.
all:

include ../config.mk

TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \
            feature-functions-test pitch-functions-test feature-sdc-test \
            resample-test

OBJFILES = srfft.o cmvn.o feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \
           feature-spectrogram.o mel-computations.o wave-reader.o \
           pitch-functions.o resample.o online-feature.o

LIBNAME = feat

# Static libraries the tests link against, in link order.
ADDLIBS = ../util/util.a ../cpucompute/cpucompute.a ../base/base.a
# ../thread/thread.a

include ../makefiles/default_rules.mk
21 |
22 |
--------------------------------------------------------------------------------
/src/featbin/Makefile:
--------------------------------------------------------------------------------
1 |
# Build the feature-processing command-line binaries; rules come from
# ../makefiles/default_rules.mk, included at the bottom.
all:
EXTRA_CXXFLAGS = -Wno-sign-compare
include ../config.mk

BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
           compute-cmvn-stats add-deltas apply-cmvn copy-feats extract-segments feat-to-len feat-to-dim \
           compute-kaldi-pitch-feats process-kaldi-pitch-feats paste-feats splice-feats subsample-feats

OBJFILES =

TESTFILES =

# Static libraries the binaries link against, in link order.
ADDLIBS = ../feat/feat.a ../cpucompute/cpucompute.a ../util/util.a ../base/base.a

include ../makefiles/default_rules.mk
17 |
--------------------------------------------------------------------------------
/src/featbin/splice-feats.cc:
--------------------------------------------------------------------------------
1 | // featbin/splice-feats.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #include "base/kaldi-common.h"
21 | #include "util/common-utils.h"
22 | #include "cpucompute/matrix.h"
23 | #include "feat/feature-functions.h"
24 |
25 | int main(int argc, char *argv[]) {
26 | try {
27 | using namespace eesen;
28 |
29 | const char *usage =
30 | "Splice features with left and right context (e.g. prior to LDA)\n"
31 | "Usage: splice-feats [options] \n"
32 | "e.g.: splice-feats scp:feats.scp ark:-\n";
33 |
34 |
35 | ParseOptions po(usage);
36 | int32 left_context = 4, right_context = 4;
37 |
38 |
39 | po.Register("left-context", &left_context, "Number of frames of left context");
40 | po.Register("right-context", &right_context, "Number of frames of right context");
41 |
42 | po.Read(argc, argv);
43 |
44 | if (po.NumArgs() != 2) {
45 | po.PrintUsage();
46 | exit(1);
47 | }
48 |
49 | std::string rspecifier = po.GetArg(1);
50 | std::string wspecifier = po.GetArg(2);
51 |
52 | BaseFloatMatrixWriter kaldi_writer(wspecifier);
53 | SequentialBaseFloatMatrixReader kaldi_reader(rspecifier);
54 | for (; !kaldi_reader.Done(); kaldi_reader.Next()) {
55 | Matrix spliced;
56 | SpliceFrames(kaldi_reader.Value(),
57 | left_context,
58 | right_context,
59 | &spliced);
60 | kaldi_writer.Write(kaldi_reader.Key(), spliced);
61 | }
62 | return 0;
63 | } catch(const std::exception &e) {
64 | std::cerr << e.what();
65 | return -1;
66 | }
67 | }
68 |
69 |
70 |
--------------------------------------------------------------------------------
/src/fstbin/Makefile:
--------------------------------------------------------------------------------
1 |
# make "all" the target.
all:

# Disable linking math libs because not needed here. Just for compilation speed.
# no, it's now needed for context-fst-test.
# MATHLIB = NONE

EXTRA_CXXFLAGS = -Wno-sign-compare

include ../config.mk

BINFILES = fstdeterminizestar \
           fstrmsymbols fstisstochastic fstminimizeencoded \
           fstaddsubsequentialloop fstaddselfloops \
           fstrmepslocal fstcomposecontext fsttablecompose fstrand fstfactor \
           fstdeterminizelog fstphicompose fstrhocompose fstpropfinal fstcopy \
           fstpushspecial fsts-to-transcripts

OBJFILES =

TESTFILES =

# actually, this library is currently empty. Everything is a header.
LIBFILE =

# Static libraries the binaries link against, in link order.
ADDLIBS = ../fstext/fstext.a ../cpucompute/cpucompute.a ../base/base.a \
          ../util/util.a

include ../makefiles/default_rules.mk
31 |
--------------------------------------------------------------------------------
/src/fstbin/fstcopy.cc:
--------------------------------------------------------------------------------
1 | // fstbin/fstcopy.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 | #include "util/common-utils.h"
23 | #include "fst/fstlib.h"
24 | #include "fstext/table-matcher.h"
25 | #include "fstext/fstext-utils.h"
26 |
27 |
28 | int main(int argc, char *argv[]) {
29 | try {
30 | using namespace eesen;
31 | using namespace fst;
32 | using eesen::int32;
33 |
34 | const char *usage =
35 | "Copy tables/archives of FSTs, indexed by a string (e.g. utterance-id)\n"
36 | "\n"
37 | "Usage: fstcopy \n";
38 |
39 | ParseOptions po(usage);
40 |
41 | po.Read(argc, argv);
42 |
43 | if (po.NumArgs() != 2) {
44 | po.PrintUsage();
45 | exit(1);
46 | }
47 |
48 | std::string fst_rspecifier = po.GetArg(1),
49 | fst_wspecifier = po.GetArg(2);
50 |
51 | SequentialTableReader fst_reader(fst_rspecifier);
52 | TableWriter fst_writer(fst_wspecifier);
53 | int32 n_done = 0;
54 |
55 | for (; !fst_reader.Done(); fst_reader.Next(), n_done++)
56 | fst_writer.Write(fst_reader.Key(), fst_reader.Value());
57 |
58 | KALDI_LOG << "Copied " << n_done << " FSTs.";
59 | return (n_done != 0 ? 0 : 1);
60 | } catch(const std::exception &e) {
61 | std::cerr << e.what();
62 | return -1;
63 | }
64 | }
65 |
66 |
--------------------------------------------------------------------------------
/src/fstbin/fstdeterminizelog.cc:
--------------------------------------------------------------------------------
1 | // fstbin/fstdeterminizelog.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 | #include "util/kaldi-io.h"
23 | #include "util/parse-options.h"
24 | #include "util/text-utils.h"
25 | #include "fst/fstlib.h"
26 | #include "fstext/fstext-utils.h"
27 |
28 | int main(int argc, char *argv[]) {
29 | try {
30 | using namespace eesen;
31 | using namespace fst;
32 | using eesen::int32;
33 |
34 | const char *usage =
35 | "Determinizes in the log semiring\n"
36 | "\n"
37 | "Usage: fstdeterminizelog [in.fst [out.fst] ]\n"
38 | "\n"
39 | "See also fstdeterminizestar\n";
40 |
41 | ParseOptions po(usage);
42 | po.Read(argc, argv);
43 |
44 | if (po.NumArgs() > 2) {
45 | po.PrintUsage();
46 | exit(1);
47 | }
48 |
49 | std::string fst_in_filename = po.GetOptArg(1),
50 | fst_out_filename = po.GetOptArg(2);
51 |
52 | VectorFst *fst = ReadFstKaldi(fst_in_filename);
53 |
54 | DeterminizeInLog(fst);
55 |
56 | WriteFstKaldi(*fst, fst_out_filename);
57 | delete fst;
58 | return 0;
59 | } catch(const std::exception &e) {
60 | std::cerr << e.what();
61 | return -1;
62 | }
63 | }
64 |
65 |
--------------------------------------------------------------------------------
/src/fstbin/fstminimizeencoded.cc:
--------------------------------------------------------------------------------
1 | // fstbin/fstminimizeencoded.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 | #include "util/kaldi-io.h"
23 | #include "util/parse-options.h"
24 | #include "util/text-utils.h"
25 | #include "fst/fstlib.h"
26 | #include "fstext/determinize-star.h"
27 | #include "fstext/fstext-utils.h"
28 |
29 |
30 | /* some test examples:
31 | ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint
32 | ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile | fstminimizeencoded | fstprint
33 | */
34 |
35 | int main(int argc, char *argv[]) {
36 | try {
37 | using namespace eesen;
38 | using namespace fst;
39 | using eesen::int32;
40 |
41 | const char *usage =
42 | "Minimizes FST after encoding [similar to fstminimize, but no weight-pushing]\n"
43 | "\n"
44 | "Usage: fstminimizeencoded [in.fst [out.fst] ]\n";
45 |
46 | float delta = kDelta;
47 | ParseOptions po(usage);
48 | po.Register("delta", &delta, "Delta likelihood used for quantization of weights");
49 | po.Read(argc, argv);
50 |
51 | if (po.NumArgs() > 2) {
52 | po.PrintUsage();
53 | exit(1);
54 | }
55 |
56 | std::string fst_in_filename = po.GetOptArg(1),
57 | fst_out_filename = po.GetOptArg(2);
58 |
59 | VectorFst *fst = ReadFstKaldi(fst_in_filename);
60 |
61 | MinimizeEncoded(fst, delta);
62 |
63 | WriteFstKaldi(*fst, fst_out_filename);
64 |
65 | delete fst;
66 | return 0;
67 | } catch(const std::exception &e) {
68 | std::cerr << e.what();
69 | return -1;
70 | }
71 | return 0;
72 | }
73 |
74 |
--------------------------------------------------------------------------------
/src/fstbin/fstpushspecial.cc:
--------------------------------------------------------------------------------
1 | // fstbin/fstpushspecial.cc
2 |
3 | // Copyright 2012 Daniel Povey
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 | #include "util/kaldi-io.h"
23 | #include "util/parse-options.h"
24 | #include "util/text-utils.h"
25 | #include "fst/fstlib.h"
26 | #include "fstext/fstext-utils.h"
27 | #include "fstext/push-special.h"
28 |
29 | int main(int argc, char *argv[]) {
30 | try {
31 | using namespace eesen;
32 | using namespace fst;
33 | using eesen::int32;
34 |
35 | const char *usage =
36 | "Pushes weights in an FST such that all the states\n"
37 | "in the FST have arcs and final-probs with weights that\n"
38 | "sum to the same amount (viewed as being in the log semiring).\n"
39 | "Thus, the \"extra weight\" is distributed throughout the FST.\n"
40 | "Tolerance parameter --delta controls how exact this is, and the\n"
41 | "speed.\n"
42 | "\n"
43 | "Usage: fstpushspecial [options] [in.fst [out.fst] ]\n";
44 |
45 | BaseFloat delta = kDelta;
46 | ParseOptions po(usage);
47 | po.Register("delta", &delta, "Delta cost: after pushing, all states will "
48 | "have a total weight that differs from the average by no more "
49 | "than this.");
50 | po.Read(argc, argv);
51 |
52 | if (po.NumArgs() > 2) {
53 | po.PrintUsage();
54 | exit(1);
55 | }
56 |
57 | std::string fst_in_filename = po.GetOptArg(1),
58 | fst_out_filename = po.GetOptArg(2);
59 |
60 | VectorFst *fst = ReadFstKaldi(fst_in_filename);
61 |
62 | PushSpecial(fst, delta);
63 |
64 | WriteFstKaldi(*fst, fst_out_filename);
65 | delete fst;
66 | return 0;
67 | } catch(const std::exception &e) {
68 | std::cerr << e.what();
69 | return -1;
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/fstbin/fstrand.cc:
--------------------------------------------------------------------------------
1 | // fstbin/fstrand.cc
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "base/kaldi-common.h"
22 | #include "util/kaldi-io.h"
23 | #include "util/parse-options.h"
24 | #include "util/text-utils.h"
25 | #include "fstext/rand-fst.h"
26 | #include "time.h"
27 | #include "fstext/fstext-utils.h"
28 |
29 | int main(int argc, char *argv[]) {
30 | try {
31 | using namespace fst;
32 | using eesen::int32;
33 |
34 | const char *usage =
35 | "Generate random FST\n"
36 | "\n"
37 | "Usage: fstrand [out.fst]\n";
38 |
39 | srand(time(NULL));
40 | RandFstOptions opts;
41 |
42 |
43 | eesen::ParseOptions po(usage);
44 | po.Register("allow-empty", &opts.allow_empty,
45 | "If true, we may generate an empty FST.");
46 |
47 | if (po.NumArgs() > 1) {
48 | po.PrintUsage();
49 | exit(1);
50 | }
51 |
52 | std::string fst_out_filename = po.GetOptArg(1);
53 |
54 | VectorFst *rand_fst = RandFst(opts);
55 |
56 | WriteFstKaldi(*rand_fst, fst_out_filename);
57 | delete rand_fst;
58 | return 0;
59 | } catch(const std::exception &e) {
60 | std::cerr << e.what();
61 | return -1;
62 | }
63 | }
64 |
65 |
--------------------------------------------------------------------------------
/src/fstext/Makefile:
--------------------------------------------------------------------------------
1 |
# make "all" the target.
all:

# Disable linking math libs because not needed here. Just for compilation speed.
# no, it's now needed for context-fst-test.
# MATHLIB = NONE

EXTRA_CXXFLAGS = -Wno-sign-compare

include ../config.mk

TESTFILES = determinize-star-test \
            pre-determinize-test trivial-factor-weight-test \
            factor-test table-matcher-test fstext-utils-test \
            remove-eps-local-test rescale-test lattice-weight-test \
            determinize-lattice-test lattice-utils-test deterministic-fst-test \
            push-special-test epsilon-property-test prune-special-test

OBJFILES = push-special.o


LIBNAME = fstext

# tree and matrix archives needed for test-context-fst
# matrix archive needed for push-special.
ADDLIBS = ../cpucompute/cpucompute.a \
          ../util/util.a ../base/base.a

include ../makefiles/default_rules.mk
31 |
--------------------------------------------------------------------------------
/src/fstext/epsilon-property-test.cc:
--------------------------------------------------------------------------------
1 | // fstext/epsilon-property-test.cc
2 |
3 | // Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "fstext/rand-fst.h"
22 | #include "fstext/epsilon-property.h"
23 |
24 |
25 | namespace fst {
26 |
27 | void TestEnsureEpsilonProperty() {
28 |
29 | for (int32 i = 0; i < 10; i++) {
30 | RandFstOptions opts;
31 | opts.acyclic = true;
32 | VectorFst *fst = RandFst(opts);
33 | VectorFst fst2(*fst); // copy it...
34 | EnsureEpsilonProperty(&fst2);
35 |
36 | std::vector info;
37 | ComputeStateInfo(fst2, &info);
38 | for (size_t i = 0; i < info.size(); i++) {
39 | char c = info[i];
40 | assert(!((c & kStateHasEpsilonArcsEntering) != 0 &&
41 | (c & kStateHasNonEpsilonArcsEntering) != 0));
42 | assert(!((c & kStateHasEpsilonArcsLeaving) != 0 &&
43 | (c & kStateHasNonEpsilonArcsLeaving) != 0));
44 | }
45 | assert(RandEquivalent(fst2, *fst, 5, 0.01, eesen::Rand(), 10));
46 | delete fst;
47 | }
48 | }
49 |
50 | } // end namespace fst
51 |
52 | int main() {
53 | using namespace fst;
54 | for (int i = 0; i < 2; i++) {
55 | TestEnsureEpsilonProperty();
56 | }
57 | std::cout << "Test OK\n";
58 | }
59 |
--------------------------------------------------------------------------------
/src/fstext/fst-test-utils.h:
--------------------------------------------------------------------------------
1 | // fstext/fst-test-utils.h
2 |
3 | // Copyright 2009-2011 Microsoft Corporation
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef KALDI_FSTEXT_FST_TEST_UTILS_H_
21 | #define KALDI_FSTEXT_FST_TEST_UTILS_H_
22 |
23 |
24 | #include
25 | #include
26 |
27 | // Just some #includes.
28 | #include "fst/script/print-impl.h"
29 | #include "fstext/rand-fst.h"
30 |
31 |
32 | #endif
33 |
34 |
--------------------------------------------------------------------------------
/src/fstext/fstext-lib.h:
--------------------------------------------------------------------------------
1 | // fstext/fstext-lib.h
2 |
3 | // Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef KALDI_FSTEXT_FSTEXT_LIB_H_
21 | #define KALDI_FSTEXT_FSTEXT_LIB_H_
22 | #include "fst/fstlib.h"
23 | #include "context-fst.h"
24 | #include "determinize-star.h"
25 | #include "factor.h"
26 | #include "fst-test-utils.h"
27 | #include "fstext-utils.h"
28 | #include "pre-determinize.h"
29 | #include "table-matcher.h"
30 | #include "trivial-factor-weight.h"
31 | #include "lattice-weight.h"
32 | #include "lattice-utils.h"
33 | #include "determinize-lattice.h"
34 | #include "deterministic-fst.h"
35 | #endif
36 |
--------------------------------------------------------------------------------
/src/fstext/prune-special.h:
--------------------------------------------------------------------------------
1 | // fstext/prune-special.h
2 |
3 | // Copyright 2014 Johns Hopkins University (author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #ifndef KALDI_FSTEXT_PRUNE_SPECIAL_H_
22 | #define KALDI_FSTEXT_PRUNE_SPECIAL_H_
23 |
24 | #include "fst/fstlib.h"
25 | #include "fstext/lattice-weight.h"
26 | #include "fstext/factor.h"
27 |
28 | namespace fst {
29 |
30 |
31 |
32 | /**
33 | The function PruneSpecial is like the standard OpenFst function "prune",
34 | except it does not expand the entire "ifst"- this is useful for cases where
35 | ifst is an on-demand FST such as a ComposeFst and we don't want to visit
36 | it all. It supports pruning either to a specified beam (if beam is
37 | not One()), or to a specified max_states (if max_states is > 0). One of the
38 | two must be specified.
39 |
40 | Requirements:
41 | - Costs must be non-negative (equivalently, weights must not be greater than One()).
42 | - There must be a Compare(a, b) function that compares two weights and returns (-1,0,1)
   if (a<b), (a=b), (a>b).  We define this in Kaldi, for TropicalWeight, LogWeight (I think),
44 | and LatticeWeight... also CompactLatticeWeight, but we doubt that will be used here;
45 | better to use PruneCompactLattice().
46 | */
47 |
48 | template
49 | void PruneSpecial(const Fst &ifst,
50 | VectorFst *ofst,
51 | typename Arc::Weight beam,
52 | size_t max_states = 0);
53 |
54 |
55 |
56 | } // end namespace fst
57 |
58 | #include "fstext/prune-special-inl.h"
59 |
60 | #endif // KALDI_FSTEXT_PRUNE_SPECIAL_H_
61 |
--------------------------------------------------------------------------------
/src/fstext/push-special.h:
--------------------------------------------------------------------------------
1 | // fstext/push-special.h
2 |
// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | #ifndef KALDI_FSTEXT_PUSH_SPECIAL_H_
21 | #define KALDI_FSTEXT_PUSH_SPECIAL_H_
22 |
23 | #include
24 | #include
25 | #include "util/const-integer-set.h"
26 |
27 | namespace fst {
28 |
29 | /*
30 | This function does weight-pushing, in the log semiring,
31 | but in a special way, such that any "leftover weight" after pushing
32 | gets distributed evenly along the FST, and doesn't end up either
33 | at the start or at the end. Basically it pushes the weights such
34 | that the total weight of each state (i.e. the sum of the arc
35 | probabilities plus the final-prob) is the same for all states.
36 | */
37 |
38 | void PushSpecial(VectorFst *fst,
39 | float delta = kDelta);
40 |
41 | }
42 |
43 | #endif
44 |
--------------------------------------------------------------------------------
/src/fstext/ref-counter.h:
--------------------------------------------------------------------------------
1 | // ref-counter.h
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | //
15 | //
16 | // \file
17 | // Google-compatibility locking declarations and inline definitions
18 | //
19 | // Classes and functions here are no-ops (by design); proper locking requires
20 | // actual implementation.
21 |
22 | #ifndef REF_COUNTER_H__
23 | #define REF_COUNTER_H__
24 |
25 | #include // for DISALLOW_COPY_AND_ASSIGN
26 |
27 | namespace fst {
28 |
29 | using namespace std;
30 |
31 | // Reference counting - single-thread implementation
32 | class RefCounter {
33 | public:
34 | RefCounter() : count_(1) {}
35 |
36 | int count() const { return count_; }
37 | int Incr() const { return ++count_; }
38 | int Decr() const { return --count_; }
39 |
40 | private:
41 | mutable int count_;
42 |
43 | DISALLOW_COPY_AND_ASSIGN(RefCounter);
44 | };
45 |
46 | } // namespace fst
47 |
48 | #endif // REF_COUNTER_H__
49 |
--------------------------------------------------------------------------------
/src/gpucompute/cuda-common.cc:
--------------------------------------------------------------------------------
1 | //#ifndef KALDI_CUDAMATRIX_COMMON_H_
2 | //#define KALDI_CUDAMATRIX_COMMON_H_
3 |
4 | // This file contains some #includes, forward declarations
5 | // and typedefs that are needed by all the main header
6 | // files in this directory.
7 |
8 | #include "base/kaldi-common.h"
9 | #include "cpucompute/blas.h"
10 | #include "gpucompute/cuda-device.h"
11 | #include "gpucompute/cuda-common.h"
12 |
13 | namespace eesen {
14 |
15 | #if HAVE_CUDA == 1
16 | cublasOperation_t KaldiTransToCuTrans(MatrixTransposeType kaldi_trans) {
17 | cublasOperation_t cublas_trans;
18 |
19 | if (kaldi_trans == kNoTrans)
20 | cublas_trans = CUBLAS_OP_N;
21 | else if (kaldi_trans == kTrans)
22 | cublas_trans = CUBLAS_OP_T;
23 | else
24 | cublas_trans = CUBLAS_OP_C;
25 | return cublas_trans;
26 | }
27 | #endif
28 |
29 | } // namespace
30 |
31 |
32 | //#endif // KALDI_CUDAMATRIX_COMMON_H_
33 |
--------------------------------------------------------------------------------
/src/gpucompute/cuda-common.h:
--------------------------------------------------------------------------------
1 | // gpucompute/cuda-common.h
2 |
3 | // Copyright 2009-2011 Karel Vesely
4 | // Johns Hopkins University (author: Daniel Povey)
5 | // 2015 Yajie Miao
6 | // See ../../COPYING for clarification regarding multiple authors
7 | //
8 | // Licensed under the Apache License, Version 2.0 (the "License");
9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at
11 | //
12 | // http://www.apache.org/licenses/LICENSE-2.0
13 | //
14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 | // MERCHANTABLITY OR NON-INFRINGEMENT.
18 | // See the Apache 2 License for the specific language governing permissions and
19 | // limitations under the License.
20 |
21 |
22 | #ifndef EESEN_GPUCOMPUTE_CUDA_COMMON_H_
23 | #define EESEN_GPUCOMPUTE_CUDA_COMMON_H_
24 | #include "gpucompute/cuda-matrixdim.h" // for CU1DBLOCK and CU2DBLOCK
25 |
26 | #include
27 | #include
28 | #include "base/kaldi-error.h"
29 | #include "cpucompute/matrix-common.h"
30 |
31 | #if HAVE_CUDA == 1
32 | #include
33 | #include
34 |
35 |
36 |
// Wraps a CUDA call: if it returns a nonzero status, reports the error
// string through KALDI_ERR (including the failing expression's text); on
// any path it then synchronizes the device before continuing.
// NOTE(review): cudaThreadSynchronize() is deprecated in newer CUDA
// toolkits in favor of cudaDeviceSynchronize() -- confirm against the
// toolkit version this builds with.
#define CU_SAFE_CALL(fun) \
{ \
  int32 ret; \
  if ((ret = (fun)) != 0) { \
    KALDI_ERR << "cudaError_t " << ret << " : \"" << cudaGetErrorString((cudaError_t)ret) << "\" returned from '" << #fun << "'"; \
  } \
  cudaThreadSynchronize(); \
}
45 |
46 |
47 | namespace eesen {
48 |
49 | /** Number of blocks in which the task of size 'size' is splitted **/
50 | inline int32 n_blocks(int32 size, int32 block_size) {
51 | return size / block_size + ((size % block_size == 0)? 0 : 1);
52 | }
53 |
54 | cublasOperation_t KaldiTransToCuTrans(MatrixTransposeType kaldi_trans);
55 |
56 | }
57 |
58 | #endif // HAVE_CUDA
59 |
namespace eesen {
// Some forward declarations, needed for friend declarations.
// (The template parameter lists below were lost in a previous edit,
// leaving invalid C++; restored here.)
template<typename Real> class CuVectorBase;
template<typename Real> class CuVector;
template<typename Real> class CuSubVector;
template<typename Real> class CuRand;
template<typename Real> class CuMatrixBase;
template<typename Real> class CuMatrix;
template<typename Real> class CuSubMatrix;

}
71 |
72 |
73 | #endif
74 |
--------------------------------------------------------------------------------
/src/gpucompute/cuda-matrix-inl.h:
--------------------------------------------------------------------------------
1 | // gpucompute/cuda-matrix-inl.h
2 |
3 | // Copyright 2009-2012 Karel Vesely
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 | // Do not include this file directly. It is included by cu-matrix.h.
21 |
22 | #ifndef KALDI_CUDAMATRIX_CU_MATRIX_INL_H_
23 | #define KALDI_CUDAMATRIX_CU_MATRIX_INL_H_
24 |
25 | namespace eesen {
26 |
27 | template
28 | inline CuSubMatrix::CuSubMatrix(const CuMatrixBase &mat,
29 | const MatrixIndexT row_offset,
30 | const MatrixIndexT num_rows,
31 | const MatrixIndexT col_offset,
32 | const MatrixIndexT num_cols):
33 | CuMatrixBase(mat.data_ + (row_offset * mat.stride_) + col_offset,
34 | num_rows,
35 | num_cols,
36 | mat.stride_) {
37 | KALDI_ASSERT(row_offset >= 0 && col_offset >= 0 &&
38 | row_offset + num_rows <= mat.num_rows_ &&
39 | col_offset + num_cols <= mat.num_cols_);
40 | }
41 |
42 | } // namespace eesen
43 |
44 | #endif
45 |
46 |
47 |
--------------------------------------------------------------------------------
/src/gpucompute/cuda-randkernels.h:
--------------------------------------------------------------------------------
1 | // gpucompute/cuda-randkernels.h
2 |
3 | // Copyright 2012 Karel Vesely
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 |
22 | #ifndef EESEN_GPUCOMPUTE_CUDA_RANDKERNELS_H_
23 | #define EESEN_GPUCOMPUTE_CUDA_RANDKERNELS_H_
24 |
25 | #include "gpucompute/cuda-matrixdim.h"
26 | #include "gpucompute/cuda-kernels.h"
27 |
28 | #if HAVE_CUDA == 1
29 |
// C-linkage wrappers around the CUDA random-number kernels; implemented in
// the corresponding .cu file.  Gr/Bl are the grid/block launch dimensions;
// z1..z4 are per-element RNG state arrays (presumably a four-word Tausworthe
// generator state -- TODO confirm against the kernel implementation).
extern "C" {

/*********************************************************
 * float CUDA kernel calls
 */
// Fills 'mat' (dimensions in 'd') with uniform random values.
void cudaF_rand(dim3 Gr, dim3 Bl, float *mat, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, MatrixDim d);
// Fills 'mat' with Gaussian random values.
void cudaF_gauss_rand(dim3 Gr, dim3 Bl, float *mat, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, MatrixDim d);
// Vector variant of the Gaussian fill ('v' has 'dim' elements).
void cudaF_vec_gauss_rand(int Gr, int Bl, float *v, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, int dim);
// Sets 'states' by binarizing 'probs' against the random values in 'rand'.
void cudaF_binarize_probs(dim3 Gr, dim3 Bl, float *states, const float *probs, float *rand, MatrixDim d);

/*********************************************************
 * double CUDA kernel calls
 */
// Double-precision counterparts of the float kernels above.
void cudaD_rand(dim3 Gr, dim3 Bl, double *mat, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, MatrixDim d);
void cudaD_gauss_rand(dim3 Gr, dim3 Bl, double *mat, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, MatrixDim d);
void cudaD_vec_gauss_rand(int Gr, int Bl, double *v, uint32_cuda *z1, uint32_cuda *z2, uint32_cuda *z3, uint32_cuda *z4, int dim);
void cudaD_binarize_probs(dim3 Gr, dim3 Bl, double *states, const double *probs, double *rand, MatrixDim d);

}
49 |
50 |
51 |
52 | #endif // HAVE_CUDA
53 |
54 | #endif
55 |
--------------------------------------------------------------------------------
/src/lat/Makefile:
--------------------------------------------------------------------------------
1 |
# Makefile for the 'lat' (lattice) library.  The actual build rules come
# from ../makefiles/default_rules.mk, included at the bottom.
all:

# Shared compiler/linker configuration for the whole source tree.
include ../config.mk

# Extra flags for this directory only.
EXTRA_CXXFLAGS += -Wno-sign-compare

# Unit-test binaries (one per listed source file).
TESTFILES = kaldi-lattice-test push-lattice-test minimize-lattice-test \
            determinize-lattice-pruned-test

# Object files that make up the library archive.
OBJFILES = kaldi-lattice.o lattice-functions.o \
           push-lattice.o minimize-lattice.o sausages.o \
           determinize-lattice-pruned.o confidence.o

# Archive name (produces lat.a).
LIBNAME = lat

# Libraries this one links against.
ADDLIBS = ../cpucompute/cpucompute.a \
          ../util/util.a ../base/base.a


include ../makefiles/default_rules.mk
22 |
--------------------------------------------------------------------------------
/src/lat/minimize-lattice-test.cc:
--------------------------------------------------------------------------------
1 | // lat/minimize-lattice-test.cc
2 |
3 | // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 |
5 | // See ../../COPYING for clarification regarding multiple authors
6 | //
7 | // Licensed under the Apache License, Version 2.0 (the "License");
8 | // you may not use this file except in compliance with the License.
9 | // You may obtain a copy of the License at
10 | //
11 | // http://www.apache.org/licenses/LICENSE-2.0
12 | //
13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 | // MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and
18 | // limitations under the License.
19 |
20 |
21 | #include "lat/kaldi-lattice.h"
22 | #include "lat/minimize-lattice.h"
23 | #include "lat/push-lattice.h"
24 | #include "fstext/rand-fst.h"
25 |
26 |
27 | namespace eesen {
28 | using namespace fst;
29 |
30 | CompactLattice *RandDeterministicCompactLattice() {
31 | RandFstOptions opts;
32 | opts.acyclic = true;
33 | while (1) {
34 | Lattice *fst = fst::RandPairFst(opts);
35 | CompactLattice *cfst = new CompactLattice;
36 | if (!DeterminizeLattice(*fst, cfst)) {
37 | delete fst;
38 | delete cfst;
39 | KALDI_WARN << "Determinization failed, trying again.";
40 | } else {
41 | delete fst;
42 | return cfst;
43 | }
44 | }
45 | }
46 |
47 | void TestMinimizeCompactLattice() {
48 | CompactLattice *clat = RandDeterministicCompactLattice();
49 | CompactLattice clat2(*clat);
50 | BaseFloat delta = (Rand() % 2 == 0 ? 1.0 : 1.0e-05);
51 |
52 | // Minimization will only work well on determinized and pushed lattices.
53 | PushCompactLatticeStrings(&clat2);
54 | PushCompactLatticeWeights(&clat2);
55 |
56 | MinimizeCompactLattice(&clat2, delta);
57 | KALDI_ASSERT(fst::RandEquivalent(*clat, clat2, 5, delta, Rand(), 10));
58 |
59 | delete clat;
60 | }
61 |
62 |
63 | } // end namespace eesen
64 |
65 | int main() {
66 | using namespace eesen;
67 | using eesen::int32;
68 | SetVerboseLevel(4);
69 | for (int32 i = 0; i < 1000; i++) {
70 | TestMinimizeCompactLattice();
71 | }
72 | KALDI_LOG << "Success.";
73 | }
74 |
--------------------------------------------------------------------------------
/src/lat/minimize-lattice.h:
--------------------------------------------------------------------------------
1 | // lat/minimize-lattice.h
2 |
3 | // Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
4 | // 2014 Guoguo Chen
5 |
6 | // See ../../COPYING for clarification regarding multiple authors
7 | //
8 | // Licensed under the Apache License, Version 2.0 (the "License");
9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at
11 | //
12 | // http://www.apache.org/licenses/LICENSE-2.0
13 | //
14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 | // MERCHANTABLITY OR NON-INFRINGEMENT.
18 | // See the Apache 2 License for the specific language governing permissions and
19 | // limitations under the License.
20 |
21 |
22 | #ifndef KALDI_LAT_MINIMIZE_LATTICE_H_
23 | #define KALDI_LAT_MINIMIZE_LATTICE_H_
24 |
25 | #include
26 | #include