├── model ├── __init__.py └── model_utils.py ├── experiments └── .gitkeep ├── requirements.txt ├── paper.pdf ├── data ├── download.sh ├── data_prepare.sh ├── label.py └── zip.py ├── baseline.sh ├── utils ├── filt.py ├── data │ ├── get_num_frames.sh │ ├── get_segments_for_data.sh │ ├── perturb_data_dir_speed_3way.sh │ ├── get_frame_shift.sh │ ├── remove_dup_utts.sh │ ├── perturb_data_dir_volume.sh │ ├── combine_data.sh │ ├── copy_data_dir.sh │ ├── perturb_data_dir_speed.sh │ ├── get_utt2dur.sh │ ├── internal │ │ └── modify_speaker_info.py │ ├── modify_speaker_info.sh │ ├── extend_segment_times.py │ └── normalize_data_range.pl ├── fix_ctm.sh ├── spk2utt_to_utt2spk.pl ├── s2eps.pl ├── eps2disambig.pl ├── build_const_arpa_lm.sh ├── summarize_warnings.pl ├── utt2spk_to_spk2utt.pl ├── shuffle_list.pl ├── analyze_segments.pl ├── show_lattice.sh ├── best_wer.sh ├── remove_oovs.pl ├── add_disambig.pl ├── remove_data_links.sh ├── nnet │ ├── gen_hamm_mat.py │ ├── gen_splice.py │ ├── gen_dct_mat.py │ ├── make_lstm_proto.py │ └── make_blstm_proto.py ├── ln.pl ├── make_unigram_grammar.pl ├── int2sym.pl ├── scoring │ └── wer_report.pl ├── find_arpa_oovs.pl ├── prepare_online_nnet_dist_build.sh ├── format_lm.sh ├── convert_slf_parallel.sh ├── lang │ ├── check_phones_compatible.sh │ ├── validate_disambig_sym_file.pl │ ├── check_g_properties.pl │ ├── internal │ │ ├── apply_unk_lm.sh │ │ └── modify_unk_pron.py │ └── make_phone_bigram_lang.sh ├── create_split_dir.pl ├── apply_map.pl ├── filter_scp.pl ├── gen_topo.pl ├── subset_scp.pl ├── convert_ctm.pl ├── summarize_logs.pl ├── rnnlm_compute_scores.sh ├── sym2int.pl ├── format_lm_sri.sh ├── parse_options.sh ├── map_arpa_lm.pl ├── pinyin_map.pl ├── subset_data_dir_tr_cv.sh ├── combine_data.sh ├── create_data_link.pl ├── copy_data_dir.sh └── perturb_data_dir_speed.sh ├── train.sh ├── eval.sh ├── path.sh ├── local ├── cosine_scoring.sh ├── plda_scoring.sh ├── pca_plda_scoring.sh └── lda_plda_scoring.sh ├── baseline.py ├── .gitignore ├── 
eer.sh ├── README.md └── main.py /model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.8 2 | numpy 3 | scipy 4 | -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSLT-THU/IS2019-VAE/HEAD/paper.pdf -------------------------------------------------------------------------------- /data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Author: Yang Zhang 4 | # Mail: zyziszy@foxmail.com 5 | # Apache 2.0. 
6 | # 2019, CSLT 7 | 8 | -------------------------------------------------------------------------------- /baseline.sh: -------------------------------------------------------------------------------- 1 | # this code is used to 2 | # get xvector.ark from xvector.npz and 3 | # calculate baseline EER 4 | 5 | python -u baseline.py 6 | wait; 7 | 8 | sh eer.sh -------------------------------------------------------------------------------- /utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | 5 | import sys 6 | 7 | vocab=set() 8 | with open(sys.argv[1]) as vocabfile: 9 | for line in vocabfile: 10 | vocab.add(line.strip()) 11 | 12 | with open(sys.argv[2]) as textfile: 13 | for line in textfile: 14 | print " ".join(map(lambda word: word if word in vocab else '', line.strip().split())) 15 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 Yang Zhang 3 | # Apache 2.0. 4 | 5 | 6 | python -u main.py \ 7 | --epoch 200 \ 8 | --batch_size 200 \ 9 | --n_hidden 1800 \ 10 | --learn_rate 0.00001 \ 11 | --beta1 0.5 \ 12 | --dataset_path ./data/voxceleb_combined_200000/xvector.npz \ 13 | --spk_path ./data/voxceleb_combined_200000/spk.npz \ 14 | --z_dim 200 \ 15 | --KL_weigth 0.03 \ 16 | --cohesive_weight 0 \ 17 | --is_training 1 18 | 19 | -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 Yang Zhang 3 | # Apache 2.0. 
4 | 5 | python -u main.py \ 6 | --epoch 200 \ 7 | --batch_size 200 \ 8 | --n_hidden 1800 \ 9 | --learn_rate 0.00001 \ 10 | --beta1 0.5 \ 11 | --dataset_path ./data/voxceleb_combined_200000/xvector.npz \ 12 | --spk_path ./data/voxceleb_combined_200000/spk.npz \ 13 | --z_dim 200 \ 14 | --KL_weigth 0.03 \ 15 | --cohesive_weight 0 \ 16 | --is_training 0 17 | 18 | wait 19 | 20 | bash eer.sh 21 | -------------------------------------------------------------------------------- /path.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # 2019 Yang Zhang 5 | # Apache 2.0. 6 | 7 | export KALDI_ROOT=${replace it by your kaldi root path} 8 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH 9 | 10 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 11 | . $KALDI_ROOT/tools/config/common_path.sh 12 | export LC_ALL=C 13 | -------------------------------------------------------------------------------- /data/data_prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Author: Yang Zhang 4 | # Author: Xueyi Wang 5 | # Apache 2.0. 6 | # 2019, CSLT 7 | 8 | # xvector 9 | for ark in `find -name "xvector.ark"` 10 | do 11 | npz=`dirname $ark`"/xvector.npz" 12 | python -u zip.py \ 13 | --source_path $ark \ 14 | --dest_path $npz 15 | echo 16 | done 17 | echo 18 | 19 | # utt2spk 20 | for utt2spk in `find -name "utt2spk"` 21 | do 22 | spknpz=`dirname $utt2spk`"/spk.npz" 23 | python -u label.py \ 24 | --source_path $utt2spk \ 25 | --dest_path $spknpz 26 | echo 27 | done 28 | echo 29 | 30 | echo data_prepare all DONE! 
31 | -------------------------------------------------------------------------------- /utils/data/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 " 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | fi 16 | 17 | data=$1 18 | 19 | if [ ! -f $data/utt2dur ]; then 20 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 21 | fi 22 | 23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 24 | 25 | awk -v s=$frame_shift '{n += $2} END{printf("%d\n", int(n / s))}' <$data/utt2dur 26 | -------------------------------------------------------------------------------- /local/cosine_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains an LDA transform and does cosine scoring. 7 | 8 | #echo "$0 $@" # Print the command line for logging 9 | 10 | if [ -f path.sh ]; then . ./path.sh; fi 11 | . 
parse_options.sh || exit 1; 12 | 13 | if [ $# != 4 ]; then 14 | echo "Usage: $0 " 15 | fi 16 | 17 | enroll_data_dir=$1 18 | test_data_dir=$2 19 | trials=$3 20 | scores_dir=$4 21 | 22 | mkdir -p $scores_dir/log 23 | run.pl $scores_dir/log/cosine_scoring.log \ 24 | cat $trials \| awk '{print $1" "$2}' \| \ 25 | ivector-compute-dot-products - \ 26 | "ark:ivector-normalize-length ark:${enroll_data_dir}/xvector.ark ark:- |" \ 27 | "ark:ivector-normalize-length ark:${test_data_dir}/xvector.ark ark:- |" \ 28 | $scores_dir/cosine_scores || exit 1; 29 | -------------------------------------------------------------------------------- /utils/data/get_segments_for_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script operates on a data directory, such as in data/train/, 4 | # and writes new segments to stdout. The file 'segments' maps from 5 | # utterance to time offsets into a recording, with the format: 6 | # 7 | # This script assumes utterance and recording ids are the same (i.e., that 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 9 | # created if necessary by get_utt2dur.sh. 10 | 11 | . ./path.sh 12 | 13 | if [ $# != 1 ]; then 14 | echo "Usage: $0 [options] " 15 | echo "e.g.:" 16 | echo " $0 data/train > data/train/segments" 17 | exit 1 18 | fi 19 | 20 | data=$1 21 | 22 | if [ ! -f $data/utt2dur ]; then 23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1; 24 | fi 25 | 26 | # 0 27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces and with (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } 24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } 25 | } 26 | print join("\t", @A) . "\n"; 27 | } 28 | -------------------------------------------------------------------------------- /utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /baseline.py: -------------------------------------------------------------------------------- 1 | # this code is used to 2 | # get xvector.ark from xvector.npz and 3 | # calculate baseline EER 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | import os 8 | 9 | 10 | paths = ["./data/voxceleb_combined_200000/xvector", 11 | "./data/sitw_dev/enroll/xvector", 12 | "./data/sitw_dev/test/xvector", 13 | "./data/sitw_eval/enroll/xvector", 14 | "./data/sitw_eval/test/xvector" 15 | ] 16 | 17 | # delete 18 | for path in paths: 19 | if os.path.exists(path+'.ark') == True: 20 | os.remove(path+'.ark') 21 | print('delete {}.ark'.format(path)) 22 | 23 | # write 24 | for path in paths: 25 | # load npz data 26 | vector = np.load(path+'.npz')['vector'] 27 | labels = np.load(path+'.npz')['utt'] 28 | with open(path+'.ark', 'w') as f: 29 | for i in range(vector.shape[0]): 30 | f.write(str(labels[i])) 31 | f.write(' [ ') 32 | for j in vector[i]: 33 | f.write(str(j)) 34 | f.write(' ') 35 | f.write(']') 36 | f.write('\n') 37 | print('{}.ark is done!'.format(path)) 38 | 39 
| print('\nall done!') 40 | -------------------------------------------------------------------------------- /utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] " 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | mkdir -p $new_lang 34 | cp -r $old_lang/* $new_lang 35 | 36 | unk=`cat $new_lang/oov.int` 37 | bos=`grep "" $new_lang/words.txt | awk '{print $2}'` 38 | eos=`grep "" $new_lang/words.txt | awk '{print $2}'` 39 | if [[ -z $bos || -z $eos ]]; then 40 | echo "$0: and symbols are not in $new_lang/words.txt" 41 | exit 1 42 | fi 43 | 44 | 45 | arpa-to-const-arpa --bos-symbol=$bos \ 46 | --eos-symbol=$eos --unk-symbol=$unk \ 47 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 48 | 49 | exit 0; 50 | -------------------------------------------------------------------------------- /utils/summarize_warnings.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | $dir = $ARGV[0]; 8 | 9 | ! 
-d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1; 10 | 11 | $dir =~ s:/$::; # Remove trailing slash. 12 | 13 | 14 | # Group the files into categories where all have the same base-name. 15 | foreach $f (glob ("$dir/*.log")) { 16 | $f_category = $f; 17 | # do next expression twice; s///g doesn't work as they overlap. 18 | $f_category =~ s:\.\d+\.:.*.:; 19 | $f_category =~ s:\.\d+\.:.*.:; 20 | $fmap{$f_category} .= " $f"; 21 | } 22 | 23 | sub split_hundreds { # split list of filenames into groups of 100. 24 | my $names = shift @_; 25 | my @A = split(" ", $names); 26 | my @ans = (); 27 | while (@A > 0) { 28 | my $group = ""; 29 | for ($x = 0; $x < 100 && @A>0; $x++) { 30 | $fname = pop @A; 31 | $group .= "$fname "; 32 | } 33 | push @ans, $group; 34 | } 35 | return @ans; 36 | } 37 | 38 | foreach $c (keys %fmap) { 39 | $n = 0; 40 | foreach $fgroup (split_hundreds($fmap{$c})) { 41 | $n += `grep -w WARNING $fgroup | wc -l`; 42 | } 43 | if ($n != 0) { 44 | print "$n warnings in $c\n" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 
31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines; 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] cmp $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 | -------------------------------------------------------------------------------- /utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 
18 | 19 | $dur = $total = 0; 20 | $maxDur = 0; 21 | $minDur = 9999999999; 22 | $n = 0; 23 | while(<>){ 24 | chomp; 25 | @t = split(/\s+/); 26 | $dur = $t[3] - $t[2]; 27 | $total += $dur; 28 | if ($dur > $maxDur) { 29 | $maxSegId = $t[0]; 30 | $maxDur = $dur; 31 | } 32 | if ($dur < $minDur) { 33 | $minSegId = $t[0]; 34 | $minDur = $dur; 35 | } 36 | $n++; 37 | } 38 | $avg=$total/$n; 39 | $hrs = $total/3600; 40 | print "Total $hrs hours of data\n"; 41 | print "Average segment length $avg seconds\n"; 42 | print "Segment $maxSegId has length of $maxDur seconds\n"; 43 | print "Segment $minSegId has length of $minDur seconds\n"; 44 | -------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] " 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! 
[ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1; 28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Can not automaticaly open file on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite. 25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 
20 | 21 | if ( @ARGV < 1 && @ARGV > 2) { 22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; 23 | } 24 | 25 | $unklist = shift @ARGV; 26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 1 || die "Bad line in unknown-symbol list: $_"; 30 | $unk{$A[0]} = 1; 31 | } 32 | 33 | $num_removed = 0; 34 | while(<>){ 35 | @A = split(" ", $_); 36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) { 37 | $num_removed++; 38 | } else { 39 | print; 40 | } 41 | } 42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; 43 | 44 | -------------------------------------------------------------------------------- /data/label.py: -------------------------------------------------------------------------------- 1 | # Author: Yang Zhang 2 | # Author: Xueyi Wang 3 | # Apache 2.0. 4 | # 2019, CSLT 5 | 6 | import argparse 7 | import numpy as np 8 | 9 | 10 | def prepare_label_data(source_path, dest_path): 11 | print("source_path: ", source_path) 12 | print("dest_path: ", dest_path) 13 | print("start zip...") 14 | print("waiting...") 15 | 16 | utt2spk = np.loadtxt(source_path, dtype=bytes).astype(str) 17 | all_labels = [] 18 | for i in utt2spk: 19 | all_labels.append(i[1].strip('id')) 20 | 21 | spker = [] 22 | for i in all_labels: 23 | if i not in spker: 24 | spker.append(i) 25 | spk = [] 26 | 27 | temp = [] 28 | for i in all_labels: 29 | for j in range(len(spker)): 30 | if spker[j] == i: 31 | temp.append(j) 32 | # print(j) 33 | spk = np.array(temp) 34 | spk = spk.reshape(-1, 1) 35 | 36 | spker = [] 37 | for i in temp: 38 | if i not in spker: 39 | spker.append(i) 40 | 41 | spker = np.array(spker) 42 | 43 | print('spk', spk.shape) 44 | print('spker', spker.shape) 45 | 46 | np.savez(dest_path, spk_list=spk, spker=spker) 47 | 48 | print("prepare_label_data {} is done".format(dest_path)) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | 
parser.add_argument("--source_path", help="source_path of utt2spk") 54 | parser.add_argument("--dest_path", help="destination of spk.npz") 55 | args = parser.parse_args() 56 | 57 | source_path = args.source_path 58 | dest_path = args.dest_path 59 | 60 | prepare_label_data(source_path, dest_path) 61 | -------------------------------------------------------------------------------- /utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Adds some specified number of disambig symbols to a symbol table. 19 | # Adds these as #1, #2, etc. 20 | # If the --include-zero option is specified, includes an extra one 21 | # #0. 
22 | 23 | $include_zero = 0; 24 | if($ARGV[0] eq "--include-zero") { 25 | $include_zero = 1; 26 | shift @ARGV; 27 | } 28 | 29 | if(@ARGV != 2) { 30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "; 31 | } 32 | 33 | 34 | $input = $ARGV[0]; 35 | $nsyms = $ARGV[1]; 36 | 37 | open(F, "<$input") || die "Opening file $input"; 38 | 39 | while() { 40 | @A = split(" ", $_); 41 | @A == 2 || die "Bad line $_"; 42 | $lastsym = $A[1]; 43 | print; 44 | } 45 | 46 | if(!defined($lastsym)){ 47 | die "Empty symbol file?"; 48 | } 49 | 50 | if($include_zero) { 51 | $lastsym++; 52 | print "#0 $lastsym\n"; 53 | } 54 | 55 | for($n = 1; $n <= $nsyms; $n++) { 56 | $y = $n + $lastsym; 57 | print "#$n $y\n"; 58 | } 59 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed_3way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Apache 2.0 6 | 7 | # This script does the standard 3-way speed perturbing of 8 | # a data directory (it operates on the wav.scp). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 2 ]; then 13 | echo "Usage: perturb_data_dir_speed_3way.sh " 14 | echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." 15 | echo "e.g.:" 16 | echo " $0 data/train data/train_sp" 17 | echo "Note: if /feats.scp already exists, this will refuse to run." 18 | exit 1 19 | fi 20 | 21 | srcdir=$1 22 | destdir=$2 23 | 24 | if [ ! -f $srcdir/wav.scp ]; then 25 | echo "$0: expected $srcdir/wav.scp to exist" 26 | exit 1 27 | fi 28 | 29 | if [ -f $destdir/feats.scp ]; then 30 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" 31 | exit 1 32 | fi 33 | 34 | echo "$0: making sure the utt2dur file is present in ${srcdir}, because " 35 | echo "... 
obtaining it after speed-perturbing would be very slow, and" 36 | echo "... you might need it." 37 | utils/data/get_utt2dur.sh ${srcdir} 38 | 39 | utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1 40 | utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1 41 | utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1 42 | 43 | rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 44 | 45 | echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" 46 | utils/validate_data_dir.sh --no-feats $destdir 47 | 48 | -------------------------------------------------------------------------------- /utils/remove_data_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This program searches within a directory for soft links that 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory, 5 | # and it removes both the soft links and the things they point to. 6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] " 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | fi 29 | 30 | for dir in $*; do 31 | if [ ! 
-d $dir ]; then 32 | echo "$0: not a directory: $dir" 33 | ret=1 34 | else 35 | for subdir in $(find $dir -type d); do 36 | if [ -d $subdir/storage ]; then 37 | for x in $(ls $subdir); do 38 | f=$subdir/$x 39 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 40 | target=$subdir/$(readlink $f) 41 | if $dry_run; then 42 | echo rm $f $target 43 | else 44 | rm $f $target 45 | fi 46 | fi 47 | done 48 | fi 49 | done 50 | fi 51 | done 52 | 53 | exit $ret 54 | -------------------------------------------------------------------------------- /utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # ./gen_hamm_mat.py 19 | # script generates diagonal matrix with hamming window values 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='applied splice value') 30 | (options, args) = parser.parse_args() 31 | 32 | if(options.dim == None): 33 | parser.print_help() 34 | sys.exit(1) 35 | 36 | dim=int(options.dim) 37 | splice=int(options.splice) 38 | 39 | 40 | #generate the diagonal matrix with hammings 41 | M_2PI = 6.283185307179586476925286766559005 42 | 43 | dim_mat=(2*splice+1)*dim 44 | timeContext=2*splice+1 45 | print '[' 46 | for row in range(dim_mat): 47 | for col in range(dim_mat): 48 | if col!=row: 49 | print '0', 50 | else: 51 | i=int(row/dim) 52 | print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))), 53 | print 54 | 55 | print ']' 56 | 57 | 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # vscode 2 | .vscode/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_splice.py 19 | # generates Component 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') 30 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' ) 31 | (options, args) = parser.parse_args() 32 | 33 | if(options.dim_in == None): 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | dim_in=int(options.dim_in) 38 | splice=int(options.splice) 39 | splice_step=int(options.splice_step) 40 | 41 | dim_out=(2*splice+1)*dim_in 42 | 43 | print '', dim_out, dim_in 44 | print '[', 45 | 46 | splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step) 47 | for idx in range(len(splice_vec)): 48 | print splice_vec[idx], 49 | 50 | print ']' 51 | 52 | -------------------------------------------------------------------------------- /utils/data/get_frame_shift.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script takes as input a data directory, such as data/train/, preferably 7 | # with utt2dur file already existing (or the utt2dur file will be created if 8 | # not), and it attempts to work out the approximate frame shift by comparing the 9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out. 10 | # if the shift is very close to, but above, 0.01 (the normal frame shift) it 11 | # rounds it down. 12 | 13 | . utils/parse_options.sh 14 | . 
. ./path.sh

if [ $# != 1 ]; then
  echo "Usage: $0 <datadir>"
  echo "e.g.:"
  echo " $0 data/train"
  echo "This script prints the frame-shift (e.g. 0.01) to the standard out."
  echo "If <datadir> does not contain utt2dur, this script may call utils/data/get_utt2dur.sh,"
  echo "which will require write permission to <datadir>"
  exit 1
fi

export LC_ALL=C

dir=$1

# Make sure utt2dur exists; if we cannot even get durations (no wav.scp
# and no segments), fall back to the standard 0.01s frame shift.
if [ ! -s $dir/utt2dur ]; then
  if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then
    echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2
    echo 0.01
    exit 0
  fi
  echo "$0: $dir/utt2dur does not exist: creating it" 1>&2
  utils/data/get_utt2dur.sh $dir 1>&2
fi

if [ ! -f $dir/feats.scp ]; then
  echo "$0: $dir/feats.scp does not exist" 1>&2
  exit 1
fi

temp=$(mktemp /tmp/tmp.XXXX)

feat-to-len scp:$dir/feats.scp ark,t:- | head -n 10 > $temp

# Bugfix: check that the output *file* is non-empty ('-s').  The original
# '[ -z $temp ]' tested the temp-file *name*, which is never empty, so a
# failed feat-to-len run was silently ignored.
if [ ! -s $temp ]; then
  echo "$0: error running feat-to-len" 1>&2
  rm $temp
  exit 1
fi

# Average duration / average frame count over the first 10 utterances;
# round down to exactly 0.01 if only slightly above it.
head -n 10 $dir/utt2dur | paste - $temp | \
   awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1;

rm $temp

exit 0
"""Small helpers for the model code: per-dimension distribution
statistics (skew/kurtosis), unison shuffling, and a TF1-style MLP layer."""

import math
import random

import numpy as np
from scipy import stats


def _shuffled_index(n):
    """Return the integers 0..n-1 in random order.

    Uses the global `random` module RNG, exactly as the original code did,
    so seeding `random.seed(...)` makes the shuffle reproducible.
    """
    index = list(range(n))
    random.shuffle(index)
    return index


def get_skew_and_kurt(data):
    """Compute the mean skewness and kurtosis across dimensions.

    Args:
        data: 2-D array-like of shape (n_samples, n_dims); statistics are
            computed per dimension (per column) over the samples.

    Returns:
        Tuple (skew_mean, kurt_mean): the averages over dimensions of
        scipy.stats.skew and scipy.stats.kurtosis (Fisher definition).

    Raises:
        ValueError: if `data` has no dimensions (the original code raised
            ZeroDivisionError here).
    """
    dims = np.asarray(data).transpose()  # one row per dimension
    skew = [stats.skew(d) for d in dims]
    kurt = [stats.kurtosis(d) for d in dims]
    if not skew:
        raise ValueError("get_skew_and_kurt: input has no dimensions")
    return sum(skew) / len(skew), sum(kurt) / len(kurt)


def shuffle_data_table(data, table):
    """Shuffle `data` and `table` in unison with one random permutation.

    Both arguments must support numpy fancy indexing and have the same
    length along axis 0, so corresponding rows stay aligned.
    """
    index = _shuffled_index(len(data))
    return data[index], table[index]


def shuffle_data(data):
    """Return `data` with its rows in random order (numpy fancy indexing)."""
    return data[_shuffled_index(len(data))]


def MLP_net(input, layer_name, n_hidden, acitvate="elu"):
    """One fully-connected layer: activation(input @ W + b) (TF1 API).

    Args:
        input: 2-D tensor of shape (batch, in_dim).
        layer_name: suffix for the variable names 'w_<name>' / 'b_<name>'.
        n_hidden: output dimension.
        acitvate: 'tanh', 'sigmoid', or anything else for elu.
            (The misspelled parameter name is kept for backward
            compatibility with existing callers.)

    Returns:
        The activated output tensor of shape (batch, n_hidden).
    """
    # Imported lazily so the statistics/shuffle helpers above can be used
    # without TensorFlow installed.  NOTE(review): tf.contrib and
    # tf.get_variable are TF1.x-only — confirm the project pins TF1.
    import tensorflow as tf

    w_init = tf.contrib.layers.variance_scaling_initializer()
    b_init = tf.constant_initializer(0.)

    w = tf.get_variable('w_' + str(layer_name),
                        [input.get_shape()[1], n_hidden],
                        initializer=w_init)
    b = tf.get_variable('b_' + str(layer_name), [n_hidden],
                        initializer=b_init)

    output = tf.matmul(input, w) + b

    if acitvate == 'tanh':
        return tf.nn.tanh(output)
    if acitvate == 'sigmoid':
        return tf.nn.sigmoid(output)
    return tf.nn.elu(output)
31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /local/plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains PLDA models and does scoring. 7 | 8 | simple_length_norm=true # If true, replace the default length normalization 9 | # performed in PLDA by an alternative that 10 | # normalizes the length of the iVectors to be equal 11 | # to the square root of the iVector dimension. 12 | 13 | #echo "$0 $@" # Print the command line for logging 14 | 15 | if [ -f path.sh ]; then . ./path.sh; fi 16 | . 
parse_options.sh || exit 1; 17 | 18 | if [ $# != 5 ]; then 19 | echo "Usage: $0 " 20 | fi 21 | 22 | plda_data_dir=$1 23 | enroll_data_dir=$2 24 | test_data_dir=$3 25 | trials=$4 26 | scores_dir=$5 27 | 28 | mkdir -p $plda_data_dir/log 29 | run.pl $plda_data_dir/log/compute_mean.log \ 30 | ivector-normalize-length ark:${plda_data_dir}/xvector.ark \ 31 | ark:- \| ivector-mean ark:- ${plda_data_dir}/mean.vec || exit 1; 32 | run.pl $plda_data_dir/log/plda.log \ 33 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 34 | "ark:ivector-normalize-length ark:${plda_data_dir}/xvector.ark ark:- |" \ 35 | $plda_data_dir/plda || exit 1; 36 | 37 | mkdir -p $scores_dir/log 38 | run.pl $scores_dir/log/plda_scoring.log \ 39 | ivector-plda-scoring --normalize-length=true \ 40 | --simple-length-normalization=$simple_length_norm \ 41 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 42 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/plda - |" \ 43 | "ark:ivector-normalize-length ark:${enroll_data_dir}/xvector.ark ark:- | ivector-subtract-global-mean ${plda_data_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 44 | "ark:ivector-normalize-length ark:${test_data_dir}/xvector.ark ark:- | ivector-subtract-global-mean ${plda_data_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 45 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/plda_scores || exit 1; 46 | 47 | rm $plda_data_dir/{plda,mean.vec} 48 | -------------------------------------------------------------------------------- /data/zip.py: -------------------------------------------------------------------------------- 1 | # Author: Yang Zhang 2 | # Author: Xueyi Wang 3 | # Apache 2.0. 
4 | # 2019, CSLT 5 | 6 | import argparse 7 | import os 8 | import numpy as np 9 | 10 | 11 | def ark2npz(source_path, dest_path): 12 | print("source_path: ", source_path) 13 | print("dest_path: ", dest_path) 14 | print("start zip...") 15 | print("waiting...") 16 | 17 | count = 0 18 | labels = [] 19 | vector = [] 20 | with open(source_path) as f: 21 | lines = f.readlines() 22 | for line in lines: 23 | count += 1 24 | # print("load {} success!".format(count)) 25 | line.strip('\n') 26 | vector_string = "" 27 | id = "" 28 | is_vector = False 29 | for c in line: 30 | if c == '[': 31 | is_vector = True 32 | if is_vector: 33 | if c != '[' and c != ']': 34 | vector_string += c 35 | if (not is_vector) and c != " ": 36 | id += c 37 | labels.append(id) 38 | num_list = vector_string.split(' ') 39 | num_list.pop() 40 | del(num_list[0]) 41 | num_list = list(map(eval, num_list)) 42 | 43 | vector.append(num_list) 44 | labels = np.array(labels, dtype=" G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 43 | } 44 | 45 | foreach $a (keys %count) { 46 | $prob = $count{$a} / $totcount; 47 | $cost = -log($prob); # Negated natural-log probs. 48 | print "0\t0\t$a\t$a\t$cost\n"; 49 | } 50 | # Zero final-cost. 51 | $final_prob = $nl / $totcount; 52 | $final_cost = -log($final_prob); 53 | print "0\t$final_cost\n"; 54 | 55 | -------------------------------------------------------------------------------- /utils/data/remove_dup_utts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove excess utterances once they appear more than a specified 4 | # number of times with the same transcription, in a data set. 5 | # E.g. 
useful for removing excess "uh-huh" from training. 6 | 7 | if [ $# != 3 ]; then 8 | echo "Usage: remove_dup_utts.sh max-count " 9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup" 10 | echo "This script is used to filter out utterances that have from over-represented" 11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of" 12 | echo "any given word-sequence to a specified value. It's often used to get" 13 | echo "subsets for early stages of training." 14 | exit 1; 15 | fi 16 | 17 | maxcount=$1 18 | srcdir=$2 19 | destdir=$3 20 | mkdir -p $destdir 21 | 22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1; 23 | 24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1; 25 | 26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1; 27 | 28 | cp $srcdir/* $destdir 29 | cat $srcdir/text | \ 30 | perl -e ' 31 | $maxcount = shift @ARGV; 32 | @all = (); 33 | $p1 = 103349; $p2 = 71147; $k = 0; 34 | sub random { # our own random number generator: predictable. 35 | $k = ($k + $p1) % $p2; 36 | return ($k / $p2); 37 | } 38 | while(<>) { 39 | push @all, $_; 40 | @A = split(" ", $_); 41 | shift @A; 42 | $text = join(" ", @A); 43 | $count{$text} ++; 44 | } 45 | foreach $line (@all) { 46 | @A = split(" ", $line); 47 | shift @A; 48 | $text = join(" ", @A); 49 | $n = $count{$text}; 50 | if ($n < $maxcount || random() < ($maxcount / $n)) { 51 | print $line; 52 | } 53 | }' $maxcount >$destdir/text 54 | 55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`" 56 | 57 | echo "Using fix_data_dir.sh to reconcile the other files." 
58 | utils/fix_data_dir.sh $destdir 59 | rm -r $destdir/.backup 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /utils/int2sym.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0. 4 | 5 | undef $field_begin; 6 | undef $field_end; 7 | 8 | 9 | if ($ARGV[0] eq "-f") { 10 | shift @ARGV; 11 | $field_spec = shift @ARGV; 12 | if ($field_spec =~ m/^\d+$/) { 13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 14 | } 15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) 16 | if ($1 ne "") { 17 | $field_begin = $1 - 1; # Change to zero-based indexing. 18 | } 19 | if ($2 ne "") { 20 | $field_end = $2 - 1; # Change to zero-based indexing. 21 | } 22 | } 23 | if (!defined $field_begin && !defined $field_end) { 24 | die "Bad argument to -f option: $field_spec"; 25 | } 26 | } 27 | $symtab = shift @ARGV; 28 | if(!defined $symtab) { 29 | print STDERR "Usage: sym2int.pl [options] symtab [input] > output\n" . 30 | "options: [-f (|-)]\n" . 31 | "e.g.: -f 2, or -f 3-4\n"; 32 | exit(1); 33 | } 34 | 35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 36 | while() { 37 | @A = split(" ", $_); 38 | @A == 2 || die "bad line in symbol table file: $_"; 39 | $int2sym{$A[1]} = $A[0]; 40 | } 41 | 42 | sub int2sym { 43 | my $a = shift @_; 44 | my $pos = shift @_; 45 | if($a !~ m:^\d+$:) { # not all digits.. 46 | $pos1 = $pos+1; # make it one-based. 
47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; 48 | } 49 | $s = $int2sym{$a}; 50 | if(!defined ($s)) { 51 | die "int2sym.pl: integer $a not in symbol table $symtab."; 52 | } 53 | return $s; 54 | } 55 | 56 | $error = 0; 57 | while (<>) { 58 | @A = split(" ", $_); 59 | for ($pos = 0; $pos <= $#A; $pos++) { 60 | $a = $A[$pos]; 61 | if ( (!defined $field_begin || $pos >= $field_begin) 62 | && (!defined $field_end || $pos <= $field_end)) { 63 | $a = int2sym($a, $pos); 64 | } 65 | print $a . " "; 66 | } 67 | print "\n"; 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal ) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # This script reads per-utt table generated for example during scoring 19 | # and outpus the WER similar to the format the compute-wer utility 20 | # or the utils/best_wer.pl produces 21 | # i.e. 
from table containing lines in this format 22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463 23 | # produces something output like this 24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 25 | # NB: if the STDIN stream will contain more of the SUM raw entries, 26 | # the best one will be found and printed 27 | # 28 | # If the script is called with parameters, it uses them pro provide 29 | # a description of the output 30 | # i.e. 31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set 32 | # the following output will be produced 33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set 34 | 35 | 36 | while () { 37 | if ( m:SUM\s+raw:) { 38 | @F = split; 39 | if ((!defined $wer) || ($wer > $F[8])) { 40 | $corr=$F[4]; 41 | $sub=$F[5]; 42 | $ins=$F[6]; 43 | $del=$F[7]; 44 | $wer=$F[8]; 45 | $words=$F[3]; 46 | } 47 | } 48 | } 49 | 50 | if (defined $wer) { 51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words); 52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]"; 53 | print " " . join(" ", @ARGV) if @ARGV > 0; 54 | print "\n"; 55 | } 56 | -------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_dct_mat.py 19 | # script generates matrix with DCT transform, which is sparse 20 | # and takes into account that data-layout is along frequency axis, 21 | # while DCT is done along temporal axis. 22 | 23 | from math import * 24 | import sys 25 | 26 | 27 | from optparse import OptionParser 28 | 29 | parser = OptionParser() 30 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 31 | parser.add_option('--splice', dest='splice', help='applied splice value') 32 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis') 33 | (options, args) = parser.parse_args() 34 | 35 | if(options.dim == None): 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | dim=int(options.dim) 40 | splice=int(options.splice) 41 | dct_basis=int(options.dct_basis) 42 | 43 | timeContext=2*splice+1 44 | 45 | 46 | #generate the DCT matrix 47 | M_PI = 3.1415926535897932384626433832795 48 | M_SQRT2 = 1.4142135623730950488016887 49 | 50 | 51 | #generate sparse DCT matrix 52 | print '[' 53 | for k in range(dct_basis): 54 | for m in range(dim): 55 | for n in range(timeContext): 56 | if(n==0): 57 | print m*'0 ', 58 | else: 59 | print (dim-1)*'0 ', 60 | print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))), 61 | if(n==timeContext-1): 62 | print (dim-m-1)*'0 ', 63 | print 64 | print 65 | 66 | print ']' 67 | 68 | -------------------------------------------------------------------------------- /local/pca_plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # Apache 2.0. 4 | # 5 | # This script trains PLDA models and does scoring. 
6 | 7 | use_existing_models=false 8 | pca_dim=150 9 | simple_length_norm=false # If true, replace the default length normalization 10 | # performed in PLDA by an alternative that 11 | # normalizes the length of the iVectors to be equal 12 | # to the square root of the iVector dimension. 13 | 14 | #echo "$0 $@" # Print the command line for logging 15 | 16 | if [ -f path.sh ]; then . ./path.sh; fi 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# != 5 ]; then 20 | echo "Usage: $0 " 21 | fi 22 | 23 | plda_data_dir=$1 24 | enroll_data_dir=$2 25 | test_data_dir=$3 26 | trials=$4 27 | scores_dir=$5 28 | 29 | mkdir -p $plda_data_dir/log 30 | run.pl $plda_data_dir/log/compute_mean.log \ 31 | ivector-mean ark:$plda_data_dir/xvector.ark \ 32 | $plda_data_dir/mean.vec || exit 1; 33 | 34 | run.pl $plda_data_dir/log/pca.log \ 35 | est-pca --dim=$pca_dim --read-vectors=true --normalize-mean=true \ 36 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- |" \ 37 | $plda_data_dir/transform_pca.mat || exit 1; 38 | 39 | run.pl $plda_data_dir/log/pca_plda.log \ 40 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 41 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 42 | $plda_data_dir/pca_plda || exit 1; 43 | 44 | mkdir -p $scores_dir/log 45 | run.pl $scores_dir/log/pca_plda_scoring.log \ 46 | ivector-plda-scoring --normalize-length=true \ 47 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 48 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/pca_plda - |" \ 49 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$enroll_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 50 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$test_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | 
ivector-normalize-length ark:- ark:- |" \ 51 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/pca_plda_scores || exit 1; 52 | 53 | rm $plda_data_dir/{transform_pca.mat,pca_plda,mean.vec} 54 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a data directory, such as in data/train/, and modifies 7 | # the wav.scp to perturb the volume (typically useful for training data when 8 | # using systems that don't have cepstral mean normalization). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 1 ]; then 13 | echo "Usage: $0 " 14 | echo "e.g.:" 15 | echo " $0 data/train" 16 | exit 1 17 | fi 18 | 19 | export LC_ALL=C 20 | 21 | data=$1 22 | 23 | if [ ! -f $data/wav.scp ]; then 24 | echo "$0: Expected $data/wav.scp to exist" 25 | exit 1 26 | fi 27 | 28 | if grep -q "sox --vol" $data/wav.scp; then 29 | echo "$0: It looks like the data was already volume perturbed. Not doing anything." 
30 | exit 0 31 | fi 32 | 33 | cat $data/wav.scp | python -c " 34 | import sys, os, subprocess, re, random 35 | random.seed(0) 36 | scale_low = 1.0/8 37 | scale_high = 2.0 38 | for line in sys.stdin.readlines(): 39 | if len(line.strip()) == 0: 40 | continue 41 | # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' 42 | if line.strip()[-1] == '|': 43 | print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) 44 | elif re.search(':[0-9]+$', line.strip()) is not None: 45 | parts = line.split() 46 | print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 47 | else: 48 | parts = line.split() 49 | print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 50 | " > $data/wav.scp_scaled || exit 1; 51 | 52 | len1=$(cat $data/wav.scp | wc -l) 53 | len2=$(cat $data/wav.scp_scaled | wc -l) 54 | if [ "$len1" != "$len2" ]; then 55 | echo "$0: error detected: number of lines changed $len1 vs $len2"; 56 | exit 1 57 | fi 58 | 59 | mv $data/wav.scp_scaled $data/wav.scp 60 | 61 | if [ -f $data/feats.scp ]; then 62 | echo "$0: $data/feats.scp exists; moving it to $data/.backup/ as it wouldn't be valid any more." 63 | mkdir -p $data/.backup/ 64 | mv $data/feats.scp $data/.backup/ 65 | fi 66 | 67 | echo "$0: added volume perturbation to the data in $data" 68 | exit 0 69 | 70 | -------------------------------------------------------------------------------- /local/lda_plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains PLDA models and does scoring. 
7 | 8 | lda_dim=150 9 | covar_factor=0.1 10 | simple_length_norm=false # If true, replace the default length normalization 11 | # performed in PLDA by an alternative that 12 | # normalizes the length of the iVectors to be equal 13 | # to the square root of the iVector dimension. 14 | 15 | #echo "$0 $@" # Print the command line for logging 16 | 17 | if [ -f path.sh ]; then . ./path.sh; fi 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# != 5 ]; then 21 | echo "Usage: $0 " 22 | fi 23 | 24 | plda_data_dir=$1 25 | enroll_data_dir=$2 26 | test_data_dir=$3 27 | trials=$4 28 | scores_dir=$5 29 | 30 | mkdir -p $plda_data_dir/log 31 | run.pl $plda_data_dir/log/compute_mean.log \ 32 | ivector-mean ark:$plda_data_dir/xvector.ark \ 33 | $plda_data_dir/mean.vec || exit 1; 34 | 35 | run.pl $plda_data_dir/log/lda.log \ 36 | ivector-compute-lda --total-covariance-factor=$covar_factor --dim=$lda_dim \ 37 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- |" \ 38 | ark:$plda_data_dir/utt2spk $plda_data_dir/transform_lda.mat || exit 1; 39 | 40 | run.pl $plda_data_dir/log/lda_plda.log \ 41 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 42 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 43 | $plda_data_dir/lda_plda || exit 1; 44 | 45 | mkdir -p $scores_dir/log 46 | run.pl $scores_dir/log/lda_plda_scoring.log \ 47 | ivector-plda-scoring --normalize-length=true \ 48 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 49 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/lda_plda - |" \ 50 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$enroll_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 51 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$test_data_dir/xvector.ark ark:- | transform-vec 
$plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 52 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/lda_plda_scores || exit 1; 53 | 54 | rm $plda_data_dir/{transform_lda.mat,lda_plda,mean.vec} 55 | -------------------------------------------------------------------------------- /utils/find_arpa_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | if ( @ARGV < 1 && @ARGV > 2) { 19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n"; 20 | # This program finds words in the arpa file that are not symbols 21 | # in the OpenFst-format symbol table words.txt. It prints them 22 | # on the standard output, one per line. 23 | } 24 | 25 | $symtab = shift @ARGV; 26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 2 || die "Bad line in symbol table file: $_"; 30 | $seen{$A[0]} = 1; 31 | } 32 | 33 | $found_data=0; 34 | $curgram=0; 35 | while(<>) { # Find the \data\ marker. 
36 | if(m:^\\data\\\s*$:) { $found_data=1; last; } 37 | } 38 | 39 | if ($found_data==0) { 40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n"; 41 | exit(1); 42 | } 43 | 44 | while(<>) { 45 | if(m/^\\(\d+)\-grams:\s*$/) { 46 | $curgram = $1; 47 | if($curgram > 1) { 48 | last; # This is an optimization as we can get the vocab from the 1-grams 49 | } 50 | } elsif($curgram > 0) { 51 | @A = split(" ", $_); 52 | if(@A > 1) { 53 | shift @A; 54 | for($n=0;$n<$curgram;$n++) { 55 | $word = $A[$n]; 56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; } 57 | $in_arpa{$word} = 1; 58 | } 59 | } else { 60 | if(@A > 0 && $A[0] !~ m:\\end\\:) { 61 | print STDERR "Unusual line $_ (line $.) in arpa file\n"; 62 | } 63 | } 64 | } 65 | } 66 | 67 | foreach $w (keys %in_arpa) { 68 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") { 69 | print "$w\n"; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /utils/prepare_online_nnet_dist_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti) 4 | # Guoguo Chen 5 | # Apache 2.0 6 | # Script to prepare the distribution from the online-nnet build 7 | 8 | other_files= #other files to be included in the build 9 | other_dirs= 10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf" 11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts" 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | [ -f path.sh ] && . ./path.sh; 15 | . 
parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: $0 " 19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz" 20 | exit 1; 21 | fi 22 | 23 | lang=$1 24 | modeldir=$2 25 | tgzfile=$3 26 | 27 | for f in $lang/phones.txt $other_files; do 28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 29 | done 30 | 31 | build_files= 32 | for d in $modeldir/conf $modeldir/ivector_extractor; do 33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1; 34 | done 35 | 36 | for f in $ivec_extractor_files; do 37 | f=$modeldir/ivector_extractor/$f 38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 39 | build_files="$build_files $f" 40 | done 41 | 42 | # Makes a copy of the original config files, as we will change the absolute path 43 | # to relative. 44 | rm -rf $modeldir/conf_abs_path 45 | mkdir -p $modeldir/conf_abs_path 46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path 47 | 48 | for f in $conf_files; do 49 | [ ! -f $modeldir/conf/$f ] && \ 50 | echo "$0: no such file $modeldir/conf/$f" && exit 1; 51 | # Changes absolute path to relative path. The path entries in the config file 52 | # are generated by scripts and it is safe to assume that they have structure: 53 | # variable=path 54 | cat $modeldir/conf_abs_path/$f | perl -e ' 55 | use File::Spec; 56 | while() { 57 | chomp; 58 | @col = split("=", $_); 59 | if (@col == 2 && (-f $col[1])) { 60 | $col[1] = File::Spec->abs2rel($col[1]); 61 | print "$col[0]=$col[1]\n"; 62 | } else { 63 | print "$_\n"; 64 | } 65 | } 66 | ' > $modeldir/conf/$f 67 | build_files="$build_files $modeldir/conf/$f" 68 | done 69 | 70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \ 71 | $modeldir/final.mdl $modeldir/tree >/dev/null 72 | 73 | # Changes back to absolute path. 
74 | rm -rf $modeldir/conf 75 | mv $modeldir/conf_abs_path $modeldir/conf 76 | -------------------------------------------------------------------------------- /utils/format_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -u 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | set -o errexit 20 | 21 | if [ $# -ne 4 ]; then 22 | echo "Usage: $0 " 23 | echo "E.g.: $0 data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test" 24 | echo "Convert ARPA-format language models to FSTs."; 25 | exit 1; 26 | fi 27 | 28 | lang_dir=$1 29 | lm=$2 30 | lexicon=$3 31 | out_dir=$4 32 | mkdir -p $out_dir 33 | 34 | [ -f ./path.sh ] && . 
./path.sh 35 | 36 | echo "Converting '$lm' to FST" 37 | 38 | for f in phones.txt words.txt topo L.fst L_disambig.fst phones/ oov.int oov.txt; do 39 | cp -r $lang_dir/$f $out_dir 40 | done 41 | 42 | lm_base=$(basename $lm '.gz') 43 | gunzip -c $lm \ 44 | | arpa2fst --disambig-symbol=#0 \ 45 | --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst 46 | set +e 47 | fstisstochastic $out_dir/G.fst 48 | set -e 49 | # The output is like: 50 | # 9.14233e-05 -0.259833 51 | # we do expect the first of these 2 numbers to be close to zero (the second is 52 | # nonzero because the backoff weights make the states sum to >1). 53 | 54 | # Everything below is only for diagnostic. 55 | # Checking that G has no cycles with empty words on them (e.g. , ); 56 | # this might cause determinization failure of CLG. 57 | # #0 is treated as an empty word. 58 | mkdir -p $out_dir/tmpdir.g 59 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 60 | END{print "0 0 #0 #0"; print "0";}' \ 61 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt 62 | 63 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 64 | $out_dir/tmpdir.g/select_empty.fst.txt \ 65 | | fstarcsort --sort_type=olabel \ 66 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst 67 | 68 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \ 69 | && echo "Language model has cycles with empty words" && exit 1 70 | 71 | rm -r $out_dir/tmpdir.g 72 | 73 | 74 | echo "Succeeded in formatting LM: '$lm'" 75 | -------------------------------------------------------------------------------- /utils/convert_slf_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0. 3 | 4 | # This script converts lattices to HTK format compatible with other toolkits. 5 | # We can choose to put words to nodes or arcs, as both is valid in the SLF format. 
6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | dirname=lats-in-htk-slf 10 | parallel_opts="-tc 50" # We should limit disk stress 11 | word_to_node=false # Words in arcs or nodes? [default:arcs] 12 | #end configuration section. 13 | 14 | echo "$0 $@" 15 | 16 | [ -f ./path.sh ] && . ./path.sh 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# -ne 3 ]; then 20 | echo "Usage: $0 [options] " 21 | echo " Options:" 22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 23 | echo " --word-to-link (true|false) # put word symbols on links or nodes." 24 | echo " --parallel-opts STR # parallelization options (def.: '-tc 50')." 25 | echo "e.g.:" 26 | echo "$0 data/dev data/lang exp/tri4a/decode_dev" 27 | exit 1; 28 | fi 29 | 30 | data=$1 31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. 32 | dir=$3 33 | 34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir. 35 | 36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | [ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname 41 | 42 | echo "$0: Converting lattices into '$dir/$dirname'" 43 | 44 | # Words in arcs or nodes? 
[default:nodes] 45 | word_to_link_arg= 46 | $word_to_node && word_to_node_arg="--word-to-node" 47 | 48 | nj=$(cat $dir/num_jobs) 49 | 50 | # convert the lattices (individually, gzipped) 51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \ 52 | mkdir -p $dir/$dirname/JOB/ '&&' \ 53 | lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true \ 54 | $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \ 55 | utils/int2sym.pl -f 3 $lang/words.txt \| \ 56 | utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1 57 | 58 | # make list of lattices 59 | find -L $PWD/$dir/$dirname -name *.lat.gz > $dir/$dirname/lat_htk.scp || exit 1 60 | 61 | # check number of lattices: 62 | nseg=$(cat $data/segments | wc -l) 63 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l) 64 | echo "segments $nseg, saved-lattices $nlat_out" 65 | # 66 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \ 67 | && exit 1 68 | 69 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp" 70 | exit 0 71 | 72 | -------------------------------------------------------------------------------- /utils/lang/check_phones_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Hang Lyu 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABILITY OR NON-INFRINGEMENT. 
14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script exits with status zero if the phone symbols tables are the same 18 | # except for possible differences in disambiguation symbols (meaning that all 19 | # symbols except those beginning with a # are mapped to the same values). 20 | # Otherwise it prints a warning and exits with status 1. 21 | # For the sake of compatibility with other scripts that did not write the 22 | # phones.txt to model directories, this script exits silently with status 0 23 | # if one of the phone symbol tables does not exist. 24 | # For the sake of compatibility with other scripts that did not write the 25 | # phones.txt to model directories, this script exits silently with status 0 26 | # if one of the phone symbol tables does not exist. 27 | 28 | . utils/parse_options.sh || exit 1; 29 | 30 | if [ $# -ne 2 ]; then 31 | echo "Usage: utils/lang/check_phones_compatible.sh " 32 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt" 33 | exit 1; 34 | fi 35 | 36 | table_first=$1 37 | table_second=$2 38 | 39 | # check the files exist or not 40 | if [ ! -f $table_first ]; then 41 | if [ ! -f $table_second ]; then 42 | echo "$0: Error! Both of the two phones-symbol tables are absent." 43 | echo "Please check your command" 44 | exit 1; 45 | else 46 | #The phones-symbol-table1 is absent. The model directory maybe created by old script. 47 | #For back compatibility, this script exits silently with status 0. 48 | exit 0; 49 | fi 50 | elif [ ! -f $table_second ]; then 51 | #The phones-symbol-table2 is absent. The model directory maybe created by old script. 52 | #For back compatibility, this script exits silently with status 0. 53 | exit 0; 54 | fi 55 | 56 | #Check the two tables are same or not (except for possible difference in disambiguation symbols). 57 | if ! 
cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then 58 | echo "$0: phone symbol tables $table_first and $table_second are not compatible." 59 | exit 1; 60 | fi 61 | 62 | exit 0; 63 | -------------------------------------------------------------------------------- /utils/lang/validate_disambig_sym_file.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2016 FAU Erlangen (Author: Axel Horndasch) 4 | # Apache 2.0. 5 | # 6 | # Concept: Dan Povey 7 | 8 | use strict; 9 | use warnings; 10 | use Getopt::Long; 11 | 12 | my $Usage = < \$allow_numeric, 37 | ) or die ($Usage); 38 | 39 | if (@ARGV != 1) { 40 | die($Usage); 41 | } 42 | 43 | my $disambig_sym_file = shift @ARGV; 44 | 45 | print "$0: Checking validity of file \"$disambig_sym_file\" ...\n"; 46 | if (-z $disambig_sym_file) { 47 | print "$0: The file \"$disambig_sym_file\" is empty or does not exist, exiting ...\n"; exit 1; 48 | } 49 | 50 | if (not open(SYMS, "<$disambig_sym_file")) { 51 | print "$0: Could not open file \"$disambig_sym_file\", exiting ...\n"; exit 1; 52 | } 53 | 54 | # Go through the file containing disambiguation symbols line by line 55 | while () { 56 | chomp; 57 | my $symbol = $_; 58 | 59 | if ($symbol =~ /^#(.*)$/) { 60 | my $sympart = $1; 61 | if ($sympart eq "") { 62 | print "$0: Only \"$symbol\" is not allowed as a disambiguation symbol, exiting ...\n"; exit 1; 63 | } 64 | if ($sympart =~/\s+/) { 65 | print "$0: The disambiguation symbol \"$symbol\" contains whitespace, exiting ...\n"; exit 1; 66 | } 67 | if ($sympart eq "-1") { 68 | print "$0: The disambiguation symbol \"$symbol\" is not allowed, exiting ...\n"; exit 1; 69 | } 70 | if ($allow_numeric eq "false" && 71 | $sympart =~/^[0-9]+$/) { 72 | print "$0: Since \"$symbol\" is supposed to be an extra disambiguation symbol, it must not be numeric, exiting ...\n"; exit 1; 73 | } 74 | } else { 75 | print "$0: The disambiguation symbol \"$symbol\" 
does not start with a '#', exiting ...\n"; exit 1; 76 | } 77 | } 78 | 79 | print "--> SUCCESS [validating disambiguation symbol file \"$disambig_sym_file\"]\n"; 80 | exit 0; 81 | 82 | -------------------------------------------------------------------------------- /utils/create_split_dir.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # Apache 2.0. 5 | # 6 | # This script creates storage directories on different file systems, and creates 7 | # symbolic links to those directories. For example, a command 8 | # 9 | # utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 10 | # 11 | # will mkdir -p all of those directories, and will create links 12 | # 13 | # egs/storage/1 -> /export/gpu-03/egs/storage 14 | # egs/storage/2 -> /export/gpu-04/egs/storage 15 | # ... 16 | # 17 | use strict; 18 | use warnings; 19 | use File::Spec; 20 | use Getopt::Long; 21 | 22 | my $Usage = < 28 | e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 29 | 30 | Allowed options: 31 | --suffix : Common suffix to (string, default = "") 32 | 33 | See also create_data_link.pl, which is intended to work with the resulting 34 | directory structure, and remove_data_links.sh 35 | EOU 36 | 37 | my $suffix=""; 38 | GetOptions('suffix=s' => \$suffix); 39 | 40 | if (@ARGV < 2) { 41 | die $Usage; 42 | } 43 | 44 | my $ans = 1; 45 | 46 | my $dir = pop(@ARGV); 47 | system("mkdir -p $dir 2>/dev/null"); 48 | 49 | my @all_actual_storage = (); 50 | foreach my $file (@ARGV) { 51 | push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix); 52 | } 53 | 54 | my $index = 1; 55 | foreach my $actual_storage (@all_actual_storage) { 56 | my $pseudo_storage = "$dir/$index"; 57 | 58 | # If the symbolic link already exists, skip it rather than overwrite it. 
59 | if (-l $pseudo_storage) { 60 | print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n"; 61 | $index++; 62 | next; 63 | } 64 | 65 | # Create the destination directory and make the link. 66 | system("mkdir -p $actual_storage 2>/dev/null"); 67 | if ($? != 0) { 68 | print STDERR "$0: error creating directory $actual_storage\n"; 69 | exit(1); 70 | } 71 | { # create a README file for easier deletion. 72 | open(R, ">$actual_storage/README.txt"); 73 | my $storage_dir = File::Spec->rel2abs($dir); 74 | print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n"; 75 | print R "# The full list of directories where this data resides is:\n"; 76 | foreach my $d (@all_actual_storage) { 77 | print R "$d\n"; 78 | } 79 | close(R); 80 | } 81 | my $ret = symlink($actual_storage, $pseudo_storage); 82 | 83 | # Process the returned values 84 | $ans = $ans && $ret; 85 | if (! $ret) { 86 | print STDERR "Error linking $actual_storage to $pseudo_storage\n"; 87 | } 88 | 89 | $index++; 90 | } 91 | 92 | exit($ans == 1 ? 0 : 1); 93 | -------------------------------------------------------------------------------- /utils/apply_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | # This program is a bit like ./sym2int.pl in that it applies a map 7 | # to things in a file, but it's a bit more general in that it doesn't 8 | # assume the things being mapped to are single tokens, they could 9 | # be sequences of tokens. See the usage message. 10 | 11 | 12 | if (@ARGV > 0 && $ARGV[0] eq "-f") { 13 | shift @ARGV; 14 | $field_spec = shift @ARGV; 15 | if ($field_spec =~ m/^\d+$/) { 16 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 17 | } 18 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 
1:10 as a courtesty (properly, 1-10) 19 | if ($1 ne "") { 20 | $field_begin = $1 - 1; # Change to zero-based indexing. 21 | } 22 | if ($2 ne "") { 23 | $field_end = $2 - 1; # Change to zero-based indexing. 24 | } 25 | } 26 | if (!defined $field_begin && !defined $field_end) { 27 | die "Bad argument to -f option: $field_spec"; 28 | } 29 | } 30 | 31 | # Mapping is obligatory 32 | $permissive = 0; 33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') { 34 | shift @ARGV; 35 | # Mapping is optional (missing key is printed to output) 36 | $permissive = 1; 37 | } 38 | 39 | if(@ARGV != 1) { 40 | print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; 41 | print STDERR "Usage: apply_map.pl [options] map output\n" . 42 | "options: [-f ]\n" . 43 | "Applies the map 'map' to all input text, where each line of the map\n" . 44 | "is interpreted as a map from the first field to the list of the other fields\n" . 45 | "Note: can look like 4-5, or 4-, or 5-, or 1, it means the field\n" . 46 | "range in the input to apply the map to.\n" . 47 | "e.g.: echo A B | apply_map.pl a.txt\n" . 48 | "where a.txt is:\n" . 49 | "A a1 a2\n" . 50 | "B b\n" . 51 | "will produce:\n" . 52 | "a1 a2 b\n"; 53 | exit(1); 54 | } 55 | 56 | ($map) = @ARGV; 57 | open(M, "<$map") || die "Error opening map file $map: $!"; 58 | 59 | while () { 60 | @A = split(" ", $_); 61 | @A >= 1 || die "apply_map.pl: empty line."; 62 | $i = shift @A; 63 | $o = join(" ", @A); 64 | $map{$i} = $o; 65 | } 66 | 67 | while() { 68 | @A = split(" ", $_); 69 | for ($x = 0; $x < @A; $x++) { 70 | if ( (!defined $field_begin || $x >= $field_begin) 71 | && (!defined $field_end || $x <= $field_end)) { 72 | $a = $A[$x]; 73 | if (!defined $map{$a}) { 74 | if (!$permissive) { 75 | die "apply_map.pl: undefined key $a\n"; 76 | } else { 77 | print STDERR "apply_map.pl: warning! missing key $a\n"; 78 | } 79 | } else { 80 | $A[$x] = $map{$a}; 81 | } 82 | } 83 | } 84 | print join(" ", @A) . 
"\n"; 85 | } 86 | -------------------------------------------------------------------------------- /utils/lang/check_g_properties.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use IPC::Open2; 4 | 5 | if (@ARGV != 1) { 6 | print "Usage: $0 [options] \n"; 7 | print "e.g.: $0 data/lang\n"; 8 | exit(1); 9 | } 10 | 11 | $lang = shift @ARGV; 12 | 13 | # This script checks that G.fst in the lang.fst directory is OK with respect 14 | # to certain expected properties, and returns nonzero exit status if a problem was 15 | # detected. It is called from validate_lang.pl. 16 | # This only checks the properties of G that relate to disambiguation symbols, 17 | # epsilons and forbidden symbols <s> and </s>. 18 | 19 | if (! -e "$lang/G.fst") { 20 | print "$0: error: $lang/G.fst does not exist\n"; 21 | exit(1); 22 | } 23 | 24 | open(W, "<$lang/words.txt") || die "opening $lang/words.txt"; 25 | $hash_zero = -1; 26 | while (<W>) { 27 | @A = split(" ", $_); 28 | ($sym, $int) = @A; 29 | if ($sym eq "<s>" || $sym eq "</s>") { $is_forbidden{$int} = 1; } 30 | if ($sym eq "#0") { $hash_zero = $int; } 31 | } 32 | 33 | if (-e "$lang/phones/wdisambig_words.int") { 34 | open(F, "<$lang/phones/wdisambig_words.int") || die "opening $lang/phones/wdisambig_words.int"; 35 | while (<F>) { 36 | chop; 37 | $is_disambig{$_} = 1; 38 | } 39 | } else { 40 | $is_disambig{$hash_zero} = 1; 41 | } 42 | 43 | $input_cmd = ". ./path.sh; fstprint $lang/G.fst|"; 44 | open(G, $input_cmd) || die "running command $input_cmd"; 45 | 46 | $info_cmd = ". 
./path.sh; fstcompile | fstinfo "; 47 | open2(O, I, "$info_cmd") || die "running command $info_cmd"; 48 | 49 | $has_epsilons = 0; 50 | 51 | while () { 52 | @A = split(" ", $_); 53 | if (@A >= 4) { 54 | if ($is_forbidden{$A[2]} || $is_forbidden{$A[3]}) { 55 | chop; 56 | print "$0: validating $lang: error: line $_ in G.fst contains forbidden symbol or \n"; 57 | exit(1); 58 | } elsif ($is_disambig{$A[2]}) { 59 | print I $_; 60 | if ($A[3] != 0) { 61 | chop; 62 | print "$0: validating $lang: error: line $_ in G.fst has disambig on input but no epsilon on output\n"; 63 | exit(1); 64 | } 65 | } elsif ($A[2] == 0) { 66 | print I $_; 67 | $has_epsilons = 1; 68 | } elsif ($A[2] != $A[3]) { 69 | chop; 70 | print "$0: validating $lang: error: line $_ in G.fst has inputs and outputs different but input is not disambig symbol.\n"; 71 | exit(1); 72 | } 73 | } 74 | } 75 | 76 | close(I); # tell 'fstcompile | fstinfo' pipeline that its input is done. 77 | while () { 78 | if (m/cyclic\s+y/) { 79 | print "$0: validating $lang: error: G.fst has cycles containing only disambig symbols and epsilons. Would cause determinization failure\n"; 80 | exit(1); 81 | } 82 | } 83 | 84 | if ($has_epsilons) { 85 | print "$0: warning: validating $lang: G.fst has epsilon-input arcs. We don't expect these in most setups.\n"; 86 | } 87 | 88 | print "--> $0 successfully validated $lang/G.fst\n"; 89 | exit(0); 90 | -------------------------------------------------------------------------------- /eer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) 3 | # 2017 Johns Hopkins University (Author: Daniel Povey) 4 | # 2017-2018 David Snyder 5 | # 2018 Ewald Enzinger 6 | # 2019 Tsinghua University (Author: Lantian Li) 7 | # 2019 Yang Zhang 8 | # Apache 2.0. 9 | # 10 | # This is an x-vector-based recipe for Speakers in the Wild (SITW). 11 | 12 | . 
./path.sh 13 | 14 | 15 | for sub in dev eval; do 16 | # Cosine metric. 17 | echo "Test on SITW $sub:" 18 | 19 | local/cosine_scoring.sh data/sitw_$sub/enroll \ 20 | data/sitw_$sub/test \ 21 | data/sitw_$sub/test/core-core.lst \ 22 | data/sitw_$sub/foo 23 | 24 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/cosine_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 25 | echo "Cosine EER: $eer%" 26 | 27 | # Create a PLDA model and do scoring. 28 | local/plda_scoring.sh data/voxceleb_combined_200000 \ 29 | data/sitw_$sub/enroll \ 30 | data/sitw_$sub/test \ 31 | data/sitw_$sub/test/core-core.lst \ 32 | data/sitw_$sub/foo 33 | 34 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 35 | echo "PLDA EER: $eer%" 36 | 37 | # Create a LDA-PLDA model and do scoring. 38 | for lda_dim in 150;do 39 | 40 | local/lda_plda_scoring.sh --lda-dim $lda_dim --covar-factor 0.0 \ 41 | data/voxceleb_combined_200000 \ 42 | data/sitw_$sub/enroll \ 43 | data/sitw_$sub/test \ 44 | data/sitw_$sub/test/core-core.lst \ 45 | data/sitw_$sub/foo 46 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/lda_plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 47 | echo "LDA_PLDA EER(${lda_dim}): $eer%" 48 | 49 | done 50 | 51 | # Create a PCA-PLDA model and do scoring. 
52 | for pca_dim in 150;do 53 | 54 | local/pca_plda_scoring.sh --pca-dim $pca_dim \ 55 | data/voxceleb_combined_200000 \ 56 | data/sitw_$sub/enroll \ 57 | data/sitw_$sub/test \ 58 | data/sitw_$sub/test/core-core.lst \ 59 | data/sitw_$sub/foo 60 | 61 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/pca_plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 62 | echo "PCA_PLDA EER(${pca_dim}): $eer%" 63 | done 64 | 65 | echo 66 | done 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # v-vector-tf 2 | 3 | Tensorflow and kaldi implementation of our Interspeech2019 paper [VAE-based regularization for deep speaker embedding](https://github.com/zyzisyz/v-vector-tf/raw/master/paper.pdf) 4 | 5 | **note: the repo is not the final release, I will clean up our experimental code and update soon** 6 | 7 | ## Dependency 8 | 9 | 1. computer 10 | 2. Linux (centos 7) 11 | 3. conda (Python 3.6) 12 | 4. Tensorflow-gpu 1.8 13 | 5. kaldi-toolkit 14 | 15 | ## Datasets and X-vector 16 | 17 | 1. VoxCeleb 18 | 2. SITW 19 | 3. CSLT_SITW 20 | 21 | ## Steps 22 | 23 | 1. use kaldi to extract x-vector from utterance and get `xvector.ark` files 24 | 2. convert the kaldi `xvector.ark` files to numpy binary data format (`xvector.ark` -> `xvector.npz`) 25 | 3. use tensorflow to train a VAE model, and get the V-vectors 26 | 4. use kaldi recipes to calculate EER (equal error rate) 27 | 28 | ## Usage 29 | 30 | 1. [install kaldi](https://github.com/kaldi-asr/kaldi) (note: if you are one of CSLT members, you can reference [Dr. tzy's Kaldi](https://github.com/tzyll/kaldi) or [CSLT Kaldi](https://github.com/csltstu/kaldi)) 31 | 32 | 2. 
create a conda environment and install the necessary Python package 33 | 34 | ```bash 35 | # for example 36 | conda create -n tf python=3.6 37 | conda activate tf 38 | pip install -r requirements.txt 39 | ``` 40 | 41 | 3. git clone the code and modify the `path.sh`, make sure that `path.sh` contains your kaldi path 42 | 43 | ```bash 44 | git clone https://github.com/zyzisyz/v-vector-tf.git 45 | 46 | # edit path.sh 47 | vim path.sh 48 | # export KALDI_ROOT=${replace it by your kaldi root path} 49 | ``` 50 | 51 | 4. calculate baseline EER 52 | 53 | ```bash 54 | bash baseline.sh 55 | ``` 56 | 57 | 5. Train a model 58 | 59 | ```bash 60 | # first of all, activate the conda Python environment 61 | conda activate tf 62 | # you can edit train.sh to change VAE model's config 63 | bash train.sh 64 | ``` 65 | 66 | 6. Use kaldi-toolkit to train the backend scoring model and calculate EER 67 | 68 | ```bash 69 | bash eval.sh 70 | ``` 71 | 72 | ## Our result 73 | 74 | SITW Dev. Core 75 | 76 | | | Cosine | PCA | PLDA | L-PLDA | P-PLDA | 77 | | :------: | :------: | :------: | :------: | :------: | :------: | 78 | | x-vector | 15.67 | 16.17 | 9.09 | **3.12** | 4.16 | 79 | | a-vector | 16.10 | 16.48 | 11.21 | 4.24 | 5.01 | 80 | | v-vector | 10.32 | 9.94 | 3.62 | 3.54 | 4.31 | 81 | | c-vector | **9.05** | **8.55** | **3.50** | 3.31 | **3.85** | 82 | 83 | Read the paper for more detail 84 | 85 | ## About 86 | 87 | Licensed under the Apache License, Version 2.0, Copyright [zyzisyz](https://github.com/zyzisyz) 88 | 89 | ### Repo Author 90 | 91 | Yang Zhang (zyziszy@foxmail.com) 92 | 93 | ### Contributors 94 | 95 | - [@Lilt](http://166.111.134.19:8081/lilt/) 96 | - [@fatejessie](https://github.com/fatejessie) 97 | - [@xDarkLemon](https://github.com/xDarkLemon) 98 | - [@AlanXiuxiu](https://github.com/AlanXiuxiu) 99 | - @Z.K. 
100 | -------------------------------------------------------------------------------- /utils/filter_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script takes a list of utterance-ids or any file whose first field 20 | # of each line is an utterance-id, and filters an scp 21 | # file (or any file whose "n-th" field is an utterance id), printing 22 | # out only those lines whose "n-th" field is in id_list. The index of 23 | # the "n-th" field is 1, by default, but can be changed by using 24 | # the -f switch 25 | 26 | $exclude = 0; 27 | $field = 1; 28 | $shifted = 0; 29 | 30 | do { 31 | $shifted=0; 32 | if ($ARGV[0] eq "--exclude") { 33 | $exclude = 1; 34 | shift @ARGV; 35 | $shifted=1; 36 | } 37 | if ($ARGV[0] eq "-f") { 38 | $field = $ARGV[1]; 39 | shift @ARGV; shift @ARGV; 40 | $shifted=1 41 | } 42 | } while ($shifted); 43 | 44 | if(@ARGV < 1 || @ARGV > 2) { 45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . 46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . 47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . 
48 | "only the lines that were *not* in id_list.\n" . 49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . 50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" . 51 | "-f option, add 1 to the argument.\n" . 52 | "See also: utils/filter_scp.pl .\n"; 53 | } 54 | 55 | 56 | $idlist = shift @ARGV; 57 | open(F, "<$idlist") || die "Could not open id-list file $idlist"; 58 | while() { 59 | @A = split; 60 | @A>=1 || die "Invalid id-list file line $_"; 61 | $seen{$A[0]} = 1; 62 | } 63 | 64 | if ($field == 1) { # Treat this as special case, since it is common. 65 | while(<>) { 66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 67 | # $1 is what we filter on. 68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { 69 | print $_; 70 | } 71 | } 72 | } else { 73 | while(<>) { 74 | @A = split; 75 | @A > 0 || die "Invalid scp file line $_"; 76 | @A >= $field || die "Invalid scp file line $_"; 77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { 78 | print $_; 79 | } 80 | } 81 | } 82 | 83 | # tests: 84 | # the following should print "foo 1" 85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) 86 | # the following should print "bar 2". 87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) 88 | -------------------------------------------------------------------------------- /utils/gen_topo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. 
7 | 8 | if (@ARGV != 4) { 9 | print STDERR "Usage: utils/gen_topo.pl \n"; 10 | print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n"; 11 | exit (1); 12 | } 13 | 14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV; 15 | 16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) || 17 | die "Unexpected number of nonsilence-model states $num_nonsil_states\n"; 18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) || 19 | die "Unexpected number of silence-model states $num_sil_states\n"; 20 | 21 | $nonsil_phones =~ s/:/ /g; 22 | $sil_phones =~ s/:/ /g; 23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; 24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; 25 | 26 | print "\n"; 27 | print "\n"; 28 | print "\n"; 29 | print "$nonsil_phones\n"; 30 | print "\n"; 31 | for ($state = 0; $state < $num_nonsil_states; $state++) { 32 | $statep1 = $state+1; 33 | print " $state $state $state 0.75 $statep1 0.25 \n"; 34 | } 35 | print " $num_nonsil_states \n"; # non-emitting final state. 36 | print "\n"; 37 | # Now silence phones. They have a different topology-- apart from the first and 38 | # last states, it's fully connected, as long as you have >= 3 states. 39 | 40 | if ($num_sil_states > 1) { 41 | $transp = 1.0 / ($num_sil_states-1); 42 | print "\n"; 43 | print "\n"; 44 | print "$sil_phones\n"; 45 | print "\n"; 46 | print " 0 0 "; 47 | for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last 48 | # emitting state. 49 | print " $nextstate $transp "; 50 | } 51 | print "\n"; 52 | for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to 53 | # themselves and to the last emitting state. 
54 | print " $state $state "; 55 | for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) { 56 | print " $nextstate $transp "; 57 | } 58 | print "\n"; 59 | } 60 | # Final emitting state (non-skippable). 61 | $state = $num_sil_states-1; 62 | print " $state $state $state 0.75 $num_sil_states 0.25 \n"; 63 | # Final nonemitting state: 64 | print " $num_sil_states \n"; 65 | print "\n"; 66 | } else { 67 | print "\n"; 68 | print "\n"; 69 | print "$sil_phones\n"; 70 | print "\n"; 71 | print " 0 0 "; 72 | print " 0 0.75 "; 73 | print " 1 0.25 "; 74 | print "\n"; 75 | print " $num_nonsil_states \n"; # non-emitting final state. 76 | print "\n"; 77 | } 78 | 79 | print "\n"; 80 | -------------------------------------------------------------------------------- /utils/subset_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This program selects a subset of N elements in the scp. 19 | 20 | # By default, it selects them evenly from throughout the scp, in order to avoid 21 | # selecting too many from the same speaker. It prints them on the standard 22 | # output. 23 | # With the option --first, it just selects the N first utterances. 
24 | # With the option --last, it just selects the N last utterances. 25 | 26 | # Last modified by JHU & HKUST @2013 27 | 28 | 29 | $quiet = 0; 30 | $first = 0; 31 | $last = 0; 32 | 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") { 34 | shift; 35 | $quiet = 1; 36 | } 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") { 38 | shift; 39 | $first = 1; 40 | } 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") { 42 | shift; 43 | $last = 1; 44 | } 45 | 46 | if(@ARGV < 2 ) { 47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . 48 | " --quiet causes it to not die if N < num lines in scp.\n" . 49 | " --first and --last make it equivalent to head or tail.\n" . 50 | "See also: filter_scp.pl\n"; 51 | } 52 | 53 | $N = shift @ARGV; 54 | if($N == 0) { 55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; 56 | } 57 | $inscp = shift @ARGV; 58 | open(I, "<$inscp") || die "Opening input scp file $inscp"; 59 | 60 | @F = (); 61 | while() { 62 | push @F, $_; 63 | } 64 | $numlines = @F; 65 | if($N > $numlines) { 66 | if ($quiet) { 67 | $N = $numlines; 68 | } else { 69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines"; 70 | } 71 | } 72 | 73 | sub select_n { 74 | my ($start,$end,$num_needed) = @_; 75 | my $diff = $end - $start; 76 | if ($num_needed > $diff) { 77 | die "select_n: code error"; 78 | } 79 | if ($diff == 1 ) { 80 | if ($num_needed > 0) { 81 | print $F[$start]; 82 | } 83 | } else { 84 | my $halfdiff = int($diff/2); 85 | my $halfneeded = int($num_needed/2); 86 | select_n($start, $start+$halfdiff, $halfneeded); 87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded); 88 | } 89 | } 90 | 91 | if ( ! $first && ! $last) { 92 | if ($N > 0) { 93 | select_n(0, $numlines, $N); 94 | } 95 | } else { 96 | if ($first) { # --first option: same as head. 97 | for ($n = 0; $n < $N; $n++) { 98 | print $F[$n]; 99 | } 100 | } else { # --last option: same as tail. 
101 | for ($n = @F - $N; $n < @F; $n++) { 102 | print $F[$n]; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /utils/convert_ctm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # This takes as standard input a ctm file that's "relative to the utterance", 6 | # i.e. times are measured relative to the beginning of the segments, and it 7 | # uses a "segments" file (format: 8 | # utterance-id recording-id start-time end-time 9 | # ) and a "reco2file_and_channel" file (format: 10 | # recording-id basename-of-file 11 | 12 | $skip_unknown=undef; 13 | if ( $ARGV[0] eq "--skip-unknown" ) { 14 | $skip_unknown=1; 15 | shift @ARGV; 16 | } 17 | 18 | if (@ARGV < 2 || @ARGV > 3) { 19 | print STDERR "Usage: convert_ctm.pl [] > real-ctm\n"; 20 | exit(1); 21 | } 22 | 23 | $segments = shift @ARGV; 24 | $reco2file_and_channel = shift @ARGV; 25 | 26 | open(S, "<$segments") || die "opening segments file $segments"; 27 | while() { 28 | @A = split(" ", $_); 29 | @A == 4 || die "Bad line in segments file: $_"; 30 | ($utt, $recording_id, $begin_time, $end_time) = @A; 31 | $utt2reco{$utt} = $recording_id; 32 | $begin{$utt} = $begin_time; 33 | $end{$utt} = $end_time; 34 | } 35 | close(S); 36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel"; 37 | while() { 38 | @A = split(" ", $_); 39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_"; 40 | ($recording_id, $file, $channel) = @A; 41 | $reco2file{$recording_id} = $file; 42 | $reco2channel{$recording_id} = $channel; 43 | } 44 | 45 | 46 | # Now process the ctm file, which is either the standard input or the third 47 | # command-line argument. 
48 | $num_done = 0; 49 | while(<>) { 50 | @A= split(" ", $_); 51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_"; 52 | # lines look like: 53 | # 1 [ confidence ] 54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A; 55 | $reco = $utt2reco{$utt}; 56 | if (!defined $reco) { 57 | next if defined $skip_unknown; 58 | die "Utterance-id $utt not defined in segments file $segments"; 59 | } 60 | $file = $reco2file{$reco}; 61 | $channel = $reco2channel{$reco}; 62 | if (!defined $file || !defined $channel) { 63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 64 | } 65 | $b = $begin{$utt}; 66 | $e = $end{$utt}; 67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording. 68 | $wbegin_r = sprintf("%.2f", $wbegin_r); 69 | $wlen = sprintf("%.2f", $wlen); 70 | if (defined $conf) { 71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 72 | } else { 73 | $line = "$file $channel $wbegin_r $wlen $w\n"; 74 | } 75 | if ($wbegin_r + $wlen > $e + 0.01) { 76 | print STDERR "Warning: word appears to be past end of recording; line is $line"; 77 | } 78 | print $line; # goes to stdout. 79 | $num_done++; 80 | } 81 | 82 | if ($num_done == 0) { exit 1; } else { exit 0; } 83 | 84 | __END__ 85 | 86 | # Test example [also test it without the 0.5's] 87 | echo utt reco 10.0 20.0 > segments 88 | echo reco file A > reco2file_and_channel 89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in 90 | echo file A 18.00 1.00 word 0.5 > ctm_out 91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error 92 | rm segments reco2file_and_channel ctm_in ctm_out 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /utils/summarize_logs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 
4 | 5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | sub split_hundreds { # split list of filenames into groups of 100. 8 | my $names = shift @_; 9 | my @A = split(" ", $names); 10 | my @ans = (); 11 | while (@A > 0) { 12 | my $group = ""; 13 | for ($x = 0; $x < 100 && @A>0; $x++) { 14 | $fname = pop @A; 15 | $group .= "$fname "; 16 | } 17 | push @ans, $group; 18 | } 19 | return @ans; 20 | } 21 | 22 | sub parse_accounting_entry { 23 | $entry= shift @_; 24 | 25 | @elems = split " ", $entry; 26 | 27 | $time=undef; 28 | $threads=undef; 29 | foreach $elem (@elems) { 30 | if ( $elem=~ m/time=(\d+)/ ) { 31 | $elem =~ s/time=(\d+)/$1/; 32 | $time = $elem; 33 | } elsif ( $elem=~ m/threads=(\d+)/ ) { 34 | $elem =~ s/threads=(\d+)/$1/g; 35 | $threads = $elem; 36 | } else { 37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n"; 38 | } 39 | } 40 | 41 | if (defined($time) and defined($threads) ) { 42 | return ($time, $threads); 43 | } else { 44 | die "The accounting entry \"$entry\" did not contain all necessary attributes"; 45 | } 46 | } 47 | 48 | foreach $dir (@ARGV) { 49 | 50 | #$dir = $ARGV[0]; 51 | print $dir 52 | 53 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ; 54 | 55 | $dir =~ s:/$::; # Remove trailing slash. 56 | 57 | 58 | # Group the files into categories where all have the same base-name. 59 | foreach $f (glob ("$dir/*.log")) { 60 | $f_category = $f; 61 | # do next expression twice; s///g doesn't work as they overlap. 
62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:; 63 | #$f_category =~ s:\.\d+\.:.*.:; 64 | $fmap{$f_category} .= " $f"; 65 | } 66 | } 67 | 68 | foreach $c (sort (keys %fmap) ) { 69 | $n = 0; 70 | foreach $fgroup (split_hundreds($fmap{$c})) { 71 | $n += `grep -w WARNING $fgroup | wc -l`; 72 | } 73 | if ($n != 0) { 74 | print "$n warnings in $c\n" 75 | } 76 | } 77 | foreach $c (sort (keys %fmap)) { 78 | $n = 0; 79 | foreach $fgroup (split_hundreds($fmap{$c})) { 80 | $n += `grep -w ERROR $fgroup | wc -l`; 81 | } 82 | if ($n != 0) { 83 | print "$n errors in $c\n" 84 | } 85 | } 86 | 87 | $supertotal_cpu_time=0.0; 88 | $supertotal_clock_time=0.0; 89 | $supertotal_threads=0.0; 90 | 91 | foreach $c (sort (keys %fmap)) { 92 | $n = 0; 93 | 94 | $total_cpu_time=0.0; 95 | $total_clock_time=0.0; 96 | $total_threads=0.0; 97 | foreach $fgroup (split_hundreds($fmap{$c})) { 98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`; 99 | 100 | #print $lines ."\n"; 101 | 102 | @entries = split "\n", $lines; 103 | 104 | foreach $line (@entries) { 105 | $time, $threads = parse_accounting_entry($line); 106 | 107 | $total_cpu_time += $time * $threads; 108 | $total_threads += $threads; 109 | if ( $time > $total_clock_time ) { 110 | $total_clock_time = $time; 111 | } 112 | } 113 | } 114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n"; 115 | 116 | $supertotal_cpu_time += $total_cpu_time; 117 | $supertotal_clock_time += $total_clock_time; 118 | $supertotal_threads += $total_threads; 119 | } 120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n"; 121 | 122 | -------------------------------------------------------------------------------- /utils/rnnlm_compute_scores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute scores from RNNLM. This script takes a directory 4 | # $dir (e.g. 
dir=local/rnnlm/rnnlm.voc30.hl30 ), 5 | # where it expects the files: 6 | # rnnlm wordlist.rnn unk.probs, 7 | # and also an input file location where it can get the sentences to score, and 8 | # an output file location to put the scores (negated logprobs) for each 9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and 10 | # output files will have a first field that corresponds to some kind of 11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the 12 | # N-best list. 13 | # 14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary, 15 | # that the RNN was trained on (note, it won't include or ), 16 | # plus which is a kind of class where we put low-frequency 17 | # words; unk.probs gives the probs for words given this class, and it 18 | # has, on each line, "word prob". 19 | 20 | rnnlm_ver=rnnlm-0.3e 21 | ensure_normalized_probs=false # if true then we add the neccesary options to 22 | # normalize the probabilities of RNNLM 23 | # e.g. when using faster-rnnlm in the nce mode 24 | 25 | . ./path.sh || exit 1; 26 | . utils/parse_options.sh 27 | 28 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm 29 | 30 | [ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1; 31 | 32 | if [ $# != 4 ]; then 33 | echo "Usage: rnnlm_compute_scores.sh " 34 | exit 1; 35 | fi 36 | 37 | dir=$1 38 | tempdir=$2 39 | text_in=$3 40 | scores_out=$4 41 | 42 | for x in rnnlm wordlist.rnn unk.probs; do 43 | if [ ! -f $dir/$x ]; then 44 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist." 45 | exit 1; 46 | fi 47 | done 48 | 49 | mkdir -p $tempdir 50 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text 51 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids. 
52 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \ 53 | -v logprobs=$tempdir/loglikes.oov \ 54 | 'BEGIN{ while((getline0) { invoc[$1]=1; } while ((getline0){ unkprob[$1]=$2;} } 55 | { logprob=0; 56 | if (NF==0) { printf ""; logprob = log(1.0e-07); 57 | print "Warning: empty sequence." | "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf(" "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk 64 | 65 | # OK, now we compute the scores on the text with OOVs replaced 66 | # with 67 | 68 | if [ $rnnlm_ver == "faster-rnnlm" ]; then 69 | extra_options= 70 | if [ "$ensure_normalized_probs" = true ]; then 71 | extra_options="--nce-accurate-test 1" 72 | fi 73 | $rnnlm $extra_options -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \ 74 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 75 | else 76 | # add the utterance_id as required by Mikolove's rnnlm 77 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk 78 | 79 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \ 80 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 81 | fi 82 | 83 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \ 84 | echo "rnnlm rescoring failed" && exit 1; 85 | 86 | paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores 87 | 88 | # scores out, with utterance-ids. 
89 | paste $tempdir/ids $tempdir/scores > $scores_out 90 | 91 | -------------------------------------------------------------------------------- /utils/sym2int.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | $ignore_oov = 0; 19 | 20 | for($x = 0; $x < 2; $x++) { 21 | if ($ARGV[0] eq "--map-oov") { 22 | shift @ARGV; 23 | $map_oov = shift @ARGV; 24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { 25 | # disallow '-f', the empty string and anything ending in words.txt as the 26 | # OOV symbol because these are likely command-line errors. 27 | die "the --map-oov option requires an argument"; 28 | } 29 | } 30 | if ($ARGV[0] eq "-f") { 31 | shift @ARGV; 32 | $field_spec = shift @ARGV; 33 | if ($field_spec =~ m/^\d+$/) { 34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 35 | } 36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) 37 | if ($1 ne "") { 38 | $field_begin = $1 - 1; # Change to zero-based indexing. 39 | } 40 | if ($2 ne "") { 41 | $field_end = $2 - 1; # Change to zero-based indexing. 
42 | } 43 | } 44 | if (!defined $field_begin && !defined $field_end) { 45 | die "Bad argument to -f option: $field_spec"; 46 | } 47 | } 48 | } 49 | 50 | $symtab = shift @ARGV; 51 | if (!defined $symtab) { 52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . 53 | "options: [--map-oov ] [-f ]\n" . 54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | } 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 57 | while() { 58 | @A = split(" ", $_); 59 | @A == 2 || die "bad line in symbol table file: $_"; 60 | $sym2int{$A[0]} = $A[1] + 0; 61 | } 62 | 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up 64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } 65 | $map_oov = $sym2int{$map_oov}; 66 | } 67 | 68 | $num_warning = 0; 69 | $max_warning = 20; 70 | 71 | while (<>) { 72 | @A = split(" ", $_); 73 | @B = (); 74 | for ($n = 0; $n < @A; $n++) { 75 | $a = $A[$n]; 76 | if ( (!defined $field_begin || $n >= $field_begin) 77 | && (!defined $field_end || $n <= $field_end)) { 78 | $i = $sym2int{$a}; 79 | if (!defined ($i)) { 80 | if (defined $map_oov) { 81 | if ($num_warning++ < $max_warning) { 82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n"; 83 | if ($num_warning == $max_warning) { 84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n"; 85 | } 86 | } 87 | $i = $map_oov; 88 | } else { 89 | $pos = $n+1; 90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n"; 91 | } 92 | } 93 | $a = $i; 94 | } 95 | push @B, $a; 96 | } 97 | print join(" ", @B); 98 | print "\n"; 99 | } 100 | if ($num_warning > 0) { 101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 102 | } 103 | 104 | exit(0); 105 | -------------------------------------------------------------------------------- /utils/lang/internal/apply_unk_lm.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (Author: Daniel Povey); 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # Begin configuration section. 20 | 21 | # end configuration sections 22 | 23 | echo "$0 $@" # Print the command line for logging 24 | [ -f path.sh ] && . ./path.sh 25 | 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# -ne 2 ]; then 30 | echo "Usage: $0 [options] " 31 | echo "e.g.: $0 exp/make_unk/unk_fst.txt data/lang_unk" 32 | echo "" 33 | echo "This script, which is called from the end of prepare_lang.sh," 34 | echo "inserts the unknown-word LM FST into the lexicon FSTs" 35 | echo "/L.fst and /L_disambig.fst in place of" 36 | echo "the special disambiguation symbol #2 (which was inserted by" 37 | echo "add_lex_disambig.pl as a placeholder for this FST)." 38 | echo "" 39 | echo " : A text-form FST, typically with the name" 40 | echo " unk_fst.txt. We will remove all symbols from the" 41 | echo " output before applying it." 42 | echo " : A partially built lang/ directory. We modify" 43 | echo " L.fst and L_disambig.fst, and read only words.txt." 44 | exit 1; 45 | fi 46 | 47 | 48 | unk_lm_fst=$1 49 | lang=$2 50 | 51 | set -e 52 | 53 | for f in "$unk_lm_fst" $lang/L.fst $lang/L_disambig.fst $lang/words.txt $lang/oov.int; do 54 | [ ! 
-f $f ] && echo "$0: expected file $f to exist" && exit 1; 55 | done 56 | 57 | unused_phone_label=$(tail -n 1 $lang/phones.txt | awk '{print $2 + 1}') 58 | label_to_replace=$(awk '{if ($1 == "#2") {print $2;}}' <$lang/phones.txt) 59 | ! [ "$unused_phone_label" -eq "$unused_phone_label" -a "$label_to_replace" -eq "$label_to_replace" ] && \ 60 | echo "$0: error getting unused phone label or label for #2" && exit 1 61 | 62 | 63 | # OK, now fstreplace works based on olabels, but we actually want to deal with ilabels, 64 | # so we need to invert all the FSTs before and after doing fstreplace. 65 | awk '{if(NF>=4) $4 = ""; print }' <$unk_lm_fst | \ 66 | fstcompile --isymbols=$lang/phones.txt --osymbols=$lang/words.txt | \ 67 | fstinvert > $lang/unk_temp.fst 68 | 69 | num_states_unk=$(fstinfo $lang/unk_temp.fst | grep '# of states' | awk '{print $NF}') 70 | 71 | # fstreplace usage is: 72 | # Usage: fstreplace root.fst rootlabel [rule1.fst label1 ...] [out.fst] 73 | # ... the rootlabel should just be an otherwise unused symbol. 74 | # all the labels are olabels (word labels).. that is hardcoded in fstreplace. 75 | 76 | for f in L.fst L_disambig.fst; do 77 | 78 | # with OpenFst tools, to refer to the standard input/output you need to use 79 | # the empty string '' and not '-'. 80 | fstinvert $lang/$f | fstreplace '' "$unused_phone_label" $lang/unk_temp.fst "$label_to_replace" | fstinvert > $lang/${f}.temp 81 | 82 | num_states_old=$(fstinfo $lang/$f | grep '# of states' | awk '{print $NF}') 83 | num_states_new=$(fstinfo $lang/${f}.temp | grep '# of states' | awk '{print $NF}') 84 | num_states_added=$[$num_states_new-$num_states_old] 85 | echo "$0: in $f, substituting in the unknown-word LM (which had $num_states_unk states) added $num_states_added new FST states." 
86 | mv -f $lang/${f}.temp $lang/$f 87 | done 88 | 89 | rm $lang/unk_temp.fst 90 | 91 | exit 0; 92 | -------------------------------------------------------------------------------- /utils/format_lm_sri.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Begin configuration section. 20 | srilm_opts="-subset -prune-lowprobs -unk -tolower" 21 | # end configuration sections 22 | 23 | 24 | . utils/parse_options.sh 25 | 26 | if [ $# -ne 4 ] && [ $# -ne 3 ]; then 27 | echo "Usage: $0 [options] [] " 28 | echo "The argument is no longer needed but is supported for back compatibility" 29 | echo "E.g.: utils/format_lm_sri.sh data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test" 30 | echo "Converts ARPA-format language models to FSTs. Change the LM vocabulary using SRILM." 
31 | echo "Note: if you want to just convert ARPA LMs to FSTs, there is a simpler way to do this" 32 | echo "that doesn't require SRILM: see utils/format_lm.sh" 33 | echo "options:" 34 | echo " --help # print this message and exit" 35 | echo " --srilm-opts STRING # options to pass to SRILM tools (default: '$srilm_opts')" 36 | exit 1; 37 | fi 38 | 39 | 40 | if [ $# -eq 4 ] ; then 41 | lang_dir=$1 42 | lm=$2 43 | lexicon=$3 44 | out_dir=$4 45 | else 46 | lang_dir=$1 47 | lm=$2 48 | out_dir=$3 49 | fi 50 | 51 | mkdir -p $out_dir 52 | 53 | for f in $lm $lang_dir/words.txt; do 54 | if [ ! -f $f ]; then 55 | echo "$0: expected input file $f to exist." 56 | exit 1; 57 | fi 58 | done 59 | 60 | [ -f ./path.sh ] && . ./path.sh 61 | 62 | loc=`which change-lm-vocab` 63 | if [ -z $loc ]; then 64 | if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... 65 | sdir=`pwd`/../../../tools/srilm/bin/i686-m64 66 | else 67 | sdir=`pwd`/../../../tools/srilm/bin/i686 68 | fi 69 | if [ -f $sdir/../change-lm-vocab ]; then 70 | echo Using SRILM tools from $sdir 71 | export PATH=$PATH:$sdir:$sdir/.. 72 | else 73 | echo You appear to not have SRILM tools installed, either on your path, 74 | echo or installed in $sdir. cd to ../../../tools and run 75 | echo extras/install_srilm.sh. 76 | exit 1 77 | fi 78 | fi 79 | 80 | echo "Converting '$lm' to FST" 81 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); 82 | trap 'rm -rf "$tmpdir"' EXIT 83 | 84 | mkdir -p $out_dir 85 | cp -r $lang_dir/* $out_dir || exit 1; 86 | 87 | lm_base=$(basename $lm '.gz') 88 | awk '{print $1}' $out_dir/words.txt > $tmpdir/voc || exit 1; 89 | 90 | # Change the LM vocabulary to be the intersection of the current LM vocabulary 91 | # and the set of words in the pronunciation lexicon. This also renormalizes the 92 | # LM by recomputing the backoff weights, and remove those ngrams whose 93 | # probabilities are lower than the backed-off estimates. 
change-lm-vocab -vocab $tmpdir/voc -lm $lm -write-lm - $srilm_opts | \
  arpa2fst --disambig-symbol=#0 \
           --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst || exit 1

# Sanity check: G.fst should be close to stochastic (see note below).
fstisstochastic $out_dir/G.fst

# The output is like:
# 9.14233e-05 -0.259833
# we do expect the first of these 2 numbers to be close to zero (the second is
# nonzero because the backoff weights make the states sum to >1).

echo "Succeeded in formatting LM '$lm' -> '$out_dir/G.fst'"
-------------------------------------------------------------------------------- /utils/parse_options.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
#                Arnab Ghoshal, Karel Vesely

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).


###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###

# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
  if [ "${!argpos}" == "--config" ]; then
    argpos_plus1=$((argpos+1))
    config=${!argpos_plus1}
    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
    . $config  # source the config file.
  fi
done


###
### Now we process the command line options
###
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    # If the enclosing script is called with --help option, print the help
    # message and exit.  Scripts should put help messages in $help_message
    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
      else printf "$help_message\n" 1>&2 ; fi;
      exit 0 ;;
    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
      exit 1 ;;
    # If the first command-line argument begins with "--" (e.g. --foo-bar),
    # then work out the variable name as $name, which will equal "foo_bar".
    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
      # Next we test whether the variable in question is undefined -- if so it's
      # an invalid option and we die.  Note: $0 evaluates to the name of the
      # enclosing script.
      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
      # is undefined.  We then have to wrap this test inside "eval" because
      # foo_bar is itself inside a variable ($name).
      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      oldval="`eval echo \\$$name`";
      # Work out whether we seem to be expecting a Boolean argument.
      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval $name=\"$2\";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;
    *) break;
  esac
done


# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;


true; # so this script returns exit code 0.
-------------------------------------------------------------------------------- /main.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Author: Yang Zhang
# Mail: zyziszy@foxmail.com
# Apache 2.0.

import os
import numpy as np
import tensorflow as tf  # tf-gpu 1.8

from model.vae import *
from model.model_utils import *


'''flags'''

# Hyper-parameters and data paths, settable on the command line through
# tf.app.flags (TensorFlow 1.x).
# NOTE(review): 'KL_weigth' is a misspelling of 'KL_weight'; it is kept as-is
# because it is the public CLI flag name and also a keyword argument of VAE.
tf.app.flags.DEFINE_integer('epoch', 50, 'epoch num')

tf.app.flags.DEFINE_integer('batch_size', 200, 'batch size')

tf.app.flags.DEFINE_integer('n_hidden', 1800, 'dim of hidden')

tf.app.flags.DEFINE_integer('z_dim', 200, 'dim of z')

tf.app.flags.DEFINE_float('learn_rate', 0.00001, 'learn rate')

tf.app.flags.DEFINE_float('beta1', 0.5, 'beta1 for AdamOptimizer')

tf.app.flags.DEFINE_float('KL_weigth', 0.04, 'KL_weigth')

tf.app.flags.DEFINE_float('cohesive_weight', 0., 'cohesive loss')

tf.app.flags.DEFINE_string('dataset_path', './data/voxceleb_combined_200000/xvector.npz',
                           'x vector dataset path (npz format)')

tf.app.flags.DEFINE_string('spk_path', './data/voxceleb_combined_200000/spk.npz',
                           'utt2spk label dataset path (npz format)')

tf.app.flags.DEFINE_integer('is_training', 1, 'Training/Testing.')

params = tf.app.flags.FLAGS  # store flag

'''model's log and checkpoints paths'''
# The experiment directory name encodes the main hyper-parameters
# (e.g. experiments/z200_h1800_kl0.04_c0.0) and is anchored at this
# file's own directory so runs are reproducible regardless of cwd.
experiment_dir = '/experiments/'+'z' + \
    str(params.z_dim)+'_h' + str(params.n_hidden) + \
    '_kl'+str(params.KL_weigth)+'_c'+str(params.cohesive_weight)

experiment_dir = os.path.dirname(os.path.abspath(__file__))+experiment_dir
checkpoint_dir = experiment_dir+'/checkpoint'
log_dir = experiment_dir+'/train_log'
print('model/checkpoint/logs will save in {}.'.format(experiment_dir))


'''build the model and train'''
with tf.Session() as sess:
    # VAE comes from model/vae.py via the star-import above.
    vae_model = VAE(
        sess=sess,
        epoch=params.epoch,
        batch_size=params.batch_size,
        z_dim=params.z_dim,
        dataset_path=params.dataset_path,
        checkpoint_dir=checkpoint_dir,
        log_dir=log_dir,
        n_hidden=params.n_hidden,
        KL_weigth=params.KL_weigth,
cohesive_weight=params.cohesive_weight, 67 | learning_rate=params.learn_rate, 68 | beta1=params.beta1, 69 | spk_path=params.spk_path 70 | ) 71 | if params.is_training: 72 | vae_model.train() 73 | print('model / checkpoint / logs will save in {}.'.format(experiment_dir)) 74 | 75 | else: 76 | paths = ["./data/voxceleb_combined_200000/xvector", 77 | "./data/sitw_dev/enroll/xvector", 78 | "./data/sitw_dev/test/xvector", 79 | "./data/sitw_eval/enroll/xvector", 80 | "./data/sitw_eval/test/xvector" 81 | ] 82 | for path in paths: 83 | if os.path.exists(path+'.ark') == True: 84 | os.remove(path+'.ark') 85 | print('delete {}.ark'.format(path)) 86 | 87 | for path in paths: 88 | # load data 89 | vector = np.load(path+'.npz')['vector'] 90 | labels = np.load(path+'.npz')['utt'] 91 | 92 | # predict 93 | predict_mu = vae_model.predict(vector) 94 | print(path) 95 | print(predict_mu.shape) 96 | # get_skew_and_kurt(predict_mu) 97 | with open(path+'.ark', 'w') as f: 98 | for i in range(predict_mu.shape[0]): 99 | f.write(str(labels[i])) 100 | f.write(' [ ') 101 | for j in predict_mu[i]: 102 | f.write(str(j)) 103 | f.write(' ') 104 | f.write(']') 105 | f.write('\n') 106 | print('{}.ark is done!'.format(path)) 107 | print('\nall done!') 108 | 109 | print('done') 110 | -------------------------------------------------------------------------------- /utils/map_arpa_lm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | use strict; 8 | use warnings; 9 | use Getopt::Long; 10 | 11 | my $Usage = < < input-arpa >output-arpa 22 | e.g.: utils/map_arpa_lm.pl words.txt arpa_lm.int 23 | 24 | Allowed options: 25 | --sym2int : If true, maps words to integers, other wise maps integers to 26 | words. 
(boolean, default = true) 27 | 28 | EOU 29 | 30 | my $sym2int = "true"; 31 | GetOptions('sym2int=s' => \$sym2int); 32 | 33 | ($sym2int eq "true" || $sym2int eq "false") || 34 | die "$0: Bad value for option --sym2int\n"; 35 | 36 | if (@ARGV != 1) { 37 | die $Usage; 38 | } 39 | 40 | # Gets parameters. 41 | my $symtab = shift @ARGV; 42 | my $arpa_in = shift @ARGV; 43 | my $arpa_out = shift @ARGV; 44 | 45 | # Opens files. 46 | open(M, "<$symtab") || die "$0: Fail to open $symtab\n"; 47 | 48 | # Reads in the mapper. 49 | my %mapper; 50 | while () { 51 | chomp; 52 | my @col = split(/[\s]+/, $_); 53 | @col == 2 || die "$0: Bad line in mapper file \"$_\"\n"; 54 | if ($sym2int eq "true") { 55 | if (defined($mapper{$col[0]})) { 56 | die "$0: Duplicate entry \"$col[0]\"\n"; 57 | } 58 | $mapper{$col[0]} = $col[1]; 59 | } else { 60 | if (defined($mapper{$col[1]})) { 61 | die "$0: Duplicate entry \"$col[1]\"\n"; 62 | } 63 | $mapper{$col[1]} = $col[0]; 64 | } 65 | } 66 | 67 | my $num_oov_lines = 0; 68 | my $max_oov_warn = 20; 69 | 70 | # Parses Arpa n-gram language model. 71 | my $arpa = ""; 72 | my $current_order = -1; 73 | my %head_ngram_count; 74 | my %actual_ngram_count; 75 | while () { 76 | chomp; 77 | my @col = split(" ", $_); 78 | 79 | if ($current_order == -1 and ! m/^\\data\\$/) { 80 | next; 81 | } 82 | 83 | if (m/^\\data\\$/) { 84 | print STDERR "$0: Processing \"\\data\\\"\n"; 85 | print "$_\n"; 86 | $current_order = 0; 87 | } elsif (m/^\\[0-9]*-grams:$/) { 88 | $current_order = $_; 89 | $current_order =~ s/-grams:$//g; 90 | $current_order =~ s/^\\//g; 91 | print "$_\n"; 92 | print STDERR "$0: Processing \"\\$current_order-grams:\\\"\n"; 93 | } elsif (m/^\\end\\/) { 94 | print "$_\n"; 95 | } elsif ($_ eq "") { 96 | if ($current_order >= 1) { 97 | print "\n"; 98 | } 99 | } else { 100 | if ($current_order == 0) { 101 | # echo head section. 102 | print "$_\n"; 103 | } else { 104 | # Parses n-gram section. 
105 | if (@col > 2 + $current_order || @col < 1 + $current_order) { 106 | die "$0: Bad line in arpa lm \"$_\"\n"; 107 | } 108 | my $prob = shift @col; 109 | my $is_oov = 0; 110 | for (my $i = 0; $i < $current_order; $i++) { 111 | my $temp = $mapper{$col[$i]}; 112 | if (!defined($temp)) { 113 | $is_oov = 1; 114 | $num_oov_lines++; 115 | last; 116 | } else { 117 | $col[$i] = $temp; 118 | } 119 | } 120 | if (!$is_oov) { 121 | my $rest_of_line = join(" ", @col); 122 | print "$prob\t$rest_of_line\n"; 123 | } else { 124 | if ($num_oov_lines < $max_oov_warn) { 125 | print STDERR "$0: Warning: OOV line $_\n"; 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | if ($num_oov_lines > 0) { 133 | print STDERR "$0: $num_oov_lines lines of the Arpa file contained OOVs and "; 134 | print STDERR "were not printed.\n"; 135 | } 136 | 137 | close(M); 138 | -------------------------------------------------------------------------------- /utils/pinyin_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | $num_args = $#ARGV + 1; 5 | if ($num_args != 1) { 6 | print "\nUsage: pinyin2phone.pl pinyin2phone\n"; 7 | exit; 8 | } 9 | 10 | open(MAPS, $ARGV[0]) or die("Could not open pinyin map file."); 11 | my %py2ph; foreach $line () { @A = split(" ", $line); 12 | $py = shift(@A); 13 | $py2ph{$py} = [@A]; 14 | } 15 | 16 | #foreach $word ( keys %py2ph ) { 17 | #foreach $i ( 0 .. 
$#{ $py2ph{$word} } ) { 18 | # print " $word = $py2ph{$word}[$i]"; 19 | #} 20 | #print " $#{ $py2ph{$word} }"; 21 | #print "\n"; 22 | #} 23 | 24 | my @entry; 25 | 26 | while () { 27 | @A = split(" ", $_); 28 | @entry = (); 29 | $W = shift(@A); 30 | push(@entry, $W); 31 | for($i = 0; $i < @A; $i++) { 32 | $initial= $A[$i]; $final = $A[$i]; 33 | #print $initial, " ", $final, "\n"; 34 | if ($A[$i] =~ /^CH[A-Z0-9]+$/) {$initial =~ s:(CH)[A-Z0-9]+:$1:; $final =~ s:CH([A-Z0-9]+):$1:;} 35 | elsif ($A[$i] =~ /^SH[A-Z0-9]+$/) {$initial =~ s:(SH)[A-Z0-9]+:$1:; $final =~ s:SH([A-Z0-9]+):$1:;} 36 | elsif ($A[$i] =~ /^ZH[A-Z0-9]+$/) {$initial =~ s:(ZH)[A-Z0-9]+:$1:; $final =~ s:ZH([A-Z0-9]+):$1:;} 37 | elsif ($A[$i] =~ /^B[A-Z0-9]+$/) {$initial =~ s:(B)[A-Z0-9]+:$1:; $final =~ s:B([A-Z0-9]+):$1:;} 38 | elsif ($A[$i] =~ /^C[A-Z0-9]+$/) {$initial =~ s:(C)[A-Z0-9]+:$1:; $final =~ s:C([A-Z0-9]+):$1:;} 39 | elsif ($A[$i] =~ /^D[A-Z0-9]+$/) {$initial =~ s:(D)[A-Z0-9]+:$1:; $final =~ s:D([A-Z0-9]+):$1:;} 40 | elsif ($A[$i] =~ /^F[A-Z0-9]+$/) {$initial =~ s:(F)[A-Z0-9]+:$1:; $final =~ s:F([A-Z0-9]+):$1:;} 41 | elsif ($A[$i] =~ /^G[A-Z0-9]+$/) {$initial =~ s:(G)[A-Z0-9]+:$1:; $final =~ s:G([A-Z0-9]+):$1:;} 42 | elsif ($A[$i] =~ /^H[A-Z0-9]+$/) {$initial =~ s:(H)[A-Z0-9]+:$1:; $final =~ s:H([A-Z0-9]+):$1:;} 43 | elsif ($A[$i] =~ /^J[A-Z0-9]+$/) {$initial =~ s:(J)[A-Z0-9]+:$1:; $final =~ s:J([A-Z0-9]+):$1:;} 44 | elsif ($A[$i] =~ /^K[A-Z0-9]+$/) {$initial =~ s:(K)[A-Z0-9]+:$1:; $final =~ s:K([A-Z0-9]+):$1:;} 45 | elsif ($A[$i] =~ /^L[A-Z0-9]+$/) {$initial =~ s:(L)[A-Z0-9]+:$1:; $final =~ s:L([A-Z0-9]+):$1:;} 46 | elsif ($A[$i] =~ /^M[A-Z0-9]+$/) {$initial =~ s:(M)[A-Z0-9]+:$1:; $final =~ s:M([A-Z0-9]+):$1:;} 47 | elsif ($A[$i] =~ /^N[A-Z0-9]+$/) {$initial =~ s:(N)[A-Z0-9]+:$1:; $final =~ s:N([A-Z0-9]+):$1:;} 48 | elsif ($A[$i] =~ /^P[A-Z0-9]+$/) {$initial =~ s:(P)[A-Z0-9]+:$1:; $final =~ s:P([A-Z0-9]+):$1:;} 49 | elsif ($A[$i] =~ /^Q[A-Z0-9]+$/) {$initial =~ s:(Q)[A-Z0-9]+:$1:; $final =~ 
s:Q([A-Z0-9]+):$1:;} 50 | elsif ($A[$i] =~ /^R[A-Z0-9]+$/) {$initial =~ s:(R)[A-Z0-9]+:$1:; $final =~ s:R([A-Z0-9]+):$1:;} 51 | elsif ($A[$i] =~ /^S[A-Z0-9]+$/) {$initial =~ s:(S)[A-Z0-9]+:$1:; $final =~ s:S([A-Z0-9]+):$1:;} 52 | elsif ($A[$i] =~ /^T[A-Z0-9]+$/) {$initial =~ s:(T)[A-Z0-9]+:$1:; $final =~ s:T([A-Z0-9]+):$1:;} 53 | elsif ($A[$i] =~ /^W[A-Z0-9]+$/) {$initial =~ s:(W)[A-Z0-9]+:$1:; $final =~ s:W([A-Z0-9]+):$1:;} 54 | elsif ($A[$i] =~ /^X[A-Z0-9]+$/) {$initial =~ s:(X)[A-Z0-9]+:$1:; $final =~ s:X([A-Z0-9]+):$1:;} 55 | elsif ($A[$i] =~ /^Y[A-Z0-9]+$/) {$initial =~ s:(Y)[A-Z0-9]+:$1:; $final =~ s:Y([A-Z0-9]+):$1:;} 56 | elsif ($A[$i] =~ /^Z[A-Z0-9]+$/) {$initial =~ s:(Z)[A-Z0-9]+:$1:; $final =~ s:Z([A-Z0-9]+):$1:;} 57 | if ($initial ne $A[$i]) { 58 | $tone = $final; 59 | $final =~ s:([A-Z]+)[0-9]:$1:; 60 | $tone =~ s:[A-Z]+([0-9]):$1:; 61 | if (!(exists $py2ph{$initial}) or !(exists $py2ph{$final})) { print "1: no entry find for ", $A[$i], " ", $initial, " ", $final; exit;} 62 | push(@entry, @{$py2ph{$initial}}); 63 | @tmp = @{$py2ph{$final}}; 64 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 65 | push(@entry, @tmp); 66 | } 67 | else { 68 | $tone = $A[$i]; 69 | $A[$i] =~ s:([A-Z]+)[0-9]:$1:; 70 | $tone =~ s:[A-Z]+([0-9]):$1:; 71 | if (!(exists $py2ph{$A[$i]})) { print "2: no entry find for ", $A[$i]; exit;} 72 | @tmp = @{$py2ph{$A[$i]}}; 73 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 74 | push(@entry, @tmp); 75 | } 76 | } 77 | print "@entry"; 78 | print "\n"; 79 | } 80 | -------------------------------------------------------------------------------- /utils/subset_data_dir_tr_cv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin); 3 | # Brno University of Technology (Author: Karel Vesely); 4 | # Johns Hopkins University (Author: Daniel Povey); 5 | # Apache 2.0 6 | 7 | # This script 
splits dataset to two parts : 8 | # training set from (100-P)% of speakers/utterances and 9 | # held-out set (or cross-validation) from P% of remaining speakers/remaining utterances, 10 | # which will be later on used for neural network training 11 | # 12 | # There are two options for choosing held-out (or cross-validation) set, either by 13 | # --cv-spk-percent P , which will give you CV set based on random chosen P% of speakers, or 14 | # --cv-utt-percent P , which will give you CV set based on last P% utterances in the dataset 15 | # 16 | # If you don't apply the above two options, by default the script will use --cv-utt-percent option, 17 | # and the default cross validation percentage portion is equal to 10% (i.e. P=10) 18 | # 19 | # The --cv-spk-percent option is useful if you would like to have subset chosen from random speakers order, 20 | # especially for the cases where dataset contains multiple different corpora, 21 | # where type of speakers or recording channels may be quite different 22 | 23 | # Begin configuration. 24 | cv_spk_percent= # % of speakers is parsed by option 25 | cv_utt_percent=10 # default 10% of total utterances 26 | seed=777 # use seed for speaker shuffling 27 | # End configuration. 28 | 29 | echo "$0 $@" # Print the command line for logging 30 | 31 | uttbase=true; # by default, we choose last 10% utterances for CV 32 | 33 | if [ "$1" == "--cv-spk-percent" ]; then 34 | uttbase=false; 35 | spkbase=true; 36 | fi 37 | 38 | [ -f path.sh ] && . ./path.sh; 39 | 40 | . parse_options.sh || exit 1; 41 | 42 | if [ $# != 3 ]; then 43 | echo "Usage: $0 [--cv-spk-percent P|--cv-utt-percent P] " 44 | echo " --cv-spk-percent P Cross Validation portion of the total speakers, recommend value is 10% (i.e. P=10)" 45 | echo " --cv-utt-percent P Cross Validation portion of the total utterances, default is 10% (i.e. 
P=10)" 46 | echo " " 47 | exit 1; 48 | fi 49 | 50 | srcdir=$1 51 | trndir=$2 52 | cvdir=$3 53 | 54 | ## use simple last P% utterance for CV 55 | if $uttbase; then 56 | if [ ! -f $srcdir/utt2spk ]; then 57 | echo "$0: no such file $srcdir/utt2spk" 58 | exit 1; 59 | fi 60 | 61 | #total number of lines 62 | N=$(cat $srcdir/utt2spk | wc -l) 63 | #get line number where (100-P)% of the data lies 64 | P_utt=$((N * cv_utt_percent / 100)) 65 | N_head=$((N -P_utt)) 66 | #move the boundary so it is located on speaker change 67 | N_head=$(cat $srcdir/utt2spk | uniq -f1 -c | awk 'BEGIN{n=0} { if(n+$1<='$N_head') { n += $1 } else { nextfile } } END{if(n==0)n='$N_head'; print n }') 68 | #the rest of the data will be that big 69 | N_tail=$((N-N_head)) 70 | 71 | #now call the subset_data_dir.sh and fix the directories 72 | subset_data_dir.sh --first $srcdir $N_head $trndir 73 | subset_data_dir.sh --last $srcdir $N_tail $cvdir 74 | 75 | exit 0; 76 | fi 77 | 78 | ## use random chosen P% speakers for CV 79 | if [ ! 
-f $srcdir/spk2utt ]; then 80 | echo "$0: no such file $srcdir/spk2utt" 81 | exit 1; 82 | fi 83 | 84 | #total, cv, train number of speakers 85 | N=$(cat $srcdir/spk2utt | wc -l) 86 | N_spk_cv=$((N * cv_spk_percent / 100)) 87 | N_spk_trn=$((N - N_spk_cv)) 88 | 89 | mkdir -p $cvdir $trndir 90 | 91 | #shuffle the speaker list 92 | awk '{print $1}' $srcdir/spk2utt | shuffle_list.pl --srand $seed > $trndir/_tmpf_randspk 93 | 94 | #split the train/cv 95 | head -n $N_spk_cv $trndir/_tmpf_randspk > $cvdir/_tmpf_cvspk 96 | tail -n $N_spk_trn $trndir/_tmpf_randspk > $trndir/_tmpf_trainspk 97 | 98 | #now call the subset_data_dir.sh 99 | subset_data_dir.sh --spk-list $trndir/_tmpf_trainspk $srcdir $trndir 100 | subset_data_dir.sh --spk-list $cvdir/_tmpf_cvspk $srcdir $cvdir 101 | 102 | #clean-up 103 | rm -f $trndir/_tmpf_randspk $trndir/_tmpf_trainspk $cvdir/_tmpf_cvspk 104 | 105 | -------------------------------------------------------------------------------- /utils/combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 3 | # 2014 David Snyder 4 | 5 | # This script combines the data from multiple source directories into 6 | # a single destination directory. 7 | 8 | # See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information 9 | # about what these directories contain. 10 | 11 | # Begin configuration section. 12 | extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." 13 | skip_fix=false # skip the fix_data_dir.sh in the end 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | if [ -f path.sh ]; then . ./path.sh; fi 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -lt 2 ]; then 22 | echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." 
23 | echo "Note, files that don't appear in all source dirs will not be combined," 24 | echo "with the exception of utt2uniq and segments, which are created where necessary." 25 | exit 1 26 | fi 27 | 28 | dest=$1; 29 | shift; 30 | 31 | first_src=$1; 32 | 33 | rm -r $dest 2>/dev/null 34 | mkdir -p $dest; 35 | 36 | export LC_ALL=C 37 | 38 | for dir in $*; do 39 | if [ ! -f $dir/utt2spk ]; then 40 | echo "$0: no such file $dir/utt2spk" 41 | exit 1; 42 | fi 43 | done 44 | 45 | # W.r.t. utt2uniq file the script has different behavior compared to other files 46 | # it is not compulsary for it to exist in src directories, but if it exists in 47 | # even one it should exist in all. We will create the files where necessary 48 | has_utt2uniq=false 49 | for in_dir in $*; do 50 | if [ -f $in_dir/utt2uniq ]; then 51 | has_utt2uniq=true 52 | break 53 | fi 54 | done 55 | 56 | if $has_utt2uniq; then 57 | # we are going to create an utt2uniq file in the destdir 58 | for in_dir in $*; do 59 | if [ ! -f $in_dir/utt2uniq ]; then 60 | # we assume that utt2uniq is a one to one mapping 61 | cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' 62 | else 63 | cat $in_dir/utt2uniq 64 | fi 65 | done | sort -k1 > $dest/utt2uniq 66 | echo "$0: combined utt2uniq" 67 | else 68 | echo "$0 [info]: not combining utt2uniq as it does not exist" 69 | fi 70 | # some of the old scripts might provide utt2uniq as an extrafile, so just remove it 71 | extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") 72 | 73 | # segments are treated similarly to utt2uniq. If it exists in some, but not all 74 | # src directories, then we generate segments where necessary. 75 | has_segments=false 76 | for in_dir in $*; do 77 | if [ -f $in_dir/segments ]; then 78 | has_segments=true 79 | break 80 | fi 81 | done 82 | 83 | if $has_segments; then 84 | for in_dir in $*; do 85 | if [ ! 
-f $in_dir/segments ]; then 86 | echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 87 | utils/data/get_segments_for_data.sh $in_dir 88 | else 89 | cat $in_dir/segments 90 | fi 91 | done | sort -k1 > $dest/segments 92 | echo "$0: combined segments" 93 | else 94 | echo "$0 [info]: not combining segments as it does not exist" 95 | fi 96 | 97 | for file in utt2spk utt2lang utt2dur feats.scp vad.scp ali.scp text cmvn.scp reco2file_and_channel wav.scp spk2gender $extra_files; do 98 | exists_somewhere=false 99 | absent_somewhere=false 100 | for d in $*; do 101 | if [ -f $d/$file ]; then 102 | exists_somewhere=true 103 | else 104 | absent_somewhere=true 105 | fi 106 | done 107 | 108 | if ! $absent_somewhere; then 109 | set -o pipefail 110 | ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; 111 | set +o pipefail 112 | echo "$0: combined $file" 113 | else 114 | if ! $exists_somewhere; then 115 | echo "$0 [info]: not combining $file as it does not exist" 116 | else 117 | echo "$0 [info]: **not combining $file as it does not exist everywhere**" 118 | fi 119 | fi 120 | done 121 | 122 | utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt 123 | 124 | if ! $skip_fix ; then 125 | utils/fix_data_dir.sh $dest || exit 1; 126 | fi 127 | 128 | exit 0 129 | -------------------------------------------------------------------------------- /utils/data/combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 3 | # 2014 David Snyder 4 | 5 | # This script combines the data from multiple source directories into 6 | # a single destination directory. 7 | 8 | # See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information 9 | # about what these directories contain. 10 | 11 | # Begin configuration section. 12 | extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." 
13 | skip_fix=false # skip the fix_data_dir.sh in the end 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | if [ -f path.sh ]; then . ./path.sh; fi 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -lt 2 ]; then 22 | echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." 23 | echo "Note, files that don't appear in all source dirs will not be combined," 24 | echo "with the exception of utt2uniq and segments, which are created where necessary." 25 | exit 1 26 | fi 27 | 28 | dest=$1; 29 | shift; 30 | 31 | first_src=$1; 32 | 33 | rm -r $dest 2>/dev/null 34 | mkdir -p $dest; 35 | 36 | export LC_ALL=C 37 | 38 | for dir in $*; do 39 | if [ ! -f $dir/utt2spk ]; then 40 | echo "$0: no such file $dir/utt2spk" 41 | exit 1; 42 | fi 43 | done 44 | 45 | # W.r.t. utt2uniq file the script has different behavior compared to other files 46 | # it is not compulsary for it to exist in src directories, but if it exists in 47 | # even one it should exist in all. We will create the files where necessary 48 | has_utt2uniq=false 49 | for in_dir in $*; do 50 | if [ -f $in_dir/utt2uniq ]; then 51 | has_utt2uniq=true 52 | break 53 | fi 54 | done 55 | 56 | if $has_utt2uniq; then 57 | # we are going to create an utt2uniq file in the destdir 58 | for in_dir in $*; do 59 | if [ ! -f $in_dir/utt2uniq ]; then 60 | # we assume that utt2uniq is a one to one mapping 61 | cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' 62 | else 63 | cat $in_dir/utt2uniq 64 | fi 65 | done | sort -k1 > $dest/utt2uniq 66 | echo "$0: combined utt2uniq" 67 | else 68 | echo "$0 [info]: not combining utt2uniq as it does not exist" 69 | fi 70 | # some of the old scripts might provide utt2uniq as an extrafile, so just remove it 71 | extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") 72 | 73 | # segments are treated similarly to utt2uniq. If it exists in some, but not all 74 | # src directories, then we generate segments where necessary. 
75 | has_segments=false 76 | for in_dir in $*; do 77 | if [ -f $in_dir/segments ]; then 78 | has_segments=true 79 | break 80 | fi 81 | done 82 | 83 | if $has_segments; then 84 | for in_dir in $*; do 85 | if [ ! -f $in_dir/segments ]; then 86 | echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 87 | utils/data/get_segments_for_data.sh $in_dir 88 | else 89 | cat $in_dir/segments 90 | fi 91 | done | sort -k1 > $dest/segments 92 | echo "$0: combined segments" 93 | else 94 | echo "$0 [info]: not combining segments as it does not exist" 95 | fi 96 | 97 | for file in utt2spk utt2lang utt2dur feats.scp vad.scp ali.scp text cmvn.scp reco2file_and_channel wav.scp spk2gender $extra_files; do 98 | exists_somewhere=false 99 | absent_somewhere=false 100 | for d in $*; do 101 | if [ -f $d/$file ]; then 102 | exists_somewhere=true 103 | else 104 | absent_somewhere=true 105 | fi 106 | done 107 | 108 | if ! $absent_somewhere; then 109 | set -o pipefail 110 | ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; 111 | set +o pipefail 112 | echo "$0: combined $file" 113 | else 114 | if ! $exists_somewhere; then 115 | echo "$0 [info]: not combining $file as it does not exist" 116 | else 117 | echo "$0 [info]: **not combining $file as it does not exist everywhere**" 118 | fi 119 | fi 120 | done 121 | 122 | utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt 123 | 124 | if ! $skip_fix ; then 125 | utils/fix_data_dir.sh $dest || exit 1; 126 | fi 127 | 128 | exit 0 129 | -------------------------------------------------------------------------------- /utils/create_data_link.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | # This script distributes data onto different file systems by making symbolic 8 | # links. 
It is supposed to use together with utils/create_split_dir.pl, which 9 | # creates a "storage" directory that links to different file systems. 10 | # 11 | # If a sub-directory egs/storage does not exist, it does nothing. If it exists, 12 | # then it selects pseudo-randomly a number from those available in egs/storage/* 13 | # creates a link such as 14 | # 15 | # egs/egs.3.4.ark -> storage/4/egs.3.4.ark 16 | # 17 | use strict; 18 | use warnings; 19 | use File::Basename; 20 | use File::Spec; 21 | use Getopt::Long; 22 | 23 | sub GetGCD { 24 | my ($a, $b) = @_; 25 | while ($a != $b) { 26 | if ($a > $b) { 27 | $a = $a - $b; 28 | } else { 29 | $b = $b - $a; 30 | } 31 | } 32 | return $a; 33 | } 34 | 35 | my $Usage = < storage/4/egs.3.4.ark 46 | 47 | Usage: utils/create_data_link.pl [ ... ] 48 | e.g.: utils/create_data_link.pl foo/bar/egs.3.4.ark foo/bar/egs.3.5.ark 49 | (note: the dirname, e.g. foo/bar/, must be the same in all cases). 50 | 51 | See also utils/remove_data_links.sh 52 | EOU 53 | 54 | GetOptions(); 55 | 56 | if (@ARGV == 0) { 57 | die $Usage; 58 | } 59 | 60 | my $example_fullpath = $ARGV[0]; 61 | 62 | # Check if the storage has been created. If so, do nothing. 63 | my $dirname = dirname($example_fullpath); 64 | if (! -d "$dirname/storage") { 65 | exit(0); 66 | } 67 | 68 | # Storage exists, create symbolic links in the next few steps. 69 | 70 | # First, get a list of the available storage directories, and check if they are 71 | # properly created. 72 | opendir(my $dh, "$dirname/storage/") || die "$0: Fail to open $dirname/storage/\n"; 73 | my @storage_dirs = grep(/^[0-9]*$/, readdir($dh)); 74 | closedir($dh); 75 | my $num_storage = scalar(@storage_dirs); 76 | for (my $x = 1; $x <= $num_storage; $x++) { 77 | (-d "$dirname/storage/$x") || die "$0: $dirname/storage/$x does not exist\n"; 78 | } 79 | 80 | # Second, get the coprime list. 
81 | my @coprimes; 82 | for (my $n = 1; $n < $num_storage; $n++) { 83 | if (GetGCD($n, $num_storage) == 1) { 84 | push(@coprimes, $n); 85 | } 86 | } 87 | 88 | my $ret = 0; 89 | 90 | foreach my $fullpath (@ARGV) { 91 | if ($dirname ne dirname($fullpath)) { 92 | die "Mismatch in directory names of arguments: $example_fullpath versus $fullpath"; 93 | } 94 | 95 | # Finally, work out the directory index where we should put the data to. 96 | my $basename = basename($fullpath); 97 | my $filename_numbers = $basename; 98 | $filename_numbers =~ s/[^0-9]+/ /g; 99 | my @filename_numbers = split(" ", $filename_numbers); 100 | my $total = 0; 101 | my $index = 0; 102 | foreach my $x (@filename_numbers) { 103 | if ($index >= scalar(@coprimes)) { 104 | $index = 0; 105 | } 106 | $total += $x * $coprimes[$index]; 107 | $index++; 108 | } 109 | my $dir_index = $total % $num_storage + 1; 110 | 111 | # Make the symbolic link. 112 | if (-e $fullpath) { 113 | unlink($fullpath); 114 | } 115 | if (symlink("storage/$dir_index/$basename", $fullpath) != 1) { # failure 116 | $ret = 1; # will exit with error status. 
117 | } 118 | } 119 | 120 | exit($ret); 121 | 122 | ## testing: 123 | # rm -rf foo bar 124 | # mkdir -p bar/{1,2,3,4} 125 | # mkdir -p foo/storage 126 | # for x in 1 2 3 4; do ln -s ../../bar/$x foo/storage/$x; done 127 | # utils/create_data_link.pl utils/create_data_link.pl foo/1.3.ark foo/2.3.ark 128 | # ls -l foo 129 | # total 0 130 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 1.3.ark -> storage/3/1.3.ark 131 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 2.3.ark -> storage/4/2.3.ark 132 | # drwxr-xr-x 2 dpovey fax 38 Sep 2 17:40 storage 133 | -------------------------------------------------------------------------------- /utils/nnet/make_lstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015-2016 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 
19 | 20 | import sys 21 | 22 | ### 23 | ### Parse options 24 | ### 25 | from optparse import OptionParser 26 | usage="%prog [options] >nnet-proto-file" 27 | parser = OptionParser(usage) 28 | # Required, 29 | parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320, 30 | help='Number of cells for one direction in LSTM [default: %default]'); 31 | parser.add_option('--proj-dim', dest='proj_dim', type='int', default=400, 32 | help='Number of LSTM recurrent units [default: %default]'); 33 | parser.add_option('--num-layers', dest='num_layers', type='int', default=2, 34 | help='Number of LSTM layers [default: %default]'); 35 | # Optional (default == 'None'), 36 | parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float', 37 | help='Range of initial LSTM parameters [default: %default]'); 38 | parser.add_option('--param-stddev', dest='param_stddev', type='float', 39 | help='Standard deviation for initial weights of Softmax layer [default: %default]'); 40 | parser.add_option('--cell-clip', dest='cell_clip', type='float', 41 | help='Clipping cell values during propagation (per-frame) [default: %default]'); 42 | parser.add_option('--diff-clip', dest='diff_clip', type='float', 43 | help='Clipping partial-derivatives during BPTT (per-frame) [default: %default]'); 44 | parser.add_option('--cell-diff-clip', dest='cell_diff_clip', type='float', 45 | help='Clipping partial-derivatives of "cells" during BPTT (per-frame, those accumulated by CEC) [default: %default]'); 46 | parser.add_option('--grad-clip', dest='grad_clip', type='float', 47 | help='Clipping the accumulated gradients (per-updates) [default: %default]'); 48 | # 49 | 50 | (o,args) = parser.parse_args() 51 | if len(args) != 2 : 52 | parser.print_help() 53 | sys.exit(1) 54 | 55 | (feat_dim, num_leaves) = map(int,args); 56 | 57 | # Original prototype from Jiayu, 58 | # 59 | # 40 40 60 | # 40 512 800 0.01 4 61 | # 512 8000 0.000000 0.000000 0.04 62 | # 8000 8000 63 | # 64 | 65 | 
lstm_extra_opts="" 66 | if None != o.lstm_param_range: lstm_extra_opts += " %f " % o.lstm_param_range 67 | if None != o.cell_clip: lstm_extra_opts += " %f " % o.cell_clip 68 | if None != o.diff_clip: lstm_extra_opts += " %f " % o.diff_clip 69 | if None != o.cell_diff_clip: lstm_extra_opts += " %f " % o.cell_diff_clip 70 | if None != o.grad_clip: lstm_extra_opts += " %f " % o.grad_clip 71 | 72 | softmax_affine_opts="" 73 | if None != o.param_stddev: softmax_affine_opts += " %f " % o.param_stddev 74 | 75 | # The LSTM layers, 76 | print " %d %d %s" % (feat_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts 77 | for l in range(o.num_layers - 1): 78 | print " %d %d %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts 79 | 80 | # Adding for more stability, 81 | print " %d %d" % (o.proj_dim, o.proj_dim) 82 | 83 | # Softmax layer, 84 | print " %d %d 0.0 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts 85 | print " %d %d" % (num_leaves, num_leaves) 86 | 87 | -------------------------------------------------------------------------------- /utils/lang/internal/modify_unk_pron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2016 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | from __future__ import print_function 7 | import sys 8 | import os 9 | import argparse 10 | from collections import defaultdict 11 | 12 | # note, this was originally based 13 | 14 | parser = argparse.ArgumentParser(description=""" 15 | This script replaces the existing pronunciation of the 16 | unknown word in the provided lexicon, with a pronunciation 17 | consisting of three disambiguation symbols: #1 followed by #2 18 | followed by #3. 19 | The #2 will later be replaced by a phone-level LM by 20 | apply_unk_lm.sh (called later on by prepare_lang.sh). 
21 | Caution: this script is sensitive to the basename of the 22 | lexicon: it should be called either lexiconp.txt, in which 23 | case the format is 'word pron-prob p1 p2 p3 ...' 24 | or lexiconp_silprob.txt, in which case the format is 25 | 'word pron-prob sil-prob1 sil-prob2 sil-prob3 p1 p2 p3....'. 26 | It is an error if there is not exactly one pronunciation of 27 | the unknown word in the lexicon.""", 28 | epilog="""E.g.: modify_unk_pron.py data/local/lang/lexiconp.txt ''. 29 | This script is called from prepare_lang.sh.""") 30 | 31 | parser.add_argument('lexicon_file', type = str, 32 | help = 'Filename of the lexicon file to operate on (this is ' 33 | 'both an input and output of this script).') 34 | parser.add_argument('unk_word', type = str, 35 | help = "The printed form of the unknown/OOV word, normally ''.") 36 | 37 | args = parser.parse_args() 38 | 39 | if len(args.unk_word.split()) != 1: 40 | sys.exit("{0}: invalid unknown-word '{1}'".format( 41 | sys.argv[0], args.unk_word)) 42 | 43 | basename = os.path.basename(args.lexicon_file) 44 | if basename != 'lexiconp.txt' and basename != 'lexiconp_silprob.txt': 45 | sys.exit("{0}: expected the basename of the lexicon file to be either " 46 | "'lexiconp.txt' or 'lexiconp_silprob.txt', got: {1}".format( 47 | sys.argv[0], args.lexicon_file)) 48 | # the lexiconp.txt format is: word pron-prob p1 p2 p3... 49 | # lexiconp_silprob.txt has 3 extra real-valued fields after the pron-prob. 
50 | num_fields_before_pron = 2 if basename == 'lexiconp.txt' else 5 51 | 52 | print(' '.join(sys.argv), file = sys.stderr) 53 | 54 | try: 55 | lexicon_in = open(args.lexicon_file, 'r') 56 | except: 57 | sys.exit("{0}: failed to open lexicon file {1}".format( 58 | sys.argv[0], args.lexicon_file)) 59 | 60 | split_lines = [] 61 | unk_index = -1 62 | while True: 63 | line = lexicon_in.readline() 64 | if line == '': 65 | break 66 | this_split_line = line.split() 67 | if this_split_line[0] == args.unk_word: 68 | if unk_index != -1: 69 | sys.exit("{0}: expected there to be exactly one pronunciation of the " 70 | "unknown word {1} in {2}, but there are more than one.".format( 71 | sys.argv[0], args.lexicon_file, args.unk_word)) 72 | unk_index = len(split_lines) 73 | if len(this_split_line) <= num_fields_before_pron: 74 | sys.exit("{0}: input file {1} had a bad line (too few fields): {2}".format( 75 | sys.argv[0], args.lexicon_file, line[:-1])) 76 | split_lines.append(this_split_line) 77 | 78 | if len(split_lines) == 0: 79 | sys.exit("{0}: read no data from lexicon file {1}.".format( 80 | sys.argv[0], args.lexicon_file)) 81 | 82 | 83 | if unk_index == -1: 84 | sys.exit("{0}: expected there to be exactly one pronunciation of the " 85 | "unknown word {1} in {2}, but there are none.".format( 86 | sys.argv[0], args.unk_word, args.lexicon_file)) 87 | 88 | lexicon_in.close() 89 | 90 | # now modify the pron. 91 | split_lines[unk_index] = split_lines[unk_index][0:num_fields_before_pron] + [ '#1', '#2', '#3' ] 92 | 93 | 94 | try: 95 | # write to the same file. 
96 | lexicon_out = open(args.lexicon_file, 'w') 97 | except: 98 | sys.exit("{0}: failed to open lexicon file {1} for writing (permissions probleM?)".format( 99 | sys.argv[0], args.lexicon_file)) 100 | 101 | for split_line in split_lines: 102 | print(' '.join(split_line), file = lexicon_out) 103 | 104 | try: 105 | lexicon_out.close() 106 | except: 107 | sys.exit("{0}: failed to close lexicon file {1} after writing (disk full?)".format( 108 | sys.argv[0], args.lexicon_file)) 109 | -------------------------------------------------------------------------------- /utils/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a directory, such as in data/train/, 7 | # that contains some subset of the following files: 8 | # feats.scp 9 | # wav.scp 10 | # spk2utt 11 | # utt2spk 12 | # text 13 | # 14 | # It copies to another directory, possibly adding a specified prefix or a suffix 15 | # to the utterance and/or speaker names. Note, the recording-ids stay the same. 16 | # 17 | 18 | 19 | # begin configuration section 20 | spk_prefix= 21 | utt_prefix= 22 | spk_suffix= 23 | utt_suffix= 24 | validate_opts= # should rarely be needed. 25 | # end configuration section 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# != 2 ]; then 30 | echo "Usage: " 31 | echo " $0 [options] " 32 | echo "e.g.:" 33 | echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" 34 | echo "Options" 35 | echo " --spk-prefix= # Prefix for speaker ids, default empty" 36 | echo " --utt-prefix= # Prefix for utterance ids, default empty" 37 | echo " --spk-suffix= # Suffix for speaker ids, default empty" 38 | echo " --utt-suffix= # Suffix for utterance ids, default empty" 39 | exit 1; 40 | fi 41 | 42 | 43 | export LC_ALL=C 44 | 45 | srcdir=$1 46 | destdir=$2 47 | 48 | if [ ! 
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  feats.scp
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It copies to another directory, possibly adding a specified prefix or a suffix
# to the utterance and/or speaker names.  Note, the recording-ids stay the same.
#

# begin configuration section
spk_prefix=
utt_prefix=
spk_suffix=
utt_suffix=
validate_opts=   # should rarely be needed.
# end configuration section

. utils/parse_options.sh

if [ $# != 2 ]; then
  # NOTE(review): the <srcdir>/<destdir>/<prefix>/<suffix> placeholders below had
  # been stripped from the usage messages; restored here.
  echo "Usage: "
  echo "  $0 [options] <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
  echo "Options"
  echo "   --spk-prefix=<prefix>        # Prefix for speaker ids, default empty"
  echo "   --utt-prefix=<prefix>        # Prefix for utterance ids, default empty"
  echo "   --spk-suffix=<suffix>        # Suffix for speaker ids, default empty"
  echo "   --utt-suffix=<suffix>        # Suffix for utterance ids, default empty"
  exit 1;
fi

export LC_ALL=C

srcdir=$1
destdir=$2

if [ ! -f $srcdir/utt2spk ]; then
  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;

mkdir -p $destdir

# Build old->new maps for utterance-ids and speaker-ids.
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map

# utt2uniq maps (possibly renamed) utterances back to the original utterance-id,
# so that held-out-set logic can recognize perturbed copies of the same data.
if [ ! -f $srcdir/utt2uniq ]; then
  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
  fi
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
fi

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/feats.scp ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
fi

if [ -f $srcdir/segments ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
  cp $srcdir/wav.scp $destdir
  if [ -f $srcdir/reco2file_and_channel ]; then
    cp $srcdir/reco2file_and_channel $destdir/
  fi
else # no segments->wav indexed by utt.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/utt2dur ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur
fi
# spk2gender and cmvn.scp are indexed by speaker, so use spk_map.
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi
if [ -f $srcdir/cmvn.scp ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
fi
# Scoring-related files are copied verbatim (they are indexed by recording).
for f in stm glm ctm; do
  if [ -f $srcdir/$f ]; then
    cp $srcdir/$f $destdir
  fi
done

rm $destdir/spk_map $destdir/utt_map

echo "$0: copied data from $srcdir to $destdir"

# Files present in the destination but not the source are stale leftovers from a
# previous copy; move them aside so validation does not see inconsistent data.
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do
  if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
    echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
    echo " ... $destdir/.backup/$f"
    mkdir -p $destdir/.backup
    mv $destdir/$f $destdir/.backup/
  fi
done

[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"

utils/validate_data_dir.sh $validate_opts $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# (This is a duplicate of utils/copy_data_dir.sh, kept in utils/data/ as well.)
#
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  feats.scp
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It copies to another directory, possibly adding a specified prefix or a suffix
# to the utterance and/or speaker names.  Note, the recording-ids stay the same.
#

# begin configuration section
spk_prefix=
utt_prefix=
spk_suffix=
utt_suffix=
validate_opts=   # should rarely be needed.
# end configuration section

. utils/parse_options.sh

if [ $# != 2 ]; then
  # NOTE(review): restored the <srcdir>/<destdir>/<prefix>/<suffix> placeholders
  # that had been stripped from the usage messages.
  echo "Usage: "
  echo "  $0 [options] <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
  echo "Options"
  echo "   --spk-prefix=<prefix>        # Prefix for speaker ids, default empty"
  echo "   --utt-prefix=<prefix>        # Prefix for utterance ids, default empty"
  echo "   --spk-suffix=<suffix>        # Suffix for speaker ids, default empty"
  echo "   --utt-suffix=<suffix>        # Suffix for utterance ids, default empty"
  exit 1;
fi

export LC_ALL=C

srcdir=$1
destdir=$2

if [ ! -f $srcdir/utt2spk ]; then
  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;

mkdir -p $destdir

# Build old->new maps for utterance-ids and speaker-ids.
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map

# utt2uniq maps renamed utterances back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
  fi
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
fi

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/feats.scp ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
fi

if [ -f $srcdir/segments ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
  cp $srcdir/wav.scp $destdir
  if [ -f $srcdir/reco2file_and_channel ]; then
    cp $srcdir/reco2file_and_channel $destdir/
  fi
else # no segments->wav indexed by utt.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/utt2dur ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur
fi
# spk2gender and cmvn.scp are indexed by speaker, so use spk_map.
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi
if [ -f $srcdir/cmvn.scp ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
fi
for f in stm glm ctm; do
  if [ -f $srcdir/$f ]; then
    cp $srcdir/$f $destdir
  fi
done

rm $destdir/spk_map $destdir/utt_map

echo "$0: copied data from $srcdir to $destdir"

# Move aside stale files left over in the destination from a previous copy.
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do
  if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
    echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
    echo " ... $destdir/.backup/$f"
    mkdir -p $destdir/.backup
    mv $destdir/$f $destdir/.backup/
  fi
done

[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"

utils/validate_data_dir.sh $validate_opts $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It generates the files which are used for perturbing the speed of the original data.

. utils/parse_options.sh

if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <warping-factor>/<srcdir>/<destdir>
  # placeholders in the usage message.
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 0.9 data/train_si284 data/train_si284p"
  exit 1
fi

export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
# All new utterance/speaker ids get a "sp<factor>-" prefix, e.g. "sp0.9-".
spk_prefix=$label$factor"-"
utt_prefix=$label$factor"-"

# check that sox is on the path.
which sox &>/dev/null
! [ $? -eq 0 ] && echo "sox: command not found" && exit 1;

if [ ! -f $srcdir/utt2spk ]; then
  echo "$0: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;
set -o pipefail

mkdir -p $destdir

cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
# utt2uniq maps each perturbed utterance back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $2);}' > $destdir/utt2uniq
fi


cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
  # also apply the spk_prefix to the recording-ids.
  cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

  # Segment start/end times shrink by the warping factor (audio sped up by
  # 'factor' is 1/factor times as long).
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
    utils/apply_map.pl -f 2 $destdir/reco_map | \
    awk -v factor=$factor \
    '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments

  utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
  # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
   awk -v factor=$factor \
   '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
     else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
     else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  if [ -f $srcdir/reco2file_and_channel ]; then
    utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
  fi

  rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
    # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
    awk -v factor=$factor \
    '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
      else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
      else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi

if [ ! -f $srcdir/utt2dur ]; then
  # generate utt2dur if it does not exist in srcdir
  utils/data/get_utt2dur.sh $srcdir
fi

# Durations shrink by the warping factor too.
cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \
  awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur

rm $destdir/spk_map $destdir/utt_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
# Apache 2.0

# (This is a duplicate of utils/perturb_data_dir_speed.sh, kept in utils/data/.)
#
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It generates the files which are used for perturbing the speed of the original data.

. utils/parse_options.sh

if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <warping-factor>/<srcdir>/<destdir>
  # placeholders in the usage message.
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 0.9 data/train_si284 data/train_si284p"
  exit 1
fi

export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
# All new utterance/speaker ids get a "sp<factor>-" prefix, e.g. "sp0.9-".
spk_prefix=$label$factor"-"
utt_prefix=$label$factor"-"

# check that sox is on the path.
which sox &>/dev/null
! [ $? -eq 0 ] && echo "sox: command not found" && exit 1;

if [ ! -f $srcdir/utt2spk ]; then
  echo "$0: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;
set -o pipefail

mkdir -p $destdir

cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
# utt2uniq maps each perturbed utterance back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $2);}' > $destdir/utt2uniq
fi


cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
  # also apply the spk_prefix to the recording-ids.
  cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

  # Segment start/end times shrink by the warping factor.
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
    utils/apply_map.pl -f 2 $destdir/reco_map | \
    awk -v factor=$factor \
    '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments

  utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
  # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
   awk -v factor=$factor \
   '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
     else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
     else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  if [ -f $srcdir/reco2file_and_channel ]; then
    utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
  fi

  rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
    # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
    awk -v factor=$factor \
    '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
      else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
      else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi

if [ ! -f $srcdir/utt2dur ]; then
  # generate utt2dur if it does not exist in srcdir
  utils/data/get_utt2dur.sh $srcdir
fi

# Durations shrink by the warping factor too.
cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \
  awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur

rm $destdir/spk_map $destdir/utt_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats $destdir
#!/bin/bash

# Copyright 2016  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a data directory, such as in data/train/, and adds the
# utt2dur file if it does not already exist.  The file 'utt2dur' maps from
# utterance to the duration of the utterance in seconds.  This script works it
# out from the 'segments' file, or, if not present, from the wav.scp file (it
# first tries interrogating the headers, and if this fails, it reads the wave
# files in entirely.)

frame_shift=0.01

. utils/parse_options.sh
. ./path.sh

if [ $# != 1 ]; then
  echo "Usage: $0 [options] <datadir>"
  echo "e.g.:"
  echo " $0 data/train"
  echo " Options:"
  echo " --frame-shift      # frame shift in seconds. Only relevant when we are"
  echo "                    # getting duration from feats.scp (default: 0.01). "
  exit 1
fi

export LC_ALL=C

data=$1

# If utt2dur is present, non-empty and consistent with utt2spk, keep it.
if [ -s $data/utt2dur ] && \
  [ $(cat $data/utt2spk | wc -l) -eq $(cat $data/utt2dur | wc -l) ]; then
  echo "$0: $data/utt2dur already exists with the expected length. We won't recompute it."
  exit 0;
fi

if [ -f $data/segments ]; then
  echo "$0: working out $data/utt2dur from $data/segments"
  cat $data/segments | awk '{len=$4-$3; print $1, len;}' > $data/utt2dur
elif [ -f $data/wav.scp ]; then
  echo "$0: segments file does not exist so getting durations from wave files"

  # if the wav.scp contains only lines of the form
  # utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph |
  # then we can work out the durations from the sphere-file headers directly.
  if cat $data/wav.scp | perl -e '
    while (<>) { s/\|\s*$/ |/;  # make sure final | is preceded by space.
      @A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ &&
                        $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); }
      $utt = $A[0]; $sphere_file = $A[4];

      if (!open(F, "<$sphere_file")) { die "Error opening sphere file $sphere_file"; }
      $sample_rate = -1; $sample_count = -1;
      for ($n = 0; $n <= 30; $n++) {
        $line = <F>;   # BUGFIX: the <F> filehandle read had been lost, making this assign undef.
        if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; }
        if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; }
        if ($line =~ m/end_head/) { last; }   # BUGFIX: perl loop-exit is "last", not "break".
      }
      close(F);
      if ($sample_rate == -1 || $sample_count == -1) {
        die "could not parse sphere header from $sphere_file";
      }
      $duration = $sample_count * 1.0 / $sample_rate;
      print "$utt $duration\n";
    } ' > $data/utt2dur; then
    echo "$0: successfully obtained utterance lengths from sphere-file headers"
  else
    echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration"
    if ! command -v wav-to-duration >/dev/null; then
      echo "$0: wav-to-duration is not on your path"
      exit 1;
    fi

    read_entire_file=false
    if cat $data/wav.scp | grep -q 'sox.*speed'; then
      # sox "speed" pipes report the pre-perturbation duration in the header,
      # so the whole file has to be read to get the true length.
      read_entire_file=true
      echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow."
      echo "... It is much faster if you call get_utt2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or "
      echo "... perturb_data_dir_speed_3way.sh."
    fi

    if ! wav-to-duration --read-entire-file=$read_entire_file scp:$data/wav.scp ark,t:$data/utt2dur 2>&1 | grep -v 'nonzero return status'; then
      echo "$0: there was a problem getting the durations; moving $data/utt2dur to $data/.backup/"
      mkdir -p $data/.backup/
      mv $data/utt2dur $data/.backup/
    fi
  fi
elif [ -f $data/feats.scp ]; then
  echo "$0: wave file does not exist so getting durations from feats files"
  feat-to-len scp:$data/feats.scp ark,t:- | awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' >$data/utt2dur
else
  echo "$0: Expected $data/wav.scp, $data/segments or $data/feats.scp to exist"
  exit 1
fi

len1=$(cat $data/utt2spk | wc -l)
len2=$(cat $data/utt2dur | wc -l)
if [ "$len1" != "$len2" ]; then
  echo "$0: warning: length of utt2dur does not equal that of utt2spk, $len2 != $len1"
  # Use POSIX $(( )) arithmetic instead of the deprecated $[ ] form.
  if [ $len1 -gt $((len2 * 2)) ]; then
    echo "$0: less than half of utterances got a duration: failing."
    exit 1
  fi
fi

echo "$0: computed $data/utt2dur"

exit 0
#!/bin/bash

# Apache 2.0.  Copyright 2012, Johns Hopkins University (author: Daniel Povey)

# This script creates a "lang" directory of the "testing" type (including G.fst)
# given an existing "alignment" directory and an existing "lang" directory.
# The directory contains only single-phone words, and a bigram language model that
# is built without smoothing, on top of single phones.  The point of no smoothing
# is to limit the number of transitions, so we can decode reasonably fast, and the
# graph won't blow up.  This is probably going to be most useful for things like
# language-id.


# We might later have options here; if not, I'll remove this.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;


if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <lang-dir>/<ali-dir>/<lang-out> placeholders.
  echo "Usage: $0: [options] <lang-dir> <ali-dir> <lang-out>"
  echo "e.g.: $0: data/lang exp/tri3b_ali data/lang_phone_bg"
  exit 1;
fi

lang=$1
alidir=$2
lang_out=$3

for f in $lang/phones.txt $alidir/ali.1.gz; do
  [ ! -f $f ] && echo "Expected file $f to exist" && exit 1;
done

mkdir -p $lang_out || exit 1;

grep -v '#' $lang/phones.txt > $lang_out/phones.txt # no disambig symbols
 # needed; G and L . G will be deterministic.
cp $lang/topo $lang_out
rm -r $lang_out/phones 2>/dev/null
cp -r $lang/phones/ $lang_out/
rm $lang_out/phones/word_boundary.* 2>/dev/null # these would
 # no longer be valid.
rm $lang_out/phones/wdisambig* 2>/dev/null # ditto this.

# List of disambig symbols will be empty: not needed, since G.fst and L.fst * G.fst
# are determinizable without any.
echo -n > $lang_out/phones/disambig.txt
echo -n > $lang_out/phones/disambig.int
echo -n > $lang_out/phones/disambig.csl
echo -n > $lang_out/phones/wdisambig.txt
echo -n > $lang_out/phones/wdisambig_phones.int
echo -n > $lang_out/phones/wdisambig_words.int

# Let OOV symbol be the first phone.  This is arbitrary, it's just
# so that validate_lang.pl succeeds.  We should never actually use
# this.
oov_sym=$(tail -n +2 $lang_out/phones.txt | head -n 1 | awk '{print $1}')
oov_int=$(tail -n +2 $lang_out/phones.txt | head -n 1 | awk '{print $2}')
echo $oov_sym > $lang_out/oov.txt
echo $oov_int > $lang_out/oov.int


# Get phone-level transcripts of training data and create a
# language model.
# NOTE(review): the "<s>", "</s>" and "<eps>" symbols below had been stripped
# from this file (HTML-sanitizer artifact); restored from context.
ali-to-phones $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz|" ark,t:- | \
  perl -e 'while(<>) {
     @A = split(" ", $_);
     shift @A; # Remove the utterance-id.
     foreach $p ( @A ) { $phones{$p} = 1; } # assoc. array of phones.
     unshift @A, "<s>";
     push @A, "</s>";
     for ($n = 0; $n+1 < @A; $n++) {
       $p = $A[$n]; $q = $A[$n+1];
       $count{$p,$q}++;
       $histcount{$p}++;
     }
   }
   @phones = keys %phones;
   unshift @phones, "<s>";
   # @phones is now all real phones, plus <s>.
   for ($n = 0; $n < @phones; $n++) {
     $phn2state{$phones[$n]} = $n;
   }
   foreach $p (@phones) {
     $src = $phn2state{$p};
     $hist = $histcount{$p};
     $hist > 0 || die;
     foreach $q (@phones) {
       $c = $count{$p,$q};
       if (defined $c) {
         $cost = -log($c / $hist); # cost on FST arc.
         $dest = $phn2state{$q};
         print "$src $dest $q $cost\n"; # Note: q is actually numeric.
       }
     }
     $c = $count{$p,"</s>"};
     if (defined $c) {
       $cost = -log($c / $hist); # cost on FST arc.
       print "$src $cost\n"; # final-prob.
     }
   } ' | fstcompile --acceptor=true | \
   fstarcsort --sort_type=ilabel > $lang_out/G.fst

# symbols for phones and words are the same.
# Neither has disambig symbols.
cp $lang_out/phones.txt $lang_out/words.txt

grep -v '<eps>' $lang_out/phones.txt | awk '{printf("0 0 %s %s\n", $2, $2);} END{print("0 0.0");}' | \
  fstcompile > $lang_out/L.fst

# note: first two fields of align_lexicon.txt are interpreted as the word; the remaining
# fields are the phones that are in the pron of the word.  These are all the same, for us.
for p in $(grep -v '<eps>' $lang_out/phones.txt | awk '{print $1}'); do echo $p $p $p; done > $lang_out/phones/align_lexicon.txt

# just use one sym2int.pl command, since phones.txt and words.txt are identical.
utils/sym2int.pl $lang_out/phones.txt <$lang_out/phones/align_lexicon.txt >$lang_out/phones/align_lexicon.int

# L and L_disambig are the same.
cp $lang_out/L.fst $lang_out/L_disambig.fst

utils/validate_lang.pl --skip-disambig-check $lang_out || exit 1;
27 | spk2utt = defaultdict(lambda: []) 28 | 29 | while True: 30 | line = sys.stdin.readline() 31 | if line == '': 32 | break; 33 | a = line.split() 34 | if len(a) != 2: 35 | sys.exit("modify_speaker_info.py: bad utt2spk line from standard input (expected two fields): " + 36 | line) 37 | [ utt, spk ] = a 38 | utt2spk[utt] = spk 39 | spk2utt[spk].append(utt) 40 | 41 | if args.seconds_per_spk_max > 0: 42 | utt2dur = dict() 43 | try: 44 | f = open(args.utt2dur) 45 | while True: 46 | line = f.readline() 47 | if line == '': 48 | break 49 | a = line.split() 50 | if len(a) != 2: 51 | sys.exit("modify_speaker_info.py: bad utt2dur line from standard input (expected two fields): " + 52 | line) 53 | [ utt, dur ] = a 54 | utt2dur[utt] = float(dur) 55 | for utt in utt2spk: 56 | if not utt in utt2dur: 57 | sys.exit("modify_speaker_info.py: utterance {0} not in utt2dur file {1}".format( 58 | utt, args.utt2dur)) 59 | except Exception as e: 60 | sys.exit("modify_speaker_info.py: problem reading utt2dur info: " + str(e)) 61 | 62 | # splits a list of utts into a list of lists, based on constraints from the 63 | # command line args. Note: the last list will tend to be shorter than the others, 64 | # we make no attempt to fix this. 65 | def SplitIntoGroups(uttlist): 66 | ans = [] # list of lists. 67 | cur_uttlist = [] 68 | cur_dur = 0.0 69 | for utt in uttlist: 70 | if ((args.utts_per_spk_max > 0 and len(cur_uttlist) == args.utts_per_spk_max) or 71 | (args.seconds_per_spk_max > 0 and len(cur_uttlist) > 0 and 72 | cur_dur + utt2dur[utt] > args.seconds_per_spk_max)): 73 | ans.append(cur_uttlist) 74 | cur_uttlist = [] 75 | cur_dur = 0.0 76 | cur_uttlist.append(utt) 77 | if args.seconds_per_spk_max > 0: 78 | cur_dur += utt2dur[utt] 79 | if len(cur_uttlist) > 0: 80 | ans.append(cur_uttlist) 81 | return ans 82 | 83 | 84 | # This function will return '%01d' if d < 10, '%02d' if d < 100, and so on. 
85 | # It's for printf printing of numbers in such a way that sorted order will be 86 | # correct. 87 | def GetFormatString(d): 88 | ans = 1 89 | while (d >= 10): 90 | d //= 10 # integer division 91 | ans += 1 92 | # e.g. we might return the string '%01d' or '%02d' 93 | return '%0{0}d'.format(ans) 94 | 95 | 96 | if args.respect_speaker_info == 'true': 97 | for spk in sorted(spk2utt.keys()): 98 | uttlists = SplitIntoGroups(spk2utt[spk]) 99 | format_string = '%s-' + GetFormatString(len(uttlists)) 100 | for i in range(len(uttlists)): 101 | # the following might look like: '%s-%02d'.format('john_smith' 9 + 1), 102 | # giving 'john_smith-10'. 103 | this_spk = format_string % (spk, i + 1) 104 | for utt in uttlists[i]: 105 | print(utt, this_spk) 106 | else: 107 | uttlists = SplitIntoGroups(sorted(utt2spk.keys())) 108 | format_string = 'speaker-' + GetFormatString(len(uttlists)) 109 | for i in range(len(uttlists)): 110 | # the following might look like: 'speaker-%04d'.format(105 + 1), 111 | # giving 'speaker-0106'. 112 | this_spk = format_string % (i + 1) 113 | for utt in uttlists[i]: 114 | print(utt, this_spk) 115 | 116 | -------------------------------------------------------------------------------- /utils/nnet/make_blstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015-2016 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 19 | 20 | import sys 21 | 22 | ### 23 | ### Parse options 24 | ### 25 | from optparse import OptionParser 26 | usage="%prog [options] >nnet-proto-file" 27 | parser = OptionParser(usage) 28 | # Required, 29 | parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320, 30 | help='Number of cells for one direction in BLSTM [default: %default]'); 31 | parser.add_option('--proj-dim', dest='proj_dim', type='int', default=200, 32 | help='Dim reduction for one direction in BLSTM [default: %default]'); 33 | parser.add_option('--proj-dim-last', dest='proj_dim_last', type='int', default=320, 34 | help='Dim reduction for one direction in BLSTM (last BLSTM component) [default: %default]'); 35 | parser.add_option('--num-layers', dest='num_layers', type='int', default=2, 36 | help='Number of BLSTM layers [default: %default]'); 37 | # Optional (default == 'None'), 38 | parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float', 39 | help='Range of initial BLSTM parameters [default: %default]'); 40 | parser.add_option('--param-stddev', dest='param_stddev', type='float', 41 | help='Standard deviation for initial weights of Softmax layer [default: %default]'); 42 | parser.add_option('--cell-clip', dest='cell_clip', type='float', 43 | help='Clipping cell values during propagation (per-frame) [default: %default]'); 44 | parser.add_option('--diff-clip', dest='diff_clip', type='float', 45 | help='Clipping partial-derivatives during BPTT (per-frame) [default: %default]'); 46 | parser.add_option('--cell-diff-clip', dest='cell_diff_clip', type='float', 47 | help='Clipping partial-derivatives of "cells" during BPTT (per-frame, those accumulated by CEC) [default: %default]'); 48 | parser.add_option('--grad-clip', dest='grad_clip', type='float', 49 | help='Clipping 
the accumulated gradients (per-updates) [default: %default]'); 50 | # 51 | 52 | (o,args) = parser.parse_args() 53 | if len(args) != 2 : 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | (feat_dim, num_leaves) = map(int,args); 58 | 59 | # Original prototype from Jiayu, 60 | # 61 | # 40 40 62 | # 40 512 800 0.01 4 63 | # 512 8000 0.000000 0.000000 0.04 64 | # 8000 8000 65 | # 66 | 67 | lstm_extra_opts="" 68 | if None != o.lstm_param_range: lstm_extra_opts += " %f " % o.lstm_param_range 69 | if None != o.cell_clip: lstm_extra_opts += " %f " % o.cell_clip 70 | if None != o.diff_clip: lstm_extra_opts += " %f " % o.diff_clip 71 | if None != o.cell_diff_clip: lstm_extra_opts += " %f " % o.cell_diff_clip 72 | if None != o.grad_clip: lstm_extra_opts += " %f " % o.grad_clip 73 | 74 | softmax_affine_opts="" 75 | if None != o.param_stddev: softmax_affine_opts += " %f " % o.param_stddev 76 | 77 | # The BLSTM layers, 78 | if o.num_layers == 1: 79 | # Single BLSTM, 80 | print " %d %d %s" % (feat_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts 81 | else: 82 | # >1 BLSTM, 83 | print " %d %d %s" % (feat_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts 84 | for l in range(o.num_layers - 2): 85 | print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts 86 | print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts 87 | 88 | # Adding for more stability, 89 | print " %d %d" % (2*o.proj_dim_last, 2*o.proj_dim_last) 90 | 91 | # Softmax layer, 92 | print " %d %d 0.0 0.0" % (2*o.proj_dim_last, num_leaves) + softmax_affine_opts 93 | print " %d %d" % (num_leaves, num_leaves) 94 | 95 | -------------------------------------------------------------------------------- /utils/data/modify_speaker_info.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013-2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script copies a data directory 
(like utils/copy_data_dir.sh) while
52 | echo " --seconds-per-spk-max # number of seconds per speaker maximum," 53 | echo " # default -1 (meaning no maximum). E.g. 60." 54 | echo " --respect-speaker-info # If true, respect the" 55 | echo " # existing speaker map (i.e. do not" 56 | echo " # assign utterances from different" 57 | echo " # speakers to the same generated speaker)." 58 | echo " # Default: true." 59 | echo "Note: one or both of the --utts-per-spk-max or --seconds-per-spk-max" 60 | echo "options is required." 61 | exit 1; 62 | fi 63 | 64 | export LC_ALL=C 65 | 66 | srcdir=$1 67 | destdir=$2 68 | 69 | if [ "$destdir" == "$srcdir" ]; then 70 | echo "$0: must be different from ." 71 | exit 1 72 | fi 73 | 74 | if [ "$seconds_per_spk_max" == "-1" ] && ! [ "$utts_per_spk_max" -gt 0 ]; then 75 | echo "$0: one or both of the --utts-per-spk-max or --seconds-per-spk-max options must be provided." 76 | fi 77 | 78 | if [ ! -f $srcdir/utt2spk ]; then 79 | echo "$0: no such file $srcdir/utt2spk" 80 | exit 1; 81 | fi 82 | 83 | set -e; 84 | set -o pipefail 85 | 86 | mkdir -p $destdir 87 | 88 | if [ "$seconds_per_spk_max" != -1 ]; then 89 | # we need the utt2dur file. 90 | utils/data/get_utt2dur.sh $srcdir 91 | utt2dur_opt="--utt2dur=$srcdir/utt2dur" 92 | else 93 | utt2dur_opt= 94 | fi 95 | 96 | utils/data/internal/modify_speaker_info.py \ 97 | $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ 98 | --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ 99 | <$srcdir/utt2spk >$destdir/utt2spk 100 | 101 | utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt 102 | 103 | # This script won't create the new cmvn.scp, it should be recomputed. 104 | if [ -f $destdir/cmvn.scp ]; then 105 | mkdir -p $destdir/.backup 106 | mv $destdir/cmvn.scp $destdir/.backup 107 | echo "$0: moving $destdir/cmvn.scp to $destdir/.backup/cmvn.scp" 108 | fi 109 | 110 | # these things won't be affected by the change of speaker mapping. 
111 | for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do 112 | [ -f $srcdir/$f ] && cp $srcdir/$f $destdir/ 113 | done 114 | 115 | 116 | orig_num_spk=$(wc -l <$srcdir/spk2utt) 117 | new_num_spk=$(wc -l <$destdir/spk2utt) 118 | 119 | echo "$0: copied data from $srcdir to $destdir, number of speakers changed from $orig_num_spk to $new_num_spk" 120 | opts= 121 | [ ! -f $srcdir/feats.scp ] && opts="--no-feats" 122 | [ ! -f $srcdir/text ] && opts="$opts --no-text" 123 | [ ! -f $srcdir/wav.scp ] && opts="$opts --no-wav" 124 | 125 | utils/validate_data_dir.sh $opts $destdir 126 | -------------------------------------------------------------------------------- /utils/data/extend_segment_times.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import sys 5 | import argparse 6 | from collections import defaultdict 7 | 8 | 9 | parser = argparse.ArgumentParser(description=""" 10 | Usage: extend_segment_times.py [options] output-segments 11 | This program pads the times in a 'segments' file (e.g. 
data/train/segments) 12 | with specified left and right context (for cases where there was no 13 | silence padding in the original segments file)""") 14 | 15 | parser.add_argument("--start-padding", type = float, default = 0.1, 16 | help="Amount of padding, in seconds, for the start time of " 17 | "each segment (start times <0 will be set to zero).") 18 | parser.add_argument("--end-padding", type = float, default = 0.1, 19 | help="Amount of padding, in seconds, for the end time of " 20 | "each segment.") 21 | parser.add_argument("--last-segment-end-padding", type = float, default = 0.1, 22 | help="Amount of padding, in seconds, for the end time of " 23 | "the last segment of each file (maximum allowed).") 24 | parser.add_argument("--fix-overlapping-segments", type = str, 25 | default = 'true', choices=['true', 'false'], 26 | help="If true, prevent segments from overlapping as a result " 27 | "of the padding (or that were already overlapping)") 28 | args = parser.parse_args() 29 | 30 | 31 | # the input file will be a sequence of lines which are each of the form: 32 | # 33 | # e.g. 34 | # utt-1 recording-1 0.62 5.40 35 | # The output will be in the same format and in the same 36 | # order, except wiht modified times. 37 | 38 | # This variable maps from a recording-id to a listof the utterance 39 | # indexes (as integer indexes into 'entries'] 40 | # that are part of that recording. 
41 | recording_to_utt_indexes = defaultdict(list) 42 | 43 | # This is an array of the entries in the segments file, in the fomrat: 44 | # (utterance-id as astring, recording-id as string, 45 | # start-time as float, end-time as float) 46 | entries = [] 47 | 48 | 49 | while True: 50 | line = sys.stdin.readline() 51 | if line == '': 52 | break 53 | try: 54 | [ utt_id, recording_id, start_time, end_time ] = line.split() 55 | start_time = float(start_time) 56 | end_time = float(end_time) 57 | except: 58 | sys.exit("extend_segment_times.py: could not interpret line: " + line) 59 | if not end_time > start_time: 60 | print("extend_segment_times.py: bad segment (ignoring): " + line, 61 | file = sys.stderr) 62 | recording_to_utt_indexes[recording_id].append(len(entries)) 63 | entries.append([utt_id, recording_id, start_time, end_time]) 64 | 65 | num_times_fixed = 0 66 | 67 | for recording, utt_indexes in recording_to_utt_indexes.items(): 68 | # this_entries is a list of lists, sorted on mid-time. 69 | # Notice: because lists are objects, when we change 'this_entries' 70 | # we change the underlying entries. 
71 | this_entries = sorted([ entries[x] for x in utt_indexes ], 72 | key = lambda x : 0.5 * (x[2] + x[3])) 73 | min_time = 0 74 | max_time = max([ x[3] for x in this_entries ]) + args.last_segment_end_padding 75 | start_padding = args.start_padding 76 | end_padding = args.end_padding 77 | for n in range(len(this_entries)): 78 | this_entries[n][2] = max(min_time, this_entries[n][2] - start_padding) 79 | this_entries[n][3] = min(max_time, this_entries[n][3] + end_padding) 80 | 81 | for n in range(len(this_entries) - 1): 82 | this_end_time = this_entries[n][3] 83 | next_start_time = this_entries[n+1][2] 84 | if this_end_time > next_start_time and args.fix_overlapping_segments == 'true': 85 | midpoint = 0.5 * (this_end_time + next_start_time) 86 | this_entries[n][3] = midpoint 87 | this_entries[n+1][2] = midpoint 88 | num_times_fixed += 1 89 | 90 | 91 | # this prints a number with a certain number of digits after 92 | # the point, while removing trailing zeros. 93 | def FloatToString(f): 94 | num_digits = 6 # we want to print 6 digits after the zero 95 | g = f 96 | while abs(g) > 1.0: 97 | g *= 0.1 98 | num_digits += 1 99 | format_str = '%.{0}g'.format(num_digits) 100 | return format_str % f 101 | 102 | for entry in entries: 103 | [ utt_id, recording_id, start_time, end_time ] = entry 104 | if not start_time < end_time: 105 | print("extend_segment_times.py: bad segment after processing (ignoring): " + 106 | ' '.join(entry), file = sys.stderr) 107 | continue 108 | print(utt_id, recording_id, FloatToString(start_time), FloatToString(end_time)) 109 | 110 | 111 | print("extend_segment_times.py: extended {0} segments; fixed {1} " 112 | "overlapping segments".format(len(entries), num_times_fixed), 113 | file = sys.stderr) 114 | 115 | ## test: 116 | # (echo utt1 reco1 0.2 6.2; echo utt2 reco1 6.3 9.8 )| extend_segment_times.py 117 | # and also try the above with the options --last-segment-end-padding=0.0 --fix-overlapping-segments=false 118 | 119 | 
-------------------------------------------------------------------------------- /utils/data/normalize_data_range.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # This script is intended to read and write scp files possibly containing indexes for 4 | # sub-ranges of features, like 5 | # foo-123 bar.ark:431423[78:89] 6 | # meaning rows 78 through 89 of the matrix located at bar.ark:431423. 7 | # 8 | # Its purpose is to normalize lines which have ranges on top of ranges, like 9 | # 10 | # foo-123 bar.ark:431423[78:89][3:4] 11 | # 12 | # This program interprets the later [] expression as a sub-range of the matrix returned by the first [] 13 | # expression; in this case, we'd get 14 | # 15 | # foo-123 bar.ark:431423[81:82] 16 | # 17 | # Note that these ranges are based on zero-indexing, and have a 'first:last' 18 | # interpretation, so the range [0:0] is a matrix with one row. And also note 19 | # that column ranges are permitted, after row ranges, and the row range may be 20 | # empty, e.g. 21 | 22 | # foo-123 bar.ark:431423[81:82,0:13] 23 | # or 24 | # foo-123 bar.ark:431423[81:82,0:13] 25 | # 26 | 27 | # This program reads from the standard input (or command-line file or files), 28 | # and writes to the standard output. 29 | 30 | 31 | # This function combines ranges, either row or column ranges. start1 and end1 32 | # are the first range, and start2 and end2 are interpreted as a sub-range of the 33 | # first range. It is acceptable for either start1 and end1, or start2 and end2, to 34 | # be empty. 35 | # This function returns the start and end of the range, as an array. 
36 | sub combine_ranges { 37 | ($row_or_column, $start1, $end1, $start2, $end2) = @_; 38 | 39 | if ($start1 eq "" && $end1 eq "") { 40 | return ($start2, $end2); 41 | } elsif ($start2 eq "" && $end2 eq "") { 42 | return ($start1, $end1); 43 | } else { 44 | # For now this script doesn't support the case of ranges like [20:], even 45 | # though they are supported at the C++ level. 46 | if ($start1 eq "" || $start2 eq "" || $end1 eq "" || $end2 == "") { 47 | chop $line; 48 | print("normalize_data_range.pl: could not make sense of line $line\n"); 49 | exit(1) 50 | } 51 | if ($start1 + $end2 > $end1) { 52 | chop $line; 53 | print("normalize_data_range.pl: could not make sense of line $line " . 54 | "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]\n"); 55 | exit(1); 56 | } 57 | return ($start2+$start1, $end2+$start1); 58 | } 59 | } 60 | 61 | 62 | while (<>) { 63 | $line = $_; 64 | # we only need to do something if we detect two of these ranges. 65 | # The following regexp matches strings of the form ...[foo][bar] 66 | # where foo and bar have no square brackets in them. 67 | if (m/\[([^][]*)\]\[([^][]*)\]\s*$/) { 68 | $before_range = $`; 69 | $first_range = $1; # e.g. '0:500,20:21', or '0:500', or ',0:13'. 70 | $second_range = $2; # has same general format as first_range. 71 | if ($_ =~ m/concat-feats /) { 72 | # sometimes in scp files, we use the command concat-feats to splice together 73 | # two feature matrices. Handling this correctly is complicated and we don't 74 | # anticipate needing it, so we just refuse to process this type of data. 75 | print "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " . 
76 | "if concat-feats was in the input data\n"; 77 | exit(1); 78 | } 79 | print STDERR "matched: $before_range $first_range $second_range\n"; 80 | if ($first_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) { 81 | print STDERR "normalize_data_range.pl: could not make sense of input line $_"; 82 | exit(1); 83 | } 84 | $row_start1 = $2; 85 | $row_end1 = $3; 86 | $col_start1 = $5; 87 | $col_end1 = $6; 88 | 89 | if ($second_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) { 90 | print STDERR "normalize_data_range.pl: could not make sense of input line $_"; 91 | exit(1); 92 | } 93 | $row_start2 = $2; 94 | $row_end2 = $3; 95 | $col_start2 = $5; 96 | $col_end2 = $6; 97 | 98 | ($row_start, $row_end) = combine_ranges("row", $row_start1, $row_end1, $row_start2, $row_end2); 99 | ($col_start, $col_end) = combine_ranges("column", $col_start1, $col_end1, $col_start2, $col_end2); 100 | 101 | 102 | if ($row_start ne "") { 103 | $range = "$row_start:$row_end"; 104 | } else { 105 | $range = ""; 106 | } 107 | if ($col_start ne "") { 108 | $range .= ",$col_start:$col_end"; 109 | } 110 | print $before_range . "[" . $range . 
"]\n"; 111 | } else { 112 | print; 113 | } 114 | } 115 | 116 | __END__ 117 | 118 | # Testing 119 | # echo foo | utils/data/normalize_data_range.pl -> foo 120 | # echo 'foo[bar:baz]' | utils/data/normalize_data_range.pl -> foo[bar:baz] 121 | # echo 'foo[bar:baz][bin:bang]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of input line foo[bar:baz][bin:bang] 122 | # echo 'foo[10:20][0:5]' | utils/data/normalize_data_range.pl -> foo[10:15] 123 | # echo 'foo[,10:20][,0:5]' | utils/data/normalize_data_range.pl -> foo[,10:15] 124 | # echo 'foo[,0:100][1:15]' | utils/data/normalize_data_range.pl -> foo[1:15,0:100] 125 | # echo 'foo[1:15][,0:100]' | utils/data/normalize_data_range.pl -> foo[1:15,0:100] 126 | # echo 'foo[10:20][0:11]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of line foo[10:20][0:11] [second row range too large vs first range, 10 + 11 > 20] 127 | # echo 'foo[,10:20][,0:11]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of line foo[,10:20][,0:11] [second column range too large vs first range, 10 + 11 > 20] 128 | --------------------------------------------------------------------------------