├── model ├── __init__.py └── model_utils.py ├── experiments └── .gitkeep ├── requirements.txt ├── paper.pdf ├── data ├── download.sh ├── data_prepare.sh ├── label.py └── zip.py ├── baseline.sh ├── utils ├── filt.py ├── data │ ├── get_num_frames.sh │ ├── get_segments_for_data.sh │ ├── perturb_data_dir_speed_3way.sh │ ├── get_frame_shift.sh │ ├── remove_dup_utts.sh │ ├── perturb_data_dir_volume.sh │ ├── combine_data.sh │ ├── copy_data_dir.sh │ ├── perturb_data_dir_speed.sh │ ├── get_utt2dur.sh │ ├── internal │ │ └── modify_speaker_info.py │ ├── modify_speaker_info.sh │ ├── extend_segment_times.py │ └── normalize_data_range.pl ├── fix_ctm.sh ├── spk2utt_to_utt2spk.pl ├── s2eps.pl ├── eps2disambig.pl ├── build_const_arpa_lm.sh ├── summarize_warnings.pl ├── utt2spk_to_spk2utt.pl ├── shuffle_list.pl ├── analyze_segments.pl ├── show_lattice.sh ├── best_wer.sh ├── remove_oovs.pl ├── add_disambig.pl ├── remove_data_links.sh ├── nnet │ ├── gen_hamm_mat.py │ ├── gen_splice.py │ ├── gen_dct_mat.py │ ├── make_lstm_proto.py │ └── make_blstm_proto.py ├── ln.pl ├── make_unigram_grammar.pl ├── int2sym.pl ├── scoring │ └── wer_report.pl ├── find_arpa_oovs.pl ├── prepare_online_nnet_dist_build.sh ├── format_lm.sh ├── convert_slf_parallel.sh ├── lang │ ├── check_phones_compatible.sh │ ├── validate_disambig_sym_file.pl │ ├── check_g_properties.pl │ ├── internal │ │ ├── apply_unk_lm.sh │ │ └── modify_unk_pron.py │ └── make_phone_bigram_lang.sh ├── create_split_dir.pl ├── apply_map.pl ├── filter_scp.pl ├── gen_topo.pl ├── subset_scp.pl ├── convert_ctm.pl ├── summarize_logs.pl ├── rnnlm_compute_scores.sh ├── sym2int.pl ├── format_lm_sri.sh ├── parse_options.sh ├── map_arpa_lm.pl ├── pinyin_map.pl ├── subset_data_dir_tr_cv.sh ├── combine_data.sh ├── create_data_link.pl ├── copy_data_dir.sh └── perturb_data_dir_speed.sh ├── train.sh ├── eval.sh ├── path.sh ├── local ├── cosine_scoring.sh ├── plda_scoring.sh ├── pca_plda_scoring.sh └── lda_plda_scoring.sh ├── baseline.py ├── .gitignore ├── 
eer.sh ├── README.md └── main.py /model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==1.8 2 | numpy 3 | scipy 4 | -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSLT-THU/IS2019-VAE/HEAD/paper.pdf -------------------------------------------------------------------------------- /data/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Author: Yang Zhang 4 | # Mail: zyziszy@foxmail.com 5 | # Apache 2.0. 
6 | # 2019, CSLT 7 | 8 | -------------------------------------------------------------------------------- /baseline.sh: -------------------------------------------------------------------------------- 1 | # this code is used to 2 | # get xvector.ark from xvector.npz and 3 | # calculate baseline EER 4 | 5 | python -u baseline.py 6 | wait; 7 | 8 | sh eer.sh -------------------------------------------------------------------------------- /utils/filt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Apache 2.0 4 | 5 | import sys 6 | 7 | vocab=set() 8 | with open(sys.argv[1]) as vocabfile: 9 | for line in vocabfile: 10 | vocab.add(line.strip()) 11 | 12 | with open(sys.argv[2]) as textfile: 13 | for line in textfile: 14 | print " ".join(map(lambda word: word if word in vocab else '', line.strip().split())) 15 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 Yang Zhang 3 | # Apache 2.0. 4 | 5 | 6 | python -u main.py \ 7 | --epoch 200 \ 8 | --batch_size 200 \ 9 | --n_hidden 1800 \ 10 | --learn_rate 0.00001 \ 11 | --beta1 0.5 \ 12 | --dataset_path ./data/voxceleb_combined_200000/xvector.npz \ 13 | --spk_path ./data/voxceleb_combined_200000/spk.npz \ 14 | --z_dim 200 \ 15 | --KL_weigth 0.03 \ 16 | --cohesive_weight 0 \ 17 | --is_training 1 18 | 19 | -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 Yang Zhang 3 | # Apache 2.0. 
4 | 5 | python -u main.py \ 6 | --epoch 200 \ 7 | --batch_size 200 \ 8 | --n_hidden 1800 \ 9 | --learn_rate 0.00001 \ 10 | --beta1 0.5 \ 11 | --dataset_path ./data/voxceleb_combined_200000/xvector.npz \ 12 | --spk_path ./data/voxceleb_combined_200000/spk.npz \ 13 | --z_dim 200 \ 14 | --KL_weigth 0.03 \ 15 | --cohesive_weight 0 \ 16 | --is_training 0 17 | 18 | wait 19 | 20 | bash eer.sh 21 | -------------------------------------------------------------------------------- /path.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # 2019 Yang Zhang 5 | # Apache 2.0. 6 | 7 | export KALDI_ROOT=${replace it by your kaldi root path} 8 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH 9 | 10 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 11 | . $KALDI_ROOT/tools/config/common_path.sh 12 | export LC_ALL=C 13 | -------------------------------------------------------------------------------- /data/data_prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Author: Yang Zhang 4 | # Author: Xueyi Wang 5 | # Apache 2.0. 6 | # 2019, CSLT 7 | 8 | # xvector 9 | for ark in `find -name "xvector.ark"` 10 | do 11 | npz=`dirname $ark`"/xvector.npz" 12 | python -u zip.py \ 13 | --source_path $ark \ 14 | --dest_path $npz 15 | echo 16 | done 17 | echo 18 | 19 | # utt2spk 20 | for utt2spk in `find -name "utt2spk"` 21 | do 22 | spknpz=`dirname $utt2spk`"/spk.npz" 23 | python -u label.py \ 24 | --source_path $utt2spk \ 25 | --dest_path $spknpz 26 | echo 27 | done 28 | echo 29 | 30 | echo data_prepare all DONE! 
31 | -------------------------------------------------------------------------------- /utils/data/get_num_frames.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script works out the approximate number of frames in a training directory. 4 | # This is sometimes needed by higher-level scripts 5 | 6 | 7 | if [ -f path.sh ]; then . ./path.sh; fi 8 | . parse_options.sh || exit 1; 9 | 10 | if [ $# -ne 1 ]; then 11 | ( 12 | echo "Usage: $0 " 13 | echo "Prints the number of frames of data in the data-dir" 14 | ) 1>&2 15 | fi 16 | 17 | data=$1 18 | 19 | if [ ! -f $data/utt2dur ]; then 20 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1 21 | fi 22 | 23 | frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1 24 | 25 | awk -v s=$frame_shift '{n += $2} END{printf("%d\n", int(n / s))}' <$data/utt2dur 26 | -------------------------------------------------------------------------------- /local/cosine_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains an LDA transform and does cosine scoring. 7 | 8 | #echo "$0 $@" # Print the command line for logging 9 | 10 | if [ -f path.sh ]; then . ./path.sh; fi 11 | . 
parse_options.sh || exit 1; 12 | 13 | if [ $# != 4 ]; then 14 | echo "Usage: $0 " 15 | fi 16 | 17 | enroll_data_dir=$1 18 | test_data_dir=$2 19 | trials=$3 20 | scores_dir=$4 21 | 22 | mkdir -p $scores_dir/log 23 | run.pl $scores_dir/log/cosine_scoring.log \ 24 | cat $trials \| awk '{print $1" "$2}' \| \ 25 | ivector-compute-dot-products - \ 26 | "ark:ivector-normalize-length ark:${enroll_data_dir}/xvector.ark ark:- |" \ 27 | "ark:ivector-normalize-length ark:${test_data_dir}/xvector.ark ark:- |" \ 28 | $scores_dir/cosine_scores || exit 1; 29 | -------------------------------------------------------------------------------- /utils/data/get_segments_for_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script operates on a data directory, such as in data/train/, 4 | # and writes new segments to stdout. The file 'segments' maps from 5 | # utterance to time offsets into a recording, with the format: 6 | # 7 | # This script assumes utterance and recording ids are the same (i.e., that 8 | # wav.scp is indexed by utterance), and uses durations from 'utt2dur', 9 | # created if necessary by get_utt2dur.sh. 10 | 11 | . ./path.sh 12 | 13 | if [ $# != 1 ]; then 14 | echo "Usage: $0 [options] " 15 | echo "e.g.:" 16 | echo " $0 data/train > data/train/segments" 17 | exit 1 18 | fi 19 | 20 | data=$1 21 | 22 | if [ ! -f $data/utt2dur ]; then 23 | utils/data/get_utt2dur.sh $data 1>&2 || exit 1; 24 | fi 25 | 26 | # 0 27 | awk '{ print $1, $1, 0, $2 }' $data/utt2dur 28 | 29 | exit 0 30 | -------------------------------------------------------------------------------- /utils/fix_ctm.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | stmfile=$1 4 | ctmfile=$2 5 | 6 | segments_stm=`cat $stmfile | cut -f 1 -d ' ' | sort -u` 7 | segments_ctm=`cat $ctmfile | cut -f 1 -d ' ' | sort -u` 8 | 9 | segments_stm_count=`echo "$segments_stm" | wc -l ` 10 | segments_ctm_count=`echo "$segments_ctm" | wc -l ` 11 | 12 | #echo $segments_stm_count 13 | #echo $segments_ctm_count 14 | 15 | if [ "$segments_stm_count" -gt "$segments_ctm_count" ] ; then 16 | pp=$( diff <(echo "$segments_stm") <(echo "$segments_ctm" ) | grep "^<" | sed "s/^< *//g") 17 | ( 18 | for elem in $pp ; do 19 | echo "$elem 1 0 0 EMPTY_RECOGNIZED_PHRASE" 20 | done 21 | ) >> $ctmfile 22 | echo "FIXED CTM FILE" 23 | exit 0 24 | elif [ "$segments_stm_count" -lt "$segments_ctm_count" ] ; then 25 | echo "Segment STM count: $segments_stm_count" 26 | echo "Segment CTM count: $segments_ctm_count" 27 | echo "FAILURE FIXING CTM FILE" 28 | exit 1 29 | else 30 | exit 0 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /utils/spk2utt_to_utt2spk.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | while(<>){ 19 | @A = split(" ", $_); 20 | @A > 1 || die "Invalid line in spk2utt file: $_"; 21 | $s = shift @A; 22 | foreach $u ( @A ) { 23 | print "$u $s\n"; 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /utils/s2eps.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script replaces and with (on both input and output sides), 18 | # for the G.fst acceptor. 19 | 20 | while(<>){ 21 | @A = split(" ", $_); 22 | if ( @A >= 4 ) { 23 | if ($A[2] eq "" || $A[2] eq "") { $A[2] = ""; } 24 | if ($A[3] eq "" || $A[3] eq "") { $A[3] = ""; } 25 | } 26 | print join("\t", @A) . "\n"; 27 | } 28 | -------------------------------------------------------------------------------- /utils/eps2disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | # 2015 Guoguo Chen 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This script replaces epsilon with #0 on the input side only, of the G.fst 19 | # acceptor. 20 | 21 | while(<>){ 22 | if (/\s+#0\s+/) { 23 | print STDERR "$0: ERROR: LM has word #0, " . 24 | "which is reserved as disambiguation symbol\n"; 25 | exit 1; 26 | } 27 | s:^(\d+\s+\d+\s+)\(\s+):$1#0$2:; 28 | print; 29 | } 30 | -------------------------------------------------------------------------------- /baseline.py: -------------------------------------------------------------------------------- 1 | # this code is used to 2 | # get xvector.ark from xvector.npz and 3 | # calculate baseline EER 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | import os 8 | 9 | 10 | paths = ["./data/voxceleb_combined_200000/xvector", 11 | "./data/sitw_dev/enroll/xvector", 12 | "./data/sitw_dev/test/xvector", 13 | "./data/sitw_eval/enroll/xvector", 14 | "./data/sitw_eval/test/xvector" 15 | ] 16 | 17 | # delete 18 | for path in paths: 19 | if os.path.exists(path+'.ark') == True: 20 | os.remove(path+'.ark') 21 | print('delete {}.ark'.format(path)) 22 | 23 | # write 24 | for path in paths: 25 | # load npz data 26 | vector = np.load(path+'.npz')['vector'] 27 | labels = np.load(path+'.npz')['utt'] 28 | with open(path+'.ark', 'w') as f: 29 | for i in range(vector.shape[0]): 30 | f.write(str(labels[i])) 31 | f.write(' [ ') 32 | for j in vector[i]: 33 | f.write(str(j)) 34 | f.write(' ') 35 | f.write(']') 36 | f.write('\n') 37 | print('{}.ark is done!'.format(path)) 38 | 39 
| print('\nall done!') 40 | -------------------------------------------------------------------------------- /utils/build_const_arpa_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # This script reads in an Arpa format language model, and converts it into the 7 | # ConstArpaLm format language model. 8 | 9 | # begin configuration section 10 | # end configuration section 11 | 12 | [ -f path.sh ] && . ./path.sh; 13 | 14 | . utils/parse_options.sh 15 | 16 | if [ $# != 3 ]; then 17 | echo "Usage: " 18 | echo " $0 [options] " 19 | echo "e.g.:" 20 | echo " $0 data/local/lm/3-gram.full.arpa.gz data/lang/ data/lang_test_tgmed" 21 | echo "Options" 22 | exit 1; 23 | fi 24 | 25 | export LC_ALL=C 26 | 27 | arpa_lm=$1 28 | old_lang=$2 29 | new_lang=$3 30 | 31 | mkdir -p $new_lang 32 | 33 | mkdir -p $new_lang 34 | cp -r $old_lang/* $new_lang 35 | 36 | unk=`cat $new_lang/oov.int` 37 | bos=`grep "" $new_lang/words.txt | awk '{print $2}'` 38 | eos=`grep "" $new_lang/words.txt | awk '{print $2}'` 39 | if [[ -z $bos || -z $eos ]]; then 40 | echo "$0: and symbols are not in $new_lang/words.txt" 41 | exit 1 42 | fi 43 | 44 | 45 | arpa-to-const-arpa --bos-symbol=$bos \ 46 | --eos-symbol=$eos --unk-symbol=$unk \ 47 | "gunzip -c $arpa_lm | utils/map_arpa_lm.pl $new_lang/words.txt|" $new_lang/G.carpa || exit 1; 48 | 49 | exit 0; 50 | -------------------------------------------------------------------------------- /utils/summarize_warnings.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | @ARGV != 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | $dir = $ARGV[0]; 8 | 9 | ! 
-d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" && exit 1; 10 | 11 | $dir =~ s:/$::; # Remove trailing slash. 12 | 13 | 14 | # Group the files into categories where all have the same base-name. 15 | foreach $f (glob ("$dir/*.log")) { 16 | $f_category = $f; 17 | # do next expression twice; s///g doesn't work as they overlap. 18 | $f_category =~ s:\.\d+\.:.*.:; 19 | $f_category =~ s:\.\d+\.:.*.:; 20 | $fmap{$f_category} .= " $f"; 21 | } 22 | 23 | sub split_hundreds { # split list of filenames into groups of 100. 24 | my $names = shift @_; 25 | my @A = split(" ", $names); 26 | my @ans = (); 27 | while (@A > 0) { 28 | my $group = ""; 29 | for ($x = 0; $x < 100 && @A>0; $x++) { 30 | $fname = pop @A; 31 | $group .= "$fname "; 32 | } 33 | push @ans, $group; 34 | } 35 | return @ans; 36 | } 37 | 38 | foreach $c (keys %fmap) { 39 | $n = 0; 40 | foreach $fgroup (split_hundreds($fmap{$c})) { 41 | $n += `grep -w WARNING $fgroup | wc -l`; 42 | } 43 | if ($n != 0) { 44 | print "$n warnings in $c\n" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /utils/utt2spk_to_spk2utt.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # converts an utt2spk file to a spk2utt file. 18 | # Takes input from the stdin or from a file argument; 19 | # output goes to the standard out. 20 | 21 | if ( @ARGV > 1 ) { 22 | die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt"; 23 | } 24 | 25 | while(<>){ 26 | @A = split(" ", $_); 27 | @A == 2 || die "Invalid line in utt2spk file: $_"; 28 | ($u,$s) = @A; 29 | if(!$seen_spk{$s}) { 30 | $seen_spk{$s} = 1; 31 | push @spklist, $s; 32 | } 33 | push (@{$spk_hash{$s}}, "$u"); 34 | } 35 | foreach $s (@spklist) { 36 | $l = join(' ',@{$spk_hash{$s}}); 37 | print "$s $l\n"; 38 | } 39 | -------------------------------------------------------------------------------- /utils/shuffle_list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | if ($ARGV[0] eq "--srand") { 20 | $n = $ARGV[1]; 21 | $n =~ m/\d+/ || die "Bad argument to --srand option: \"$n\""; 22 | srand($ARGV[1]); 23 | shift; 24 | shift; 25 | } else { 26 | srand(0); # Gives inconsistent behavior if we don't seed. 27 | } 28 | 29 | if (@ARGV > 1 || $ARGV[0] =~ m/^-.+/) { # >1 args, or an option we 30 | # don't understand. 
31 | print "Usage: shuffle_list.pl [--srand N] [input file] > output\n"; 32 | print "randomizes the order of lines of input.\n"; 33 | exit(1); 34 | } 35 | 36 | @lines; 37 | while (<>) { 38 | push @lines, [ (rand(), $_)] ; 39 | } 40 | 41 | @lines = sort { $a->[0] cmp $b->[0] } @lines; 42 | foreach $l (@lines) { 43 | print $l->[1]; 44 | } 45 | -------------------------------------------------------------------------------- /utils/analyze_segments.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Copyright 2015 GoVivace Inc. (Author: Nagendra Kumar Goel) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Analyze a segments file and print important stats on it. 
18 | 19 | $dur = $total = 0; 20 | $maxDur = 0; 21 | $minDur = 9999999999; 22 | $n = 0; 23 | while(<>){ 24 | chomp; 25 | @t = split(/\s+/); 26 | $dur = $t[3] - $t[2]; 27 | $total += $dur; 28 | if ($dur > $maxDur) { 29 | $maxSegId = $t[0]; 30 | $maxDur = $dur; 31 | } 32 | if ($dur < $minDur) { 33 | $minSegId = $t[0]; 34 | $minDur = $dur; 35 | } 36 | $n++; 37 | } 38 | $avg=$total/$n; 39 | $hrs = $total/3600; 40 | print "Total $hrs hours of data\n"; 41 | print "Average segment length $avg seconds\n"; 42 | print "Segment $maxSegId has length of $maxDur seconds\n"; 43 | print "Segment $minSegId has length of $minDur seconds\n"; 44 | -------------------------------------------------------------------------------- /utils/show_lattice.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | format=pdf # pdf svg 4 | mode=save # display save 5 | lm_scale=0.0 6 | acoustic_scale=0.0 7 | #end of config 8 | 9 | . utils/parse_options.sh 10 | 11 | if [ $# != 3 ]; then 12 | echo "usage: $0 [--mode display|save] [--format pdf|svg] " 13 | echo "e.g.: $0 utt-0001 \"test/lat.*.gz\" tri1/graph/words.txt" 14 | exit 1; 15 | fi 16 | 17 | . path.sh 18 | 19 | uttid=$1 20 | lat=$2 21 | words=$3 22 | 23 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); # trap "rm -r $tmpdir" EXIT # cleanup 24 | 25 | gunzip -c $lat | lattice-to-fst --lm-scale=$lm_scale --acoustic-scale=$acoustic_scale ark:- "scp,p:echo $uttid $tmpdir/$uttid.fst|" || exit 1; 26 | ! 
[ -s $tmpdir/$uttid.fst ] && \ 27 | echo "Failed to extract lattice for utterance $uttid (not present?)" && exit 1; 28 | fstdraw --portrait=true --osymbols=$words $tmpdir/$uttid.fst | dot -T${format} > $tmpdir/$uttid.${format} 29 | 30 | if [ "$(uname)" == "Darwin" ]; then 31 | doc_open=open 32 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 33 | doc_open=xdg-open 34 | elif [ $mode == "display" ] ; then 35 | echo "Can not automaticaly open file on your operating system" 36 | mode=save 37 | fi 38 | 39 | [ $mode == "display" ] && $doc_open $tmpdir/$uttid.${format} 40 | [[ $mode == "display" && $? -ne 0 ]] && echo "Failed to open ${format} format." && mode=save 41 | [ $mode == "save" ] && echo "Saving to $uttid.${format}" && cp $tmpdir/$uttid.${format} . 42 | 43 | exit 0 44 | -------------------------------------------------------------------------------- /utils/best_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # To be run from one directory above this script. 19 | 20 | perl -e 'while(<>){ 21 | s/\|(\d)/\| $1/g; s/(\d)\|/$1 \|/g; 22 | if (m/[WS]ER (\S+)/ && (!defined $bestwer || $bestwer > $1)){ $bestwer = $1; $bestline=$_; } # kaldi "compute-wer" tool. 
23 | elsif (m: (Mean|Sum/Avg|)\s*\|\s*\S+\s+\S+\s+\|\s+\S+\s+\S+\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\|: 24 | && (!defined $bestwer || $bestwer > $2)){ $bestwer = $2; $bestline=$_; } } # sclite. 25 | if (defined $bestline){ print $bestline; } ' | \ 26 | awk 'BEGIN{ FS="%WER"; } { if(NF == 2) { print FS$2" "$1; } else { print $0; }}' | \ 27 | awk 'BEGIN{ FS="Sum/Avg"; } { if(NF == 2) { print $2" "$1; } else { print $0; }}' | \ 28 | awk '{ if($1!~/%WER/) { print "%WER "$9" "$0; } else { print $0; }}' | \ 29 | sed -e 's|\s\s*| |g' -e 's|\:$||' -e 's|\:\s*\|\s*$||' 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/remove_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script removes lines that contain these OOVs on either the 18 | # third or fourth fields of the line. It is intended to remove arcs 19 | # with OOVs on, from FSTs (probably compiled from ARPAs with OOVs in). 
20 | 21 | if ( @ARGV < 1 && @ARGV > 2) { 22 | die "Usage: remove_oovs.pl unk_list.txt [ printed-fst ]\n"; 23 | } 24 | 25 | $unklist = shift @ARGV; 26 | open(S, "<$unklist") || die "Failed opening unknown-symbol list $unklist\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 1 || die "Bad line in unknown-symbol list: $_"; 30 | $unk{$A[0]} = 1; 31 | } 32 | 33 | $num_removed = 0; 34 | while(<>){ 35 | @A = split(" ", $_); 36 | if(defined $unk{$A[2]} || defined $unk{$A[3]}) { 37 | $num_removed++; 38 | } else { 39 | print; 40 | } 41 | } 42 | print STDERR "remove_oovs.pl: removed $num_removed lines.\n"; 43 | 44 | -------------------------------------------------------------------------------- /data/label.py: -------------------------------------------------------------------------------- 1 | # Author: Yang Zhang 2 | # Author: Xueyi Wang 3 | # Apache 2.0. 4 | # 2019, CSLT 5 | 6 | import argparse 7 | import numpy as np 8 | 9 | 10 | def prepare_label_data(source_path, dest_path): 11 | print("source_path: ", source_path) 12 | print("dest_path: ", dest_path) 13 | print("start zip...") 14 | print("waiting...") 15 | 16 | utt2spk = np.loadtxt(source_path, dtype=bytes).astype(str) 17 | all_labels = [] 18 | for i in utt2spk: 19 | all_labels.append(i[1].strip('id')) 20 | 21 | spker = [] 22 | for i in all_labels: 23 | if i not in spker: 24 | spker.append(i) 25 | spk = [] 26 | 27 | temp = [] 28 | for i in all_labels: 29 | for j in range(len(spker)): 30 | if spker[j] == i: 31 | temp.append(j) 32 | # print(j) 33 | spk = np.array(temp) 34 | spk = spk.reshape(-1, 1) 35 | 36 | spker = [] 37 | for i in temp: 38 | if i not in spker: 39 | spker.append(i) 40 | 41 | spker = np.array(spker) 42 | 43 | print('spk', spk.shape) 44 | print('spker', spker.shape) 45 | 46 | np.savez(dest_path, spk_list=spk, spker=spker) 47 | 48 | print("prepare_label_data {} is done".format(dest_path)) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | 
parser.add_argument("--source_path", help="source_path of utt2spk") 54 | parser.add_argument("--dest_path", help="destination of spk.npz") 55 | args = parser.parse_args() 56 | 57 | source_path = args.source_path 58 | dest_path = args.dest_path 59 | 60 | prepare_label_data(source_path, dest_path) 61 | -------------------------------------------------------------------------------- /utils/add_disambig.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # Adds some specified number of disambig symbols to a symbol table. 19 | # Adds these as #1, #2, etc. 20 | # If the --include-zero option is specified, includes an extra one 21 | # #0. 
22 | 23 | $include_zero = 0; 24 | if($ARGV[0] eq "--include-zero") { 25 | $include_zero = 1; 26 | shift @ARGV; 27 | } 28 | 29 | if(@ARGV != 2) { 30 | die "Usage: add_disambig.pl [--include-zero] symtab.txt num_extra > symtab_out.txt "; 31 | } 32 | 33 | 34 | $input = $ARGV[0]; 35 | $nsyms = $ARGV[1]; 36 | 37 | open(F, "<$input") || die "Opening file $input"; 38 | 39 | while() { 40 | @A = split(" ", $_); 41 | @A == 2 || die "Bad line $_"; 42 | $lastsym = $A[1]; 43 | print; 44 | } 45 | 46 | if(!defined($lastsym)){ 47 | die "Empty symbol file?"; 48 | } 49 | 50 | if($include_zero) { 51 | $lastsym++; 52 | print "#0 $lastsym\n"; 53 | } 54 | 55 | for($n = 1; $n <= $nsyms; $n++) { 56 | $y = $n + $lastsym; 57 | print "#$n $y\n"; 58 | } 59 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_speed_3way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Apache 2.0 6 | 7 | # This script does the standard 3-way speed perturbing of 8 | # a data directory (it operates on the wav.scp). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 2 ]; then 13 | echo "Usage: perturb_data_dir_speed_3way.sh " 14 | echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." 15 | echo "e.g.:" 16 | echo " $0 data/train data/train_sp" 17 | echo "Note: if /feats.scp already exists, this will refuse to run." 18 | exit 1 19 | fi 20 | 21 | srcdir=$1 22 | destdir=$2 23 | 24 | if [ ! -f $srcdir/wav.scp ]; then 25 | echo "$0: expected $srcdir/wav.scp to exist" 26 | exit 1 27 | fi 28 | 29 | if [ -f $destdir/feats.scp ]; then 30 | echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)" 31 | exit 1 32 | fi 33 | 34 | echo "$0: making sure the utt2dur file is present in ${srcdir}, because " 35 | echo "... 
obtaining it after speed-perturbing would be very slow, and" 36 | echo "... you might need it." 37 | utils/data/get_utt2dur.sh ${srcdir} 38 | 39 | utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1 40 | utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1 41 | utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1 42 | 43 | rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 44 | 45 | echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir" 46 | utils/validate_data_dir.sh --no-feats $destdir 47 | 48 | -------------------------------------------------------------------------------- /utils/remove_data_links.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This program searches within a directory for soft links that 4 | # appear to be created by 'create_data_link.pl' to a 'storage/' subdirectory, 5 | # and it removes both the soft links and the things they point to. 6 | # for instance, if you have a soft link 7 | # foo/egs/1.1.egs -> storage/2/1.1.egs 8 | # it will remove both foo/egs/storage/2/1.1.egs, and foo/egs/1.1.egs. 9 | 10 | ret=0 11 | 12 | dry_run=false 13 | 14 | if [ "$1" == "--dry-run" ]; then 15 | dry_run=true 16 | shift 17 | fi 18 | 19 | if [ $# == 0 ]; then 20 | echo "Usage: $0 [--dry-run] " 21 | echo "e.g.: $0 exp/nnet4a/egs/" 22 | echo " Removes from any subdirectories of the command-line arguments, soft links that " 23 | echo " appear to have been created by utils/create_data_link.pl, as well as the things" 24 | echo " that those soft links point to. Will typically be called on a directory prior" 25 | echo " to 'rm -r' on that directory, to ensure that data that was distributed on other" 26 | echo " volumes also gets deleted." 27 | echo " With --dry-run, just prints what it would do." 28 | fi 29 | 30 | for dir in $*; do 31 | if [ ! 
-d $dir ]; then 32 | echo "$0: not a directory: $dir" 33 | ret=1 34 | else 35 | for subdir in $(find $dir -type d); do 36 | if [ -d $subdir/storage ]; then 37 | for x in $(ls $subdir); do 38 | f=$subdir/$x 39 | if [ -L $f ] && [[ $(readlink $f) == storage/* ]]; then 40 | target=$subdir/$(readlink $f) 41 | if $dry_run; then 42 | echo rm $f $target 43 | else 44 | rm $f $target 45 | fi 46 | fi 47 | done 48 | fi 49 | done 50 | fi 51 | done 52 | 53 | exit $ret 54 | -------------------------------------------------------------------------------- /utils/nnet/gen_hamm_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | # ./gen_hamm_mat.py 19 | # script generates diagonal matrix with hamming window values 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='applied splice value') 30 | (options, args) = parser.parse_args() 31 | 32 | if(options.dim == None): 33 | parser.print_help() 34 | sys.exit(1) 35 | 36 | dim=int(options.dim) 37 | splice=int(options.splice) 38 | 39 | 40 | #generate the diagonal matrix with hammings 41 | M_2PI = 6.283185307179586476925286766559005 42 | 43 | dim_mat=(2*splice+1)*dim 44 | timeContext=2*splice+1 45 | print '[' 46 | for row in range(dim_mat): 47 | for col in range(dim_mat): 48 | if col!=row: 49 | print '0', 50 | else: 51 | i=int(row/dim) 52 | print str(0.54 - 0.46*cos((M_2PI * i) / (timeContext-1))), 53 | print 54 | 55 | print ']' 56 | 57 | 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # vscode 2 | .vscode/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /utils/nnet/gen_splice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_splice.py 19 | # generates Component 20 | 21 | from math import * 22 | import sys 23 | 24 | 25 | from optparse import OptionParser 26 | 27 | parser = OptionParser() 28 | parser.add_option('--fea-dim', dest='dim_in', help='feature dimension') 29 | parser.add_option('--splice', dest='splice', help='number of frames to concatenate with the central frame') 30 | parser.add_option('--splice-step', dest='splice_step', help='splicing step (frames dont need to be consecutive, --splice 3 --splice-step 2 will select offsets: -6 -4 -2 0 2 4 6)', default='1' ) 31 | (options, args) = parser.parse_args() 32 | 33 | if(options.dim_in == None): 34 | parser.print_help() 35 | sys.exit(1) 36 | 37 | dim_in=int(options.dim_in) 38 | splice=int(options.splice) 39 | splice_step=int(options.splice_step) 40 | 41 | dim_out=(2*splice+1)*dim_in 42 | 43 | print '', dim_out, dim_in 44 | print '[', 45 | 46 | splice_vec = range(-splice*splice_step, splice*splice_step+1, splice_step) 47 | for idx in range(len(splice_vec)): 48 | print splice_vec[idx], 49 | 50 | print ']' 51 | 52 | -------------------------------------------------------------------------------- /utils/data/get_frame_shift.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script takes as input a data directory, such as data/train/, preferably 7 | # with utt2dur file already existing (or the utt2dur file will be created if 8 | # not), and it attempts to work out the approximate frame shift by comparing the 9 | # utt2dur with the output of feat-to-len on the feats.scp. It prints it out. 10 | # if the shift is very close to, but above, 0.01 (the normal frame shift) it 11 | # rounds it down. 12 | 13 | . utils/parse_options.sh 14 | . 
. ./path.sh

if [ $# != 1 ]; then
  echo "Usage: $0 <datadir>"
  echo "e.g.:"
  echo " $0 data/train"
  echo "This script prints the frame-shift (e.g. 0.01) to the standard out."
  echo "If <datadir> does not contain utt2dur, this script may call utils/data/get_utt2dur.sh,"
  echo "which will require write permission to <datadir>"
  exit 1
fi

export LC_ALL=C

dir=$1

# Make sure utt2dur exists; if we cannot even get durations (no wav.scp
# and no segments), fall back to the standard 0.01s frame shift.
if [ ! -s $dir/utt2dur ]; then
  if [ ! -e $dir/wav.scp ] && [ ! -s $dir/segments ]; then
    echo "$0: neither $dir/wav.scp nor $dir/segments exist; assuming a frame shift of 0.01." 1>&2
    echo 0.01
    exit 0
  fi
  echo "$0: $dir/utt2dur does not exist: creating it" 1>&2
  utils/data/get_utt2dur.sh $dir 1>&2
fi

if [ ! -f $dir/feats.scp ]; then
  echo "$0: $dir/feats.scp does not exist" 1>&2
  exit 1
fi

temp=$(mktemp /tmp/tmp.XXXX)

feat-to-len scp:$dir/feats.scp ark,t:- | head -n 10 > $temp

# Bugfix: check that the output *file* is non-empty ('-s').  The original
# '[ -z $temp ]' tested the temp-file *name*, which is never empty, so a
# failed feat-to-len run was silently ignored.
if [ ! -s $temp ]; then
  echo "$0: error running feat-to-len" 1>&2
  rm $temp
  exit 1
fi

# Average duration / average frame count over the first 10 utterances;
# round down to exactly 0.01 if only slightly above it.
head -n 10 $dir/utt2dur | paste - $temp | \
   awk '{ dur += $2; frames += $4; } END { shift = dur / frames; if (shift > 0.01 && shift < 0.0102) shift = 0.01; print shift; }' || exit 1;

rm $temp

exit 0
"""Small helpers for the model code: per-dimension distribution
statistics (skew/kurtosis), unison shuffling, and a TF1-style MLP layer."""

import math
import random

import numpy as np
from scipy import stats


def _shuffled_index(n):
    """Return the integers 0..n-1 in random order.

    Uses the global `random` module RNG, exactly as the original code did,
    so seeding `random.seed(...)` makes the shuffle reproducible.
    """
    index = list(range(n))
    random.shuffle(index)
    return index


def get_skew_and_kurt(data):
    """Compute the mean skewness and kurtosis across dimensions.

    Args:
        data: 2-D array-like of shape (n_samples, n_dims); statistics are
            computed per dimension (per column) over the samples.

    Returns:
        Tuple (skew_mean, kurt_mean): the averages over dimensions of
        scipy.stats.skew and scipy.stats.kurtosis (Fisher definition).

    Raises:
        ValueError: if `data` has no dimensions (the original code raised
            ZeroDivisionError here).
    """
    dims = np.asarray(data).transpose()  # one row per dimension
    skew = [stats.skew(d) for d in dims]
    kurt = [stats.kurtosis(d) for d in dims]
    if not skew:
        raise ValueError("get_skew_and_kurt: input has no dimensions")
    return sum(skew) / len(skew), sum(kurt) / len(kurt)


def shuffle_data_table(data, table):
    """Shuffle `data` and `table` in unison with one random permutation.

    Both arguments must support numpy fancy indexing and have the same
    length along axis 0, so corresponding rows stay aligned.
    """
    index = _shuffled_index(len(data))
    return data[index], table[index]


def shuffle_data(data):
    """Return `data` with its rows in random order (numpy fancy indexing)."""
    return data[_shuffled_index(len(data))]


def MLP_net(input, layer_name, n_hidden, acitvate="elu"):
    """One fully-connected layer: activation(input @ W + b) (TF1 API).

    Args:
        input: 2-D tensor of shape (batch, in_dim).
        layer_name: suffix for the variable names 'w_<name>' / 'b_<name>'.
        n_hidden: output dimension.
        acitvate: 'tanh', 'sigmoid', or anything else for elu.
            (The misspelled parameter name is kept for backward
            compatibility with existing callers.)

    Returns:
        The activated output tensor of shape (batch, n_hidden).
    """
    # Imported lazily so the statistics/shuffle helpers above can be used
    # without TensorFlow installed.  NOTE(review): tf.contrib and
    # tf.get_variable are TF1.x-only — confirm the project pins TF1.
    import tensorflow as tf

    w_init = tf.contrib.layers.variance_scaling_initializer()
    b_init = tf.constant_initializer(0.)

    w = tf.get_variable('w_' + str(layer_name),
                        [input.get_shape()[1], n_hidden],
                        initializer=w_init)
    b = tf.get_variable('b_' + str(layer_name), [n_hidden],
                        initializer=b_init)

    output = tf.matmul(input, w) + b

    if acitvate == 'tanh':
        return tf.nn.tanh(output)
    if acitvate == 'sigmoid':
        return tf.nn.sigmoid(output)
    return tf.nn.elu(output)
31 | $num_removed = 0; 32 | while (@absdir_split > 0 && $absdir_split[0] eq $absfile_split[0]) { 33 | shift @absdir_split; 34 | shift @absfile_split; 35 | $num_removed++; 36 | } 37 | if (-l $newfile) { # newfile is already a link -> safe to delete it. 38 | unlink($newfile); # "unlink" just means delete. 39 | } 40 | if ($num_removed == 0) { # will use absolute pathnames. 41 | $oldfile = "/" . join("/", @absfile_split); 42 | $ret = symlink($oldfile, $newfile); 43 | } else { 44 | $num_dots = @absdir_split; 45 | $oldfile = join("/", @absfile_split); 46 | for ($n = 0; $n < $num_dots; $n++) { 47 | $oldfile = "../" . $oldfile; 48 | } 49 | $ret = symlink($oldfile, $newfile); 50 | } 51 | $ans = $ans && $ret; 52 | if (! $ret) { 53 | print STDERR "Error linking $oldfile to $newfile\n"; 54 | } 55 | } 56 | 57 | exit ($ans == 1 ? 0 : 1); 58 | 59 | -------------------------------------------------------------------------------- /local/plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains PLDA models and does scoring. 7 | 8 | simple_length_norm=true # If true, replace the default length normalization 9 | # performed in PLDA by an alternative that 10 | # normalizes the length of the iVectors to be equal 11 | # to the square root of the iVector dimension. 12 | 13 | #echo "$0 $@" # Print the command line for logging 14 | 15 | if [ -f path.sh ]; then . ./path.sh; fi 16 | . 
parse_options.sh || exit 1; 17 | 18 | if [ $# != 5 ]; then 19 | echo "Usage: $0 " 20 | fi 21 | 22 | plda_data_dir=$1 23 | enroll_data_dir=$2 24 | test_data_dir=$3 25 | trials=$4 26 | scores_dir=$5 27 | 28 | mkdir -p $plda_data_dir/log 29 | run.pl $plda_data_dir/log/compute_mean.log \ 30 | ivector-normalize-length ark:${plda_data_dir}/xvector.ark \ 31 | ark:- \| ivector-mean ark:- ${plda_data_dir}/mean.vec || exit 1; 32 | run.pl $plda_data_dir/log/plda.log \ 33 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 34 | "ark:ivector-normalize-length ark:${plda_data_dir}/xvector.ark ark:- |" \ 35 | $plda_data_dir/plda || exit 1; 36 | 37 | mkdir -p $scores_dir/log 38 | run.pl $scores_dir/log/plda_scoring.log \ 39 | ivector-plda-scoring --normalize-length=true \ 40 | --simple-length-normalization=$simple_length_norm \ 41 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 42 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/plda - |" \ 43 | "ark:ivector-normalize-length ark:${enroll_data_dir}/xvector.ark ark:- | ivector-subtract-global-mean ${plda_data_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 44 | "ark:ivector-normalize-length ark:${test_data_dir}/xvector.ark ark:- | ivector-subtract-global-mean ${plda_data_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 45 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/plda_scores || exit 1; 46 | 47 | rm $plda_data_dir/{plda,mean.vec} 48 | -------------------------------------------------------------------------------- /data/zip.py: -------------------------------------------------------------------------------- 1 | # Author: Yang Zhang 2 | # Author: Xueyi Wang 3 | # Apache 2.0. 
4 | # 2019, CSLT 5 | 6 | import argparse 7 | import os 8 | import numpy as np 9 | 10 | 11 | def ark2npz(source_path, dest_path): 12 | print("source_path: ", source_path) 13 | print("dest_path: ", dest_path) 14 | print("start zip...") 15 | print("waiting...") 16 | 17 | count = 0 18 | labels = [] 19 | vector = [] 20 | with open(source_path) as f: 21 | lines = f.readlines() 22 | for line in lines: 23 | count += 1 24 | # print("load {} success!".format(count)) 25 | line.strip('\n') 26 | vector_string = "" 27 | id = "" 28 | is_vector = False 29 | for c in line: 30 | if c == '[': 31 | is_vector = True 32 | if is_vector: 33 | if c != '[' and c != ']': 34 | vector_string += c 35 | if (not is_vector) and c != " ": 36 | id += c 37 | labels.append(id) 38 | num_list = vector_string.split(' ') 39 | num_list.pop() 40 | del(num_list[0]) 41 | num_list = list(map(eval, num_list)) 42 | 43 | vector.append(num_list) 44 | labels = np.array(labels, dtype=" G.txt" 29 | } 30 | 31 | $totcount = 0; 32 | $nl = 0; 33 | while (<>) { 34 | @A = split(" ", $_); 35 | foreach $a (@A) { 36 | $count{$a}++; 37 | $totcount++; 38 | } 39 | $nl++; 40 | $totcount++; # Treat end-of-sentence as a symbol for purposes of 41 | # $totcount, so the grammar is properly stochastic. This doesn't 42 | # become , it just becomes the final-prob. 43 | } 44 | 45 | foreach $a (keys %count) { 46 | $prob = $count{$a} / $totcount; 47 | $cost = -log($prob); # Negated natural-log probs. 48 | print "0\t0\t$a\t$a\t$cost\n"; 49 | } 50 | # Zero final-cost. 51 | $final_prob = $nl / $totcount; 52 | $final_cost = -log($final_prob); 53 | print "0\t$final_cost\n"; 54 | 55 | -------------------------------------------------------------------------------- /utils/data/remove_dup_utts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Remove excess utterances once they appear more than a specified 4 | # number of times with the same transcription, in a data set. 5 | # E.g. 
useful for removing excess "uh-huh" from training. 6 | 7 | if [ $# != 3 ]; then 8 | echo "Usage: remove_dup_utts.sh max-count " 9 | echo "e.g.: remove_dup_utts.sh 10 data/train data/train_nodup" 10 | echo "This script is used to filter out utterances that have from over-represented" 11 | echo "transcriptions (such as 'uh-huh'), by limiting the number of repetitions of" 12 | echo "any given word-sequence to a specified value. It's often used to get" 13 | echo "subsets for early stages of training." 14 | exit 1; 15 | fi 16 | 17 | maxcount=$1 18 | srcdir=$2 19 | destdir=$3 20 | mkdir -p $destdir 21 | 22 | [ ! -f $srcdir/text ] && echo "$0: Invalid input directory $srcdir" && exit 1; 23 | 24 | ! mkdir -p $destdir && echo "$0: could not create directory $destdir" && exit 1; 25 | 26 | ! [ "$maxcount" -gt 1 ] && echo "$0: invalid max-count '$maxcount'" && exit 1; 27 | 28 | cp $srcdir/* $destdir 29 | cat $srcdir/text | \ 30 | perl -e ' 31 | $maxcount = shift @ARGV; 32 | @all = (); 33 | $p1 = 103349; $p2 = 71147; $k = 0; 34 | sub random { # our own random number generator: predictable. 35 | $k = ($k + $p1) % $p2; 36 | return ($k / $p2); 37 | } 38 | while(<>) { 39 | push @all, $_; 40 | @A = split(" ", $_); 41 | shift @A; 42 | $text = join(" ", @A); 43 | $count{$text} ++; 44 | } 45 | foreach $line (@all) { 46 | @A = split(" ", $line); 47 | shift @A; 48 | $text = join(" ", @A); 49 | $n = $count{$text}; 50 | if ($n < $maxcount || random() < ($maxcount / $n)) { 51 | print $line; 52 | } 53 | }' $maxcount >$destdir/text 54 | 55 | echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`" 56 | 57 | echo "Using fix_data_dir.sh to reconcile the other files." 
58 | utils/fix_data_dir.sh $destdir 59 | rm -r $destdir/.backup 60 | 61 | exit 0 62 | -------------------------------------------------------------------------------- /utils/int2sym.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | # Apache 2.0. 4 | 5 | undef $field_begin; 6 | undef $field_end; 7 | 8 | 9 | if ($ARGV[0] eq "-f") { 10 | shift @ARGV; 11 | $field_spec = shift @ARGV; 12 | if ($field_spec =~ m/^\d+$/) { 13 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 14 | } 15 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) 16 | if ($1 ne "") { 17 | $field_begin = $1 - 1; # Change to zero-based indexing. 18 | } 19 | if ($2 ne "") { 20 | $field_end = $2 - 1; # Change to zero-based indexing. 21 | } 22 | } 23 | if (!defined $field_begin && !defined $field_end) { 24 | die "Bad argument to -f option: $field_spec"; 25 | } 26 | } 27 | $symtab = shift @ARGV; 28 | if(!defined $symtab) { 29 | print STDERR "Usage: sym2int.pl [options] symtab [input] > output\n" . 30 | "options: [-f (|-)]\n" . 31 | "e.g.: -f 2, or -f 3-4\n"; 32 | exit(1); 33 | } 34 | 35 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 36 | while() { 37 | @A = split(" ", $_); 38 | @A == 2 || die "bad line in symbol table file: $_"; 39 | $int2sym{$A[1]} = $A[0]; 40 | } 41 | 42 | sub int2sym { 43 | my $a = shift @_; 44 | my $pos = shift @_; 45 | if($a !~ m:^\d+$:) { # not all digits.. 46 | $pos1 = $pos+1; # make it one-based. 
47 | die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; 48 | } 49 | $s = $int2sym{$a}; 50 | if(!defined ($s)) { 51 | die "int2sym.pl: integer $a not in symbol table $symtab."; 52 | } 53 | return $s; 54 | } 55 | 56 | $error = 0; 57 | while (<>) { 58 | @A = split(" ", $_); 59 | for ($pos = 0; $pos <= $#A; $pos++) { 60 | $a = $A[$pos]; 61 | if ( (!defined $field_begin || $pos >= $field_begin) 62 | && (!defined $field_end || $pos <= $field_end)) { 63 | $a = int2sym($a, $pos); 64 | } 65 | print $a . " "; 66 | } 67 | print "\n"; 68 | } 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /utils/scoring/wer_report.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2015 Johns Hopkins University (author: Jan Trmal ) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | # This script reads per-utt table generated for example during scoring 19 | # and outpus the WER similar to the format the compute-wer utility 20 | # or the utils/best_wer.pl produces 21 | # i.e. 
from table containing lines in this format 22 | # SUM raw 23344 243230 176178 46771 9975 20281 77027 16463 23 | # produces something output like this 24 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] 25 | # NB: if the STDIN stream will contain more of the SUM raw entries, 26 | # the best one will be found and printed 27 | # 28 | # If the script is called with parameters, it uses them pro provide 29 | # a description of the output 30 | # i.e. 31 | # cat per-spk-report | utils/scoring/wer_report.pl Full set 32 | # the following output will be produced 33 | # %WER 31.67 [ 77027 / 243230, 9975 ins, 20281 del, 46771 sub ] Full set 34 | 35 | 36 | while () { 37 | if ( m:SUM\s+raw:) { 38 | @F = split; 39 | if ((!defined $wer) || ($wer > $F[8])) { 40 | $corr=$F[4]; 41 | $sub=$F[5]; 42 | $ins=$F[6]; 43 | $del=$F[7]; 44 | $wer=$F[8]; 45 | $words=$F[3]; 46 | } 47 | } 48 | } 49 | 50 | if (defined $wer) { 51 | $wer_str = sprintf("%.2f", (100.0 * $wer) / $words); 52 | print "%WER $wer_str [ $wer / $words, $ins ins, $del del, $sub sub ]"; 53 | print " " . join(" ", @ARGV) if @ARGV > 0; 54 | print "\n"; 55 | } 56 | -------------------------------------------------------------------------------- /utils/nnet/gen_dct_mat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2012 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # ./gen_dct_mat.py 19 | # script generates matrix with DCT transform, which is sparse 20 | # and takes into account that data-layout is along frequency axis, 21 | # while DCT is done along temporal axis. 22 | 23 | from math import * 24 | import sys 25 | 26 | 27 | from optparse import OptionParser 28 | 29 | parser = OptionParser() 30 | parser.add_option('--fea-dim', dest='dim', help='feature dimension') 31 | parser.add_option('--splice', dest='splice', help='applied splice value') 32 | parser.add_option('--dct-basis', dest='dct_basis', help='number of DCT basis') 33 | (options, args) = parser.parse_args() 34 | 35 | if(options.dim == None): 36 | parser.print_help() 37 | sys.exit(1) 38 | 39 | dim=int(options.dim) 40 | splice=int(options.splice) 41 | dct_basis=int(options.dct_basis) 42 | 43 | timeContext=2*splice+1 44 | 45 | 46 | #generate the DCT matrix 47 | M_PI = 3.1415926535897932384626433832795 48 | M_SQRT2 = 1.4142135623730950488016887 49 | 50 | 51 | #generate sparse DCT matrix 52 | print '[' 53 | for k in range(dct_basis): 54 | for m in range(dim): 55 | for n in range(timeContext): 56 | if(n==0): 57 | print m*'0 ', 58 | else: 59 | print (dim-1)*'0 ', 60 | print str(sqrt(2.0/timeContext)*cos(M_PI/timeContext*k*(n+0.5))), 61 | if(n==timeContext-1): 62 | print (dim-m-1)*'0 ', 63 | print 64 | print 65 | 66 | print ']' 67 | 68 | -------------------------------------------------------------------------------- /local/pca_plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # Apache 2.0. 4 | # 5 | # This script trains PLDA models and does scoring. 
6 | 7 | use_existing_models=false 8 | pca_dim=150 9 | simple_length_norm=false # If true, replace the default length normalization 10 | # performed in PLDA by an alternative that 11 | # normalizes the length of the iVectors to be equal 12 | # to the square root of the iVector dimension. 13 | 14 | #echo "$0 $@" # Print the command line for logging 15 | 16 | if [ -f path.sh ]; then . ./path.sh; fi 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# != 5 ]; then 20 | echo "Usage: $0 " 21 | fi 22 | 23 | plda_data_dir=$1 24 | enroll_data_dir=$2 25 | test_data_dir=$3 26 | trials=$4 27 | scores_dir=$5 28 | 29 | mkdir -p $plda_data_dir/log 30 | run.pl $plda_data_dir/log/compute_mean.log \ 31 | ivector-mean ark:$plda_data_dir/xvector.ark \ 32 | $plda_data_dir/mean.vec || exit 1; 33 | 34 | run.pl $plda_data_dir/log/pca.log \ 35 | est-pca --dim=$pca_dim --read-vectors=true --normalize-mean=true \ 36 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- |" \ 37 | $plda_data_dir/transform_pca.mat || exit 1; 38 | 39 | run.pl $plda_data_dir/log/pca_plda.log \ 40 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 41 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 42 | $plda_data_dir/pca_plda || exit 1; 43 | 44 | mkdir -p $scores_dir/log 45 | run.pl $scores_dir/log/pca_plda_scoring.log \ 46 | ivector-plda-scoring --normalize-length=true \ 47 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 48 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/pca_plda - |" \ 49 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$enroll_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 50 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$test_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_pca.mat ark:- ark:- | 
ivector-normalize-length ark:- ark:- |" \ 51 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/pca_plda_scores || exit 1; 52 | 53 | rm $plda_data_dir/{transform_pca.mat,pca_plda,mean.vec} 54 | -------------------------------------------------------------------------------- /utils/data/perturb_data_dir_volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a data directory, such as in data/train/, and modifies 7 | # the wav.scp to perturb the volume (typically useful for training data when 8 | # using systems that don't have cepstral mean normalization). 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 1 ]; then 13 | echo "Usage: $0 " 14 | echo "e.g.:" 15 | echo " $0 data/train" 16 | exit 1 17 | fi 18 | 19 | export LC_ALL=C 20 | 21 | data=$1 22 | 23 | if [ ! -f $data/wav.scp ]; then 24 | echo "$0: Expected $data/wav.scp to exist" 25 | exit 1 26 | fi 27 | 28 | if grep -q "sox --vol" $data/wav.scp; then 29 | echo "$0: It looks like the data was already volume perturbed. Not doing anything." 
30 | exit 0 31 | fi 32 | 33 | cat $data/wav.scp | python -c " 34 | import sys, os, subprocess, re, random 35 | random.seed(0) 36 | scale_low = 1.0/8 37 | scale_high = 2.0 38 | for line in sys.stdin.readlines(): 39 | if len(line.strip()) == 0: 40 | continue 41 | # Handle three cases of rxfilenames appropriately; 'input piped command', 'file offset' and 'filename' 42 | if line.strip()[-1] == '|': 43 | print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) 44 | elif re.search(':[0-9]+$', line.strip()) is not None: 45 | parts = line.split() 46 | print '{id} wav-copy {wav} - | sox --vol {vol} -t wav - -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 47 | else: 48 | parts = line.split() 49 | print '{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(id = parts[0], wav=' '.join(parts[1:]), vol = random.uniform(scale_low, scale_high)) 50 | " > $data/wav.scp_scaled || exit 1; 51 | 52 | len1=$(cat $data/wav.scp | wc -l) 53 | len2=$(cat $data/wav.scp_scaled | wc -l) 54 | if [ "$len1" != "$len2" ]; then 55 | echo "$0: error detected: number of lines changed $len1 vs $len2"; 56 | exit 1 57 | fi 58 | 59 | mv $data/wav.scp_scaled $data/wav.scp 60 | 61 | if [ -f $data/feats.scp ]; then 62 | echo "$0: $data/feats.scp exists; moving it to $data/.backup/ as it wouldn't be valid any more." 63 | mkdir -p $data/.backup/ 64 | mv $data/feats.scp $data/.backup/ 65 | fi 66 | 67 | echo "$0: added volume perturbation to the data in $data" 68 | exit 0 69 | 70 | -------------------------------------------------------------------------------- /local/lda_plda_scoring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2015 David Snyder 3 | # 2019 Lantian Li 4 | # Apache 2.0. 5 | # 6 | # This script trains PLDA models and does scoring. 
7 | 8 | lda_dim=150 9 | covar_factor=0.1 10 | simple_length_norm=false # If true, replace the default length normalization 11 | # performed in PLDA by an alternative that 12 | # normalizes the length of the iVectors to be equal 13 | # to the square root of the iVector dimension. 14 | 15 | #echo "$0 $@" # Print the command line for logging 16 | 17 | if [ -f path.sh ]; then . ./path.sh; fi 18 | . parse_options.sh || exit 1; 19 | 20 | if [ $# != 5 ]; then 21 | echo "Usage: $0 " 22 | fi 23 | 24 | plda_data_dir=$1 25 | enroll_data_dir=$2 26 | test_data_dir=$3 27 | trials=$4 28 | scores_dir=$5 29 | 30 | mkdir -p $plda_data_dir/log 31 | run.pl $plda_data_dir/log/compute_mean.log \ 32 | ivector-mean ark:$plda_data_dir/xvector.ark \ 33 | $plda_data_dir/mean.vec || exit 1; 34 | 35 | run.pl $plda_data_dir/log/lda.log \ 36 | ivector-compute-lda --total-covariance-factor=$covar_factor --dim=$lda_dim \ 37 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- |" \ 38 | ark:$plda_data_dir/utt2spk $plda_data_dir/transform_lda.mat || exit 1; 39 | 40 | run.pl $plda_data_dir/log/lda_plda.log \ 41 | ivector-compute-plda ark:$plda_data_dir/spk2utt \ 42 | "ark:ivector-subtract-global-mean ark:$plda_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 43 | $plda_data_dir/lda_plda || exit 1; 44 | 45 | mkdir -p $scores_dir/log 46 | run.pl $scores_dir/log/lda_plda_scoring.log \ 47 | ivector-plda-scoring --normalize-length=true \ 48 | --num-utts=ark:${enroll_data_dir}/num_utts.ark \ 49 | "ivector-copy-plda --smoothing=0.0 ${plda_data_dir}/lda_plda - |" \ 50 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$enroll_data_dir/xvector.ark ark:- | transform-vec $plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 51 | "ark:ivector-subtract-global-mean $plda_data_dir/mean.vec ark:$test_data_dir/xvector.ark ark:- | transform-vec 
$plda_data_dir/transform_lda.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 52 | "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/lda_plda_scores || exit 1; 53 | 54 | rm $plda_data_dir/{transform_lda.mat,lda_plda,mean.vec} 55 | -------------------------------------------------------------------------------- /utils/find_arpa_oovs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2011 Microsoft Corporation 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | if ( @ARGV < 1 && @ARGV > 2) { 19 | die "Usage: find_arpa_oovs.pl words.txt [lm.arpa]\n"; 20 | # This program finds words in the arpa file that are not symbols 21 | # in the OpenFst-format symbol table words.txt. It prints them 22 | # on the standard output, one per line. 23 | } 24 | 25 | $symtab = shift @ARGV; 26 | open(S, "<$symtab") || die "Failed opening symbol table file $symtab\n"; 27 | while(){ 28 | @A = split(" ", $_); 29 | @A == 2 || die "Bad line in symbol table file: $_"; 30 | $seen{$A[0]} = 1; 31 | } 32 | 33 | $found_data=0; 34 | $curgram=0; 35 | while(<>) { # Find the \data\ marker. 
36 | if(m:^\\data\\\s*$:) { $found_data=1; last; } 37 | } 38 | 39 | if ($found_data==0) { 40 | print STDERR "find_arpa_oovs.pl: found no \\data\\ marker in the ARPA input.\n"; 41 | exit(1); 42 | } 43 | 44 | while(<>) { 45 | if(m/^\\(\d+)\-grams:\s*$/) { 46 | $curgram = $1; 47 | if($curgram > 1) { 48 | last; # This is an optimization as we can get the vocab from the 1-grams 49 | } 50 | } elsif($curgram > 0) { 51 | @A = split(" ", $_); 52 | if(@A > 1) { 53 | shift @A; 54 | for($n=0;$n<$curgram;$n++) { 55 | $word = $A[$n]; 56 | if(!defined $word) { print STDERR "Unusual line $_ (line $.) in arpa file.\n"; } 57 | $in_arpa{$word} = 1; 58 | } 59 | } else { 60 | if(@A > 0 && $A[0] !~ m:\\end\\:) { 61 | print STDERR "Unusual line $_ (line $.) in arpa file\n"; 62 | } 63 | } 64 | } 65 | } 66 | 67 | foreach $w (keys %in_arpa) { 68 | if(!defined $seen{$w} && $w ne "<s>" && $w ne "</s>") { 69 | print "$w\n"; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /utils/prepare_online_nnet_dist_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Johns Hopkins University (Author: Vijayaditya Peddinti) 4 | # Guoguo Chen 5 | # Apache 2.0 6 | # Script to prepare the distribution from the online-nnet build 7 | 8 | other_files= #other files to be included in the build 9 | other_dirs= 10 | conf_files="ivector_extractor.conf mfcc.conf online_cmvn.conf online_nnet2_decoding.conf splice.conf" 11 | ivec_extractor_files="final.dubm final.ie final.mat global_cmvn.stats online_cmvn.conf splice_opts" 12 | 13 | echo "$0 $@" # Print the command line for logging 14 | [ -f path.sh ] && . ./path.sh; 15 | . 
parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 3 ]; then 18 | echo "Usage: $0 " 19 | echo "e.g.: $0 data/lang exp/nnet2_online/nnet_ms_a_online tedlium.tgz" 20 | exit 1; 21 | fi 22 | 23 | lang=$1 24 | modeldir=$2 25 | tgzfile=$3 26 | 27 | for f in $lang/phones.txt $other_files; do 28 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 29 | done 30 | 31 | build_files= 32 | for d in $modeldir/conf $modeldir/ivector_extractor; do 33 | [ ! -d $d ] && echo "$0: no such directory $d" && exit 1; 34 | done 35 | 36 | for f in $ivec_extractor_files; do 37 | f=$modeldir/ivector_extractor/$f 38 | [ ! -f $f ] && echo "$0: no such file $f" && exit 1; 39 | build_files="$build_files $f" 40 | done 41 | 42 | # Makes a copy of the original config files, as we will change the absolute path 43 | # to relative. 44 | rm -rf $modeldir/conf_abs_path 45 | mkdir -p $modeldir/conf_abs_path 46 | cp -r $modeldir/conf/* $modeldir/conf_abs_path 47 | 48 | for f in $conf_files; do 49 | [ ! -f $modeldir/conf/$f ] && \ 50 | echo "$0: no such file $modeldir/conf/$f" && exit 1; 51 | # Changes absolute path to relative path. The path entries in the config file 52 | # are generated by scripts and it is safe to assume that they have structure: 53 | # variable=path 54 | cat $modeldir/conf_abs_path/$f | perl -e ' 55 | use File::Spec; 56 | while() { 57 | chomp; 58 | @col = split("=", $_); 59 | if (@col == 2 && (-f $col[1])) { 60 | $col[1] = File::Spec->abs2rel($col[1]); 61 | print "$col[0]=$col[1]\n"; 62 | } else { 63 | print "$_\n"; 64 | } 65 | } 66 | ' > $modeldir/conf/$f 67 | build_files="$build_files $modeldir/conf/$f" 68 | done 69 | 70 | tar -hczvf $tgzfile $lang $build_files $other_files $other_dirs \ 71 | $modeldir/final.mdl $modeldir/tree >/dev/null 72 | 73 | # Changes back to absolute path. 
74 | rm -rf $modeldir/conf 75 | mv $modeldir/conf_abs_path $modeldir/conf 76 | -------------------------------------------------------------------------------- /utils/format_lm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -u 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | set -o errexit 20 | 21 | if [ $# -ne 4 ]; then 22 | echo "Usage: $0 " 23 | echo "E.g.: $0 data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test" 24 | echo "Convert ARPA-format language models to FSTs."; 25 | exit 1; 26 | fi 27 | 28 | lang_dir=$1 29 | lm=$2 30 | lexicon=$3 31 | out_dir=$4 32 | mkdir -p $out_dir 33 | 34 | [ -f ./path.sh ] && . 
./path.sh 35 | 36 | echo "Converting '$lm' to FST" 37 | 38 | for f in phones.txt words.txt topo L.fst L_disambig.fst phones/ oov.int oov.txt; do 39 | cp -r $lang_dir/$f $out_dir 40 | done 41 | 42 | lm_base=$(basename $lm '.gz') 43 | gunzip -c $lm \ 44 | | arpa2fst --disambig-symbol=#0 \ 45 | --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst 46 | set +e 47 | fstisstochastic $out_dir/G.fst 48 | set -e 49 | # The output is like: 50 | # 9.14233e-05 -0.259833 51 | # we do expect the first of these 2 numbers to be close to zero (the second is 52 | # nonzero because the backoff weights make the states sum to >1). 53 | 54 | # Everything below is only for diagnostic. 55 | # Checking that G has no cycles with empty words on them (e.g. , ); 56 | # this might cause determinization failure of CLG. 57 | # #0 is treated as an empty word. 58 | mkdir -p $out_dir/tmpdir.g 59 | awk '{if(NF==1){ printf("0 0 %s %s\n", $1,$1); }} 60 | END{print "0 0 #0 #0"; print "0";}' \ 61 | < "$lexicon" > $out_dir/tmpdir.g/select_empty.fst.txt 62 | 63 | fstcompile --isymbols=$out_dir/words.txt --osymbols=$out_dir/words.txt \ 64 | $out_dir/tmpdir.g/select_empty.fst.txt \ 65 | | fstarcsort --sort_type=olabel \ 66 | | fstcompose - $out_dir/G.fst > $out_dir/tmpdir.g/empty_words.fst 67 | 68 | fstinfo $out_dir/tmpdir.g/empty_words.fst | grep cyclic | grep -w 'y' \ 69 | && echo "Language model has cycles with empty words" && exit 1 70 | 71 | rm -r $out_dir/tmpdir.g 72 | 73 | 74 | echo "Succeeded in formatting LM: '$lm'" 75 | -------------------------------------------------------------------------------- /utils/convert_slf_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright Brno University of Technology (Author: Karel Vesely) 2014. Apache 2.0. 3 | 4 | # This script converts lattices to HTK format compatible with other toolkits. 5 | # We can choose to put words to nodes or arcs, as both is valid in the SLF format. 
6 | 7 | # begin configuration section. 8 | cmd=run.pl 9 | dirname=lats-in-htk-slf 10 | parallel_opts="-tc 50" # We should limit disk stress 11 | word_to_node=false # Words in arcs or nodes? [default:arcs] 12 | #end configuration section. 13 | 14 | echo "$0 $@" 15 | 16 | [ -f ./path.sh ] && . ./path.sh 17 | . parse_options.sh || exit 1; 18 | 19 | if [ $# -ne 3 ]; then 20 | echo "Usage: $0 [options] " 21 | echo " Options:" 22 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 23 | echo " --word-to-link (true|false) # put word symbols on links or nodes." 24 | echo " --parallel-opts STR # parallelization options (def.: '-tc 50')." 25 | echo "e.g.:" 26 | echo "$0 data/dev data/lang exp/tri4a/decode_dev" 27 | exit 1; 28 | fi 29 | 30 | data=$1 31 | lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. 32 | dir=$3 33 | 34 | model=$(dirname $dir)/final.mdl # assume model one level up from decoding dir. 35 | 36 | for f in $lang/words.txt $lang/phones/align_lexicon.int $model $dir/lat.1.gz; do 37 | [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1; 38 | done 39 | 40 | [ ! -d $dir/$dirname/log ] && mkdir -p $dir/$dirname 41 | 42 | echo "$0: Converting lattices into '$dir/$dirname'" 43 | 44 | # Words in arcs or nodes? 
[default:nodes] 45 | word_to_link_arg= 46 | $word_to_node && word_to_node_arg="--word-to-node" 47 | 48 | nj=$(cat $dir/num_jobs) 49 | 50 | # convert the lattices (individually, gzipped) 51 | $cmd $parallel_opts JOB=1:$nj $dir/$dirname/log/lat_convert.JOB.log \ 52 | mkdir -p $dir/$dirname/JOB/ '&&' \ 53 | lattice-align-words-lexicon --output-error-lats=true --output-if-empty=true \ 54 | $lang/phones/align_lexicon.int $model "ark:gunzip -c $dir/lat.JOB.gz |" ark,t:- \| \ 55 | utils/int2sym.pl -f 3 $lang/words.txt \| \ 56 | utils/convert_slf.pl $word_to_node_arg - $dir/$dirname/JOB/ || exit 1 57 | 58 | # make list of lattices 59 | find -L $PWD/$dir/$dirname -name *.lat.gz > $dir/$dirname/lat_htk.scp || exit 1 60 | 61 | # check number of lattices: 62 | nseg=$(cat $data/segments | wc -l) 63 | nlat_out=$(cat $dir/$dirname/lat_htk.scp | wc -l) 64 | echo "segments $nseg, saved-lattices $nlat_out" 65 | # 66 | [ $nseg -ne $nlat_out ] && echo "WARNING: missing $((nseg-nlat_out)) lattices for some segments!" \ 67 | && exit 1 68 | 69 | echo "success, converted lats to HTK : $PWD/$dir/$dirname/lat_htk.scp" 70 | exit 0 71 | 72 | -------------------------------------------------------------------------------- /utils/lang/check_phones_compatible.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Hang Lyu 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABILITY OR NON-INFRINGEMENT. 
14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # This script exits with status zero if the phone symbols tables are the same 18 | # except for possible differences in disambiguation symbols (meaning that all 19 | # symbols except those beginning with a # are mapped to the same values). 20 | # Otherwise it prints a warning and exits with status 1. 21 | # For the sake of compatibility with other scripts that did not write the 22 | # phones.txt to model directories, this script exits silently with status 0 23 | # if one of the phone symbol tables does not exist. 24 | # For the sake of compatibility with other scripts that did not write the 25 | # phones.txt to model directories, this script exits silently with status 0 26 | # if one of the phone symbol tables does not exist. 27 | 28 | . utils/parse_options.sh || exit 1; 29 | 30 | if [ $# -ne 2 ]; then 31 | echo "Usage: utils/lang/check_phones_compatible.sh " 32 | echo "e.g.: utils/lang/check_phones_compatible.sh data/lang/phones.txt exp/tri3/phones.txt" 33 | exit 1; 34 | fi 35 | 36 | table_first=$1 37 | table_second=$2 38 | 39 | # check the files exist or not 40 | if [ ! -f $table_first ]; then 41 | if [ ! -f $table_second ]; then 42 | echo "$0: Error! Both of the two phones-symbol tables are absent." 43 | echo "Please check your command" 44 | exit 1; 45 | else 46 | #The phones-symbol-table1 is absent. The model directory maybe created by old script. 47 | #For back compatibility, this script exits silently with status 0. 48 | exit 0; 49 | fi 50 | elif [ ! -f $table_second ]; then 51 | #The phones-symbol-table2 is absent. The model directory maybe created by old script. 52 | #For back compatibility, this script exits silently with status 0. 53 | exit 0; 54 | fi 55 | 56 | #Check the two tables are same or not (except for possible difference in disambiguation symbols). 57 | if ! 
cmp -s <(grep -v "^#" $table_first) <(grep -v "^#" $table_second); then 58 | echo "$0: phone symbol tables $table_first and $table_second are not compatible." 59 | exit 1; 60 | fi 61 | 62 | exit 0; 63 | -------------------------------------------------------------------------------- /utils/lang/validate_disambig_sym_file.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2016 FAU Erlangen (Author: Axel Horndasch) 4 | # Apache 2.0. 5 | # 6 | # Concept: Dan Povey 7 | 8 | use strict; 9 | use warnings; 10 | use Getopt::Long; 11 | 12 | my $Usage = < \$allow_numeric, 37 | ) or die ($Usage); 38 | 39 | if (@ARGV != 1) { 40 | die($Usage); 41 | } 42 | 43 | my $disambig_sym_file = shift @ARGV; 44 | 45 | print "$0: Checking validity of file \"$disambig_sym_file\" ...\n"; 46 | if (-z $disambig_sym_file) { 47 | print "$0: The file \"$disambig_sym_file\" is empty or does not exist, exiting ...\n"; exit 1; 48 | } 49 | 50 | if (not open(SYMS, "<$disambig_sym_file")) { 51 | print "$0: Could not open file \"$disambig_sym_file\", exiting ...\n"; exit 1; 52 | } 53 | 54 | # Go through the file containing disambiguation symbols line by line 55 | while () { 56 | chomp; 57 | my $symbol = $_; 58 | 59 | if ($symbol =~ /^#(.*)$/) { 60 | my $sympart = $1; 61 | if ($sympart eq "") { 62 | print "$0: Only \"$symbol\" is not allowed as a disambiguation symbol, exiting ...\n"; exit 1; 63 | } 64 | if ($sympart =~/\s+/) { 65 | print "$0: The disambiguation symbol \"$symbol\" contains whitespace, exiting ...\n"; exit 1; 66 | } 67 | if ($sympart eq "-1") { 68 | print "$0: The disambiguation symbol \"$symbol\" is not allowed, exiting ...\n"; exit 1; 69 | } 70 | if ($allow_numeric eq "false" && 71 | $sympart =~/^[0-9]+$/) { 72 | print "$0: Since \"$symbol\" is supposed to be an extra disambiguation symbol, it must not be numeric, exiting ...\n"; exit 1; 73 | } 74 | } else { 75 | print "$0: The disambiguation symbol \"$symbol\" 
does not start with a '#', exiting ...\n"; exit 1; 76 | } 77 | } 78 | 79 | print "--> SUCCESS [validating disambiguation symbol file \"$disambig_sym_file\"]\n"; 80 | exit 0; 81 | 82 | -------------------------------------------------------------------------------- /utils/create_split_dir.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # Apache 2.0. 5 | # 6 | # This script creates storage directories on different file systems, and creates 7 | # symbolic links to those directories. For example, a command 8 | # 9 | # utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 10 | # 11 | # will mkdir -p all of those directories, and will create links 12 | # 13 | # egs/storage/1 -> /export/gpu-03/egs/storage 14 | # egs/storage/2 -> /export/gpu-04/egs/storage 15 | # ... 16 | # 17 | use strict; 18 | use warnings; 19 | use File::Spec; 20 | use Getopt::Long; 21 | 22 | my $Usage = < 28 | e.g.: utils/create_split_dir.pl /export/gpu-0{3,4,5}/egs/storage egs/storage 29 | 30 | Allowed options: 31 | --suffix : Common suffix to (string, default = "") 32 | 33 | See also create_data_link.pl, which is intended to work with the resulting 34 | directory structure, and remove_data_links.sh 35 | EOU 36 | 37 | my $suffix=""; 38 | GetOptions('suffix=s' => \$suffix); 39 | 40 | if (@ARGV < 2) { 41 | die $Usage; 42 | } 43 | 44 | my $ans = 1; 45 | 46 | my $dir = pop(@ARGV); 47 | system("mkdir -p $dir 2>/dev/null"); 48 | 49 | my @all_actual_storage = (); 50 | foreach my $file (@ARGV) { 51 | push @all_actual_storage, File::Spec->rel2abs($file . "/" . $suffix); 52 | } 53 | 54 | my $index = 1; 55 | foreach my $actual_storage (@all_actual_storage) { 56 | my $pseudo_storage = "$dir/$index"; 57 | 58 | # If the symbolic link already exists, skip it rather than overwrite it. 
59 | if (-l $pseudo_storage) { 60 | print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n"; 61 | $index++; 62 | next; 63 | } 64 | 65 | # Create the destination directory and make the link. 66 | system("mkdir -p $actual_storage 2>/dev/null"); 67 | if ($? != 0) { 68 | print STDERR "$0: error creating directory $actual_storage\n"; 69 | exit(1); 70 | } 71 | { # create a README file for easier deletion. 72 | open(R, ">$actual_storage/README.txt"); 73 | my $storage_dir = File::Spec->rel2abs($dir); 74 | print R "# This directory is linked from $storage_dir, as part of Kaldi striped data\n"; 75 | print R "# The full list of directories where this data resides is:\n"; 76 | foreach my $d (@all_actual_storage) { 77 | print R "$d\n"; 78 | } 79 | close(R); 80 | } 81 | my $ret = symlink($actual_storage, $pseudo_storage); 82 | 83 | # Process the returned values 84 | $ans = $ans && $ret; 85 | if (! $ret) { 86 | print STDERR "Error linking $actual_storage to $pseudo_storage\n"; 87 | } 88 | 89 | $index++; 90 | } 91 | 92 | exit($ans == 1 ? 0 : 1); 93 | -------------------------------------------------------------------------------- /utils/apply_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | # This program is a bit like ./sym2int.pl in that it applies a map 7 | # to things in a file, but it's a bit more general in that it doesn't 8 | # assume the things being mapped to are single tokens, they could 9 | # be sequences of tokens. See the usage message. 10 | 11 | 12 | if (@ARGV > 0 && $ARGV[0] eq "-f") { 13 | shift @ARGV; 14 | $field_spec = shift @ARGV; 15 | if ($field_spec =~ m/^\d+$/) { 16 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 17 | } 18 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 
1:10 as a courtesty (properly, 1-10) 19 | if ($1 ne "") { 20 | $field_begin = $1 - 1; # Change to zero-based indexing. 21 | } 22 | if ($2 ne "") { 23 | $field_end = $2 - 1; # Change to zero-based indexing. 24 | } 25 | } 26 | if (!defined $field_begin && !defined $field_end) { 27 | die "Bad argument to -f option: $field_spec"; 28 | } 29 | } 30 | 31 | # Mapping is obligatory 32 | $permissive = 0; 33 | if (@ARGV > 0 && $ARGV[0] eq '--permissive') { 34 | shift @ARGV; 35 | # Mapping is optional (missing key is printed to output) 36 | $permissive = 1; 37 | } 38 | 39 | if(@ARGV != 1) { 40 | print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n"; 41 | print STDERR "Usage: apply_map.pl [options] map output\n" . 42 | "options: [-f ]\n" . 43 | "Applies the map 'map' to all input text, where each line of the map\n" . 44 | "is interpreted as a map from the first field to the list of the other fields\n" . 45 | "Note: can look like 4-5, or 4-, or 5-, or 1, it means the field\n" . 46 | "range in the input to apply the map to.\n" . 47 | "e.g.: echo A B | apply_map.pl a.txt\n" . 48 | "where a.txt is:\n" . 49 | "A a1 a2\n" . 50 | "B b\n" . 51 | "will produce:\n" . 52 | "a1 a2 b\n"; 53 | exit(1); 54 | } 55 | 56 | ($map) = @ARGV; 57 | open(M, "<$map") || die "Error opening map file $map: $!"; 58 | 59 | while () { 60 | @A = split(" ", $_); 61 | @A >= 1 || die "apply_map.pl: empty line."; 62 | $i = shift @A; 63 | $o = join(" ", @A); 64 | $map{$i} = $o; 65 | } 66 | 67 | while() { 68 | @A = split(" ", $_); 69 | for ($x = 0; $x < @A; $x++) { 70 | if ( (!defined $field_begin || $x >= $field_begin) 71 | && (!defined $field_end || $x <= $field_end)) { 72 | $a = $A[$x]; 73 | if (!defined $map{$a}) { 74 | if (!$permissive) { 75 | die "apply_map.pl: undefined key $a\n"; 76 | } else { 77 | print STDERR "apply_map.pl: warning! missing key $a\n"; 78 | } 79 | } else { 80 | $A[$x] = $map{$a}; 81 | } 82 | } 83 | } 84 | print join(" ", @A) . 
"\n"; 85 | } 86 | -------------------------------------------------------------------------------- /utils/lang/check_g_properties.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use IPC::Open2; 4 | 5 | if (@ARGV != 1) { 6 | print "Usage: $0 [options] \n"; 7 | print "e.g.: $0 data/lang\n"; 8 | exit(1); 9 | } 10 | 11 | $lang = shift @ARGV; 12 | 13 | # This script checks that G.fst in the lang.fst directory is OK with respect 14 | # to certain expected properties, and returns nonzero exit status if a problem was 15 | # detected. It is called from validate_lang.pl. 16 | # This only checks the properties of G that relate to disambiguation symbols, 17 | # epsilons and forbidden symbols <s> and </s>. 18 | 19 | if (! -e "$lang/G.fst") { 20 | print "$0: error: $lang/G.fst does not exist\n"; 21 | exit(1); 22 | } 23 | 24 | open(W, "<$lang/words.txt") || die "opening $lang/words.txt"; 25 | $hash_zero = -1; 26 | while (<W>) { 27 | @A = split(" ", $_); 28 | ($sym, $int) = @A; 29 | if ($sym eq "<s>" || $sym eq "</s>") { $is_forbidden{$int} = 1; } 30 | if ($sym eq "#0") { $hash_zero = $int; } 31 | } 32 | 33 | if (-e "$lang/phones/wdisambig_words.int") { 34 | open(F, "<$lang/phones/wdisambig_words.int") || die "opening $lang/phones/wdisambig_words.int"; 35 | while (<F>) { 36 | chop; 37 | $is_disambig{$_} = 1; 38 | } 39 | } else { 40 | $is_disambig{$hash_zero} = 1; 41 | } 42 | 43 | $input_cmd = ". ./path.sh; fstprint $lang/G.fst|"; 44 | open(G, $input_cmd) || die "running command $input_cmd"; 45 | 46 | $info_cmd = ". 
./path.sh; fstcompile | fstinfo "; 47 | open2(O, I, "$info_cmd") || die "running command $info_cmd"; 48 | 49 | $has_epsilons = 0; 50 | 51 | while () { 52 | @A = split(" ", $_); 53 | if (@A >= 4) { 54 | if ($is_forbidden{$A[2]} || $is_forbidden{$A[3]}) { 55 | chop; 56 | print "$0: validating $lang: error: line $_ in G.fst contains forbidden symbol or \n"; 57 | exit(1); 58 | } elsif ($is_disambig{$A[2]}) { 59 | print I $_; 60 | if ($A[3] != 0) { 61 | chop; 62 | print "$0: validating $lang: error: line $_ in G.fst has disambig on input but no epsilon on output\n"; 63 | exit(1); 64 | } 65 | } elsif ($A[2] == 0) { 66 | print I $_; 67 | $has_epsilons = 1; 68 | } elsif ($A[2] != $A[3]) { 69 | chop; 70 | print "$0: validating $lang: error: line $_ in G.fst has inputs and outputs different but input is not disambig symbol.\n"; 71 | exit(1); 72 | } 73 | } 74 | } 75 | 76 | close(I); # tell 'fstcompile | fstinfo' pipeline that its input is done. 77 | while () { 78 | if (m/cyclic\s+y/) { 79 | print "$0: validating $lang: error: G.fst has cycles containing only disambig symbols and epsilons. Would cause determinization failure\n"; 80 | exit(1); 81 | } 82 | } 83 | 84 | if ($has_epsilons) { 85 | print "$0: warning: validating $lang: G.fst has epsilon-input arcs. We don't expect these in most setups.\n"; 86 | } 87 | 88 | print "--> $0 successfully validated $lang/G.fst\n"; 89 | exit(0); 90 | -------------------------------------------------------------------------------- /eer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) 3 | # 2017 Johns Hopkins University (Author: Daniel Povey) 4 | # 2017-2018 David Snyder 5 | # 2018 Ewald Enzinger 6 | # 2019 Tsinghua University (Author: Lantian Li) 7 | # 2019 Yang Zhang 8 | # Apache 2.0. 9 | # 10 | # This is an x-vector-based recipe for Speakers in the Wild (SITW). 11 | 12 | . 
./path.sh 13 | 14 | 15 | for sub in dev eval; do 16 | # Cosine metric. 17 | echo "Test on SITW $sub:" 18 | 19 | local/cosine_scoring.sh data/sitw_$sub/enroll \ 20 | data/sitw_$sub/test \ 21 | data/sitw_$sub/test/core-core.lst \ 22 | data/sitw_$sub/foo 23 | 24 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/cosine_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 25 | echo "Cosine EER: $eer%" 26 | 27 | # Create a PLDA model and do scoring. 28 | local/plda_scoring.sh data/voxceleb_combined_200000 \ 29 | data/sitw_$sub/enroll \ 30 | data/sitw_$sub/test \ 31 | data/sitw_$sub/test/core-core.lst \ 32 | data/sitw_$sub/foo 33 | 34 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 35 | echo "PLDA EER: $eer%" 36 | 37 | # Create a LDA-PLDA model and do scoring. 38 | for lda_dim in 150;do 39 | 40 | local/lda_plda_scoring.sh --lda-dim $lda_dim --covar-factor 0.0 \ 41 | data/voxceleb_combined_200000 \ 42 | data/sitw_$sub/enroll \ 43 | data/sitw_$sub/test \ 44 | data/sitw_$sub/test/core-core.lst \ 45 | data/sitw_$sub/foo 46 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/lda_plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 47 | echo "LDA_PLDA EER(${lda_dim}): $eer%" 48 | 49 | done 50 | 51 | # Create a PCA-PLDA model and do scoring. 
52 | for pca_dim in 150;do 53 | 54 | local/pca_plda_scoring.sh --pca-dim $pca_dim \ 55 | data/voxceleb_combined_200000 \ 56 | data/sitw_$sub/enroll \ 57 | data/sitw_$sub/test \ 58 | data/sitw_$sub/test/core-core.lst \ 59 | data/sitw_$sub/foo 60 | 61 | eer=$(paste data/sitw_$sub/test/core-core.lst data/sitw_$sub/foo/pca_plda_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null) 62 | echo "PCA_PLDA EER(${pca_dim}): $eer%" 63 | done 64 | 65 | echo 66 | done 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # v-vector-tf 2 | 3 | Tensorflow and kaldi implementation of our Interspeech2019 paper [VAE-based regularization for deep speaker embedding](https://github.com/zyzisyz/v-vector-tf/raw/master/paper.pdf) 4 | 5 | **note: the repo is not the final release, I will clean up our experimental code and update soon** 6 | 7 | ## Dependency 8 | 9 | 1. computer 10 | 2. Linux (centos 7) 11 | 3. conda (Python 3.6) 12 | 4. Tensorflow-gpu 1.8 13 | 5. kaldi-toolkit 14 | 15 | ## Datasets and X-vector 16 | 17 | 1. VoxCeleb 18 | 2. SITW 19 | 3. CSLT_SITW 20 | 21 | ## Steps 22 | 23 | 1. use kaldi to extract x-vector from utterance and get `xvector.ark` files 24 | 2. convert the kaldi `xvector.ark` files to numpy binary data format (`xvector.ark` -> `xvector.npz`) 25 | 3. use tensorflow to train a VAE model, and get the V-vectors 26 | 4. use kaldi recipes to calculate EER (equal error rate) 27 | 28 | ## Usage 29 | 30 | 1. [install kaldi](https://github.com/kaldi-asr/kaldi) (note: if you are one of CSLT members, you can reference [Dr. tzy's Kaldi](https://github.com/tzyll/kaldi) or [CSLT Kaldi](https://github.com/csltstu/kaldi)) 31 | 32 | 2. 
create a conda environment and install the necessary Python package 33 | 34 | ```bash 35 | # for example 36 | conda create -n tf python=3.6 37 | conda activate tf 38 | pip install -r requirements.txt 39 | ``` 40 | 41 | 3. git clone the code and modify the `path.sh`, make sure that `path.sh` contains your kaldi path 42 | 43 | ```bash 44 | git clone https://github.com/zyzisyz/v-vector-tf.git 45 | 46 | # edit path.sh 47 | vim path.sh 48 | # export KALDI_ROOT=${replace it by your kaldi root path} 49 | ``` 50 | 51 | 4. calculate baseline EER 52 | 53 | ```bash 54 | bash baseline.sh 55 | ``` 56 | 57 | 5. Train a model 58 | 59 | ```bash 60 | # first of all, activate the conda Python environment 61 | conda activate tf 62 | # you can edit train.sh to change VAE model's config 63 | bash train.sh 64 | ``` 65 | 66 | 6. Use kaldi-toolkit to train the backend scoring model and calculate EER 67 | 68 | ```bash 69 | bash eval.sh 70 | ``` 71 | 72 | ## Our result 73 | 74 | SITW Dev. Core 75 | 76 | | | Cosine | PCA | PLDA | L-PLDA | P-PLDA | 77 | | :------: | :------: | :------: | :------: | :------: | :------: | 78 | | x-vector | 15.67 | 16.17 | 9.09 | **3.12** | 4.16 | 79 | | a-vector | 16.10 | 16.48 | 11.21 | 4.24 | 5.01 | 80 | | v-vector | 10.32 | 9.94 | 3.62 | 3.54 | 4.31 | 81 | | c-vector | **9.05** | **8.55** | **3.50** | 3.31 | **3.85** | 82 | 83 | Read the paper for more detail 84 | 85 | ## About 86 | 87 | Licensed under the Apache License, Version 2.0, Copyright [zyzisyz](https://github.com/zyzisyz) 88 | 89 | ### Repo Author 90 | 91 | Yang Zhang (zyziszy@foxmail.com) 92 | 93 | ### Contributors 94 | 95 | - [@Lilt](http://166.111.134.19:8081/lilt/) 96 | - [@fatejessie](https://github.com/fatejessie) 97 | - [@xDarkLemon](https://github.com/xDarkLemon) 98 | - [@AlanXiuxiu](https://github.com/AlanXiuxiu) 99 | - @Z.K. 
100 | -------------------------------------------------------------------------------- /utils/filter_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation 3 | # Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # This script takes a list of utterance-ids or any file whose first field 20 | # of each line is an utterance-id, and filters an scp 21 | # file (or any file whose "n-th" field is an utterance id), printing 22 | # out only those lines whose "n-th" field is in id_list. The index of 23 | # the "n-th" field is 1, by default, but can be changed by using 24 | # the -f switch 25 | 26 | $exclude = 0; 27 | $field = 1; 28 | $shifted = 0; 29 | 30 | do { 31 | $shifted=0; 32 | if ($ARGV[0] eq "--exclude") { 33 | $exclude = 1; 34 | shift @ARGV; 35 | $shifted=1; 36 | } 37 | if ($ARGV[0] eq "-f") { 38 | $field = $ARGV[1]; 39 | shift @ARGV; shift @ARGV; 40 | $shifted=1 41 | } 42 | } while ($shifted); 43 | 44 | if(@ARGV < 1 || @ARGV > 2) { 45 | die "Usage: filter_scp.pl [--exclude] [-f ] id_list [in.scp] > out.scp \n" . 46 | "Prints only the input lines whose f'th field (default: first) is in 'id_list'.\n" . 47 | "Note: only the first field of each line in id_list matters. With --exclude, prints\n" . 
48 | "only the lines that were *not* in id_list.\n" . 49 | "Caution: previously, the -f option was interpreted as a zero-based field index.\n" . 50 | "If your older scripts (written before Oct 2014) stopped working and you used the\n" . 51 | "-f option, add 1 to the argument.\n" . 52 | "See also: utils/filter_scp.pl .\n"; 53 | } 54 | 55 | 56 | $idlist = shift @ARGV; 57 | open(F, "<$idlist") || die "Could not open id-list file $idlist"; 58 | while() { 59 | @A = split; 60 | @A>=1 || die "Invalid id-list file line $_"; 61 | $seen{$A[0]} = 1; 62 | } 63 | 64 | if ($field == 1) { # Treat this as special case, since it is common. 65 | while(<>) { 66 | $_ =~ m/\s*(\S+)\s*/ || die "Bad line $_, could not get first field."; 67 | # $1 is what we filter on. 68 | if ((!$exclude && $seen{$1}) || ($exclude && !defined $seen{$1})) { 69 | print $_; 70 | } 71 | } 72 | } else { 73 | while(<>) { 74 | @A = split; 75 | @A > 0 || die "Invalid scp file line $_"; 76 | @A >= $field || die "Invalid scp file line $_"; 77 | if ((!$exclude && $seen{$A[$field-1]}) || ($exclude && !defined $seen{$A[$field-1]})) { 78 | print $_; 79 | } 80 | } 81 | } 82 | 83 | # tests: 84 | # the following should print "foo 1" 85 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl <(echo foo) 86 | # the following should print "bar 2". 87 | # ( echo foo 1; echo bar 2 ) | utils/filter_scp.pl -f 2 <(echo 2) 88 | -------------------------------------------------------------------------------- /utils/gen_topo.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (author: Daniel Povey) 4 | 5 | # Generate a topology file. This allows control of the number of states in the 6 | # non-silence HMMs, and in the silence HMMs. 
7 | 8 | if (@ARGV != 4) { 9 | print STDERR "Usage: utils/gen_topo.pl \n"; 10 | print STDERR "e.g.: utils/gen_topo.pl 3 5 4:5:6:7:8:9:10 1:2:3\n"; 11 | exit (1); 12 | } 13 | 14 | ($num_nonsil_states, $num_sil_states, $nonsil_phones, $sil_phones) = @ARGV; 15 | 16 | ( $num_nonsil_states >= 1 && $num_nonsil_states <= 100 ) || 17 | die "Unexpected number of nonsilence-model states $num_nonsil_states\n"; 18 | (( $num_sil_states == 1 || $num_sil_states >= 3) && $num_sil_states <= 100 ) || 19 | die "Unexpected number of silence-model states $num_sil_states\n"; 20 | 21 | $nonsil_phones =~ s/:/ /g; 22 | $sil_phones =~ s/:/ /g; 23 | $nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; 24 | $sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; 25 | 26 | print "\n"; 27 | print "\n"; 28 | print "\n"; 29 | print "$nonsil_phones\n"; 30 | print "\n"; 31 | for ($state = 0; $state < $num_nonsil_states; $state++) { 32 | $statep1 = $state+1; 33 | print " $state $state $state 0.75 $statep1 0.25 \n"; 34 | } 35 | print " $num_nonsil_states \n"; # non-emitting final state. 36 | print "\n"; 37 | # Now silence phones. They have a different topology-- apart from the first and 38 | # last states, it's fully connected, as long as you have >= 3 states. 39 | 40 | if ($num_sil_states > 1) { 41 | $transp = 1.0 / ($num_sil_states-1); 42 | print "\n"; 43 | print "\n"; 44 | print "$sil_phones\n"; 45 | print "\n"; 46 | print " 0 0 "; 47 | for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last 48 | # emitting state. 49 | print " $nextstate $transp "; 50 | } 51 | print "\n"; 52 | for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to 53 | # themselves and to the last emitting state. 
54 | print " $state $state "; 55 | for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) { 56 | print " $nextstate $transp "; 57 | } 58 | print "\n"; 59 | } 60 | # Final emitting state (non-skippable). 61 | $state = $num_sil_states-1; 62 | print " $state $state $state 0.75 $num_sil_states 0.25 \n"; 63 | # Final nonemitting state: 64 | print " $num_sil_states \n"; 65 | print "\n"; 66 | } else { 67 | print "\n"; 68 | print "\n"; 69 | print "$sil_phones\n"; 70 | print "\n"; 71 | print " 0 0 "; 72 | print " 0 0.75 "; 73 | print " 1 0.25 "; 74 | print "\n"; 75 | print " $num_nonsil_states \n"; # non-emitting final state. 76 | print "\n"; 77 | } 78 | 79 | print "\n"; 80 | -------------------------------------------------------------------------------- /utils/subset_scp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | # Copyright 2010-2011 Microsoft Corporation 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # This program selects a subset of N elements in the scp. 19 | 20 | # By default, it selects them evenly from throughout the scp, in order to avoid 21 | # selecting too many from the same speaker. It prints them on the standard 22 | # output. 23 | # With the option --first, it just selects the N first utterances. 
24 | # With the option --last, it just selects the N last utterances. 25 | 26 | # Last modified by JHU & HKUST @2013 27 | 28 | 29 | $quiet = 0; 30 | $first = 0; 31 | $last = 0; 32 | 33 | if (@ARGV > 0 && $ARGV[0] eq "--quiet") { 34 | shift; 35 | $quiet = 1; 36 | } 37 | if (@ARGV > 0 && $ARGV[0] eq "--first") { 38 | shift; 39 | $first = 1; 40 | } 41 | if (@ARGV > 0 && $ARGV[0] eq "--last") { 42 | shift; 43 | $last = 1; 44 | } 45 | 46 | if(@ARGV < 2 ) { 47 | die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" . 48 | " --quiet causes it to not die if N < num lines in scp.\n" . 49 | " --first and --last make it equivalent to head or tail.\n" . 50 | "See also: filter_scp.pl\n"; 51 | } 52 | 53 | $N = shift @ARGV; 54 | if($N == 0) { 55 | die "First command-line parameter to subset_scp.pl must be an integer, got \"$N\""; 56 | } 57 | $inscp = shift @ARGV; 58 | open(I, "<$inscp") || die "Opening input scp file $inscp"; 59 | 60 | @F = (); 61 | while() { 62 | push @F, $_; 63 | } 64 | $numlines = @F; 65 | if($N > $numlines) { 66 | if ($quiet) { 67 | $N = $numlines; 68 | } else { 69 | die "You requested from subset_scp.pl more elements than available: $N > $numlines"; 70 | } 71 | } 72 | 73 | sub select_n { 74 | my ($start,$end,$num_needed) = @_; 75 | my $diff = $end - $start; 76 | if ($num_needed > $diff) { 77 | die "select_n: code error"; 78 | } 79 | if ($diff == 1 ) { 80 | if ($num_needed > 0) { 81 | print $F[$start]; 82 | } 83 | } else { 84 | my $halfdiff = int($diff/2); 85 | my $halfneeded = int($num_needed/2); 86 | select_n($start, $start+$halfdiff, $halfneeded); 87 | select_n($start+$halfdiff, $end, $num_needed - $halfneeded); 88 | } 89 | } 90 | 91 | if ( ! $first && ! $last) { 92 | if ($N > 0) { 93 | select_n(0, $numlines, $N); 94 | } 95 | } else { 96 | if ($first) { # --first option: same as head. 97 | for ($n = 0; $n < $N; $n++) { 98 | print $F[$n]; 99 | } 100 | } else { # --last option: same as tail. 
101 | for ($n = @F - $N; $n < @F; $n++) { 102 | print $F[$n]; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /utils/convert_ctm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 4 | 5 | # This takes as standard input a ctm file that's "relative to the utterance", 6 | # i.e. times are measured relative to the beginning of the segments, and it 7 | # uses a "segments" file (format: 8 | # utterance-id recording-id start-time end-time 9 | # ) and a "reco2file_and_channel" file (format: 10 | # recording-id basename-of-file 11 | 12 | $skip_unknown=undef; 13 | if ( $ARGV[0] eq "--skip-unknown" ) { 14 | $skip_unknown=1; 15 | shift @ARGV; 16 | } 17 | 18 | if (@ARGV < 2 || @ARGV > 3) { 19 | print STDERR "Usage: convert_ctm.pl [] > real-ctm\n"; 20 | exit(1); 21 | } 22 | 23 | $segments = shift @ARGV; 24 | $reco2file_and_channel = shift @ARGV; 25 | 26 | open(S, "<$segments") || die "opening segments file $segments"; 27 | while() { 28 | @A = split(" ", $_); 29 | @A == 4 || die "Bad line in segments file: $_"; 30 | ($utt, $recording_id, $begin_time, $end_time) = @A; 31 | $utt2reco{$utt} = $recording_id; 32 | $begin{$utt} = $begin_time; 33 | $end{$utt} = $end_time; 34 | } 35 | close(S); 36 | open(R, "<$reco2file_and_channel") || die "open reco2file_and_channel file $reco2file_and_channel"; 37 | while() { 38 | @A = split(" ", $_); 39 | @A == 3 || die "Bad line in reco2file_and_channel file: $_"; 40 | ($recording_id, $file, $channel) = @A; 41 | $reco2file{$recording_id} = $file; 42 | $reco2channel{$recording_id} = $channel; 43 | } 44 | 45 | 46 | # Now process the ctm file, which is either the standard input or the third 47 | # command-line argument. 
48 | $num_done = 0; 49 | while(<>) { 50 | @A= split(" ", $_); 51 | ( @A == 5 || @A == 6 ) || die "Unexpected ctm format: $_"; 52 | # lines look like: 53 | # 1 [ confidence ] 54 | ($utt, $one, $wbegin, $wlen, $w, $conf) = @A; 55 | $reco = $utt2reco{$utt}; 56 | if (!defined $reco) { 57 | next if defined $skip_unknown; 58 | die "Utterance-id $utt not defined in segments file $segments"; 59 | } 60 | $file = $reco2file{$reco}; 61 | $channel = $reco2channel{$reco}; 62 | if (!defined $file || !defined $channel) { 63 | die "Recording-id $reco not defined in reco2file_and_channel file $reco2file_and_channel"; 64 | } 65 | $b = $begin{$utt}; 66 | $e = $end{$utt}; 67 | $wbegin_r = $wbegin + $b; # Make it relative to beginning of the recording. 68 | $wbegin_r = sprintf("%.2f", $wbegin_r); 69 | $wlen = sprintf("%.2f", $wlen); 70 | if (defined $conf) { 71 | $line = "$file $channel $wbegin_r $wlen $w $conf\n"; 72 | } else { 73 | $line = "$file $channel $wbegin_r $wlen $w\n"; 74 | } 75 | if ($wbegin_r + $wlen > $e + 0.01) { 76 | print STDERR "Warning: word appears to be past end of recording; line is $line"; 77 | } 78 | print $line; # goes to stdout. 79 | $num_done++; 80 | } 81 | 82 | if ($num_done == 0) { exit 1; } else { exit 0; } 83 | 84 | __END__ 85 | 86 | # Test example [also test it without the 0.5's] 87 | echo utt reco 10.0 20.0 > segments 88 | echo reco file A > reco2file_and_channel 89 | echo utt 1 8.0 1.0 word 0.5 > ctm_in 90 | echo file A 18.00 1.00 word 0.5 > ctm_out 91 | utils/convert_ctm.pl segments reco2file_and_channel ctm_in | cmp - ctm_out || echo error 92 | rm segments reco2file_and_channel ctm_in ctm_out 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /utils/summarize_logs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 
4 | 5 | #scalar(@ARGV) >= 1 && print STDERR "Usage: summarize_warnings.pl \n" && exit 1; 6 | 7 | sub split_hundreds { # split list of filenames into groups of 100. 8 | my $names = shift @_; 9 | my @A = split(" ", $names); 10 | my @ans = (); 11 | while (@A > 0) { 12 | my $group = ""; 13 | for ($x = 0; $x < 100 && @A>0; $x++) { 14 | $fname = pop @A; 15 | $group .= "$fname "; 16 | } 17 | push @ans, $group; 18 | } 19 | return @ans; 20 | } 21 | 22 | sub parse_accounting_entry { 23 | $entry= shift @_; 24 | 25 | @elems = split " ", $entry; 26 | 27 | $time=undef; 28 | $threads=undef; 29 | foreach $elem (@elems) { 30 | if ( $elem=~ m/time=(\d+)/ ) { 31 | $elem =~ s/time=(\d+)/$1/; 32 | $time = $elem; 33 | } elsif ( $elem=~ m/threads=(\d+)/ ) { 34 | $elem =~ s/threads=(\d+)/$1/g; 35 | $threads = $elem; 36 | } else { 37 | die "Unknown entry \"$elem\" when parsing \"$entry\" \n"; 38 | } 39 | } 40 | 41 | if (defined($time) and defined($threads) ) { 42 | return ($time, $threads); 43 | } else { 44 | die "The accounting entry \"$entry\" did not contain all necessary attributes"; 45 | } 46 | } 47 | 48 | foreach $dir (@ARGV) { 49 | 50 | #$dir = $ARGV[0]; 51 | print $dir 52 | 53 | ! -d $dir && print STDERR "summarize_warnings.pl: no such directory $dir\n" ; 54 | 55 | $dir =~ s:/$::; # Remove trailing slash. 56 | 57 | 58 | # Group the files into categories where all have the same base-name. 59 | foreach $f (glob ("$dir/*.log")) { 60 | $f_category = $f; 61 | # do next expression twice; s///g doesn't work as they overlap. 
62 | $f_category =~ s:\.\d+\.(?!\d+):.*.:; 63 | #$f_category =~ s:\.\d+\.:.*.:; 64 | $fmap{$f_category} .= " $f"; 65 | } 66 | } 67 | 68 | foreach $c (sort (keys %fmap) ) { 69 | $n = 0; 70 | foreach $fgroup (split_hundreds($fmap{$c})) { 71 | $n += `grep -w WARNING $fgroup | wc -l`; 72 | } 73 | if ($n != 0) { 74 | print "$n warnings in $c\n" 75 | } 76 | } 77 | foreach $c (sort (keys %fmap)) { 78 | $n = 0; 79 | foreach $fgroup (split_hundreds($fmap{$c})) { 80 | $n += `grep -w ERROR $fgroup | wc -l`; 81 | } 82 | if ($n != 0) { 83 | print "$n errors in $c\n" 84 | } 85 | } 86 | 87 | $supertotal_cpu_time=0.0; 88 | $supertotal_clock_time=0.0; 89 | $supertotal_threads=0.0; 90 | 91 | foreach $c (sort (keys %fmap)) { 92 | $n = 0; 93 | 94 | $total_cpu_time=0.0; 95 | $total_clock_time=0.0; 96 | $total_threads=0.0; 97 | foreach $fgroup (split_hundreds($fmap{$c})) { 98 | $lines=`grep -a "# Accounting: " $fgroup |sed 's/.* Accounting: *//g'`; 99 | 100 | #print $lines ."\n"; 101 | 102 | @entries = split "\n", $lines; 103 | 104 | foreach $line (@entries) { 105 | $time, $threads = parse_accounting_entry($line); 106 | 107 | $total_cpu_time += $time * $threads; 108 | $total_threads += $threads; 109 | if ( $time > $total_clock_time ) { 110 | $total_clock_time = $time; 111 | } 112 | } 113 | } 114 | print "total_cpu_time=$total_cpu_time clock_time=$total_clock_time total_threads=$total_threads group=$c\n"; 115 | 116 | $supertotal_cpu_time += $total_cpu_time; 117 | $supertotal_clock_time += $total_clock_time; 118 | $supertotal_threads += $total_threads; 119 | } 120 | print "total_cpu_time=$supertotal_cpu_time clock_time=$supertotal_clock_time total_threads=$supertotal_threads group=all\n"; 121 | 122 | -------------------------------------------------------------------------------- /utils/rnnlm_compute_scores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute scores from RNNLM. This script takes a directory 4 | # $dir (e.g. 
dir=local/rnnlm/rnnlm.voc30.hl30 ), 5 | # where it expects the files: 6 | # rnnlm wordlist.rnn unk.probs, 7 | # and also an input file location where it can get the sentences to score, and 8 | # an output file location to put the scores (negated logprobs) for each 9 | # sentence. This script uses the Kaldi-style "archive" format, so the input and 10 | # output files will have a first field that corresponds to some kind of 11 | # utterance-id or, in practice, utterance-id-1, utterance-id-2, etc., for the 12 | # N-best list. 13 | # 14 | # Here, "wordlist.rnn" is the set of words, like a vocabulary, 15 | # that the RNN was trained on (note, it won't include or ), 16 | # plus which is a kind of class where we put low-frequency 17 | # words; unk.probs gives the probs for words given this class, and it 18 | # has, on each line, "word prob". 19 | 20 | rnnlm_ver=rnnlm-0.3e 21 | ensure_normalized_probs=false # if true then we add the neccesary options to 22 | # normalize the probabilities of RNNLM 23 | # e.g. when using faster-rnnlm in the nce mode 24 | 25 | . ./path.sh || exit 1; 26 | . utils/parse_options.sh 27 | 28 | rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm 29 | 30 | [ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1; 31 | 32 | if [ $# != 4 ]; then 33 | echo "Usage: rnnlm_compute_scores.sh " 34 | exit 1; 35 | fi 36 | 37 | dir=$1 38 | tempdir=$2 39 | text_in=$3 40 | scores_out=$4 41 | 42 | for x in rnnlm wordlist.rnn unk.probs; do 43 | if [ ! -f $dir/$x ]; then 44 | echo "rnnlm_compute_scores.sh: expected file $dir/$x to exist." 45 | exit 1; 46 | fi 47 | done 48 | 49 | mkdir -p $tempdir 50 | cat $text_in | awk '{for (x=2;x<=NF;x++) {printf("%s ", $x)} printf("\n");}' >$tempdir/text 51 | cat $text_in | awk '{print $1}' > $tempdir/ids # e.g. utterance ids. 
52 | cat $tempdir/text | awk -v voc=$dir/wordlist.rnn -v unk=$dir/unk.probs \ 53 | -v logprobs=$tempdir/loglikes.oov \ 54 | 'BEGIN{ while((getline0) { invoc[$1]=1; } while ((getline0){ unkprob[$1]=$2;} } 55 | { logprob=0; 56 | if (NF==0) { printf ""; logprob = log(1.0e-07); 57 | print "Warning: empty sequence." | "cat 1>&2"; } 58 | for (x=1;x<=NF;x++) { w=$x; 59 | if (invoc[w]) { printf("%s ",w); } else { 60 | printf(" "); 61 | if (unkprob[w] != 0) { logprob += log(unkprob[w]); } 62 | else { print "Warning: unknown word ", w | "cat 1>&2"; logprob += log(1.0e-07); }}} 63 | printf("\n"); print logprob > logprobs } ' > $tempdir/text.nounk 64 | 65 | # OK, now we compute the scores on the text with OOVs replaced 66 | # with 67 | 68 | if [ $rnnlm_ver == "faster-rnnlm" ]; then 69 | extra_options= 70 | if [ "$ensure_normalized_probs" = true ]; then 71 | extra_options="--nce-accurate-test 1" 72 | fi 73 | $rnnlm $extra_options -independent -rnnlm $dir/rnnlm -test $tempdir/text.nounk -nbest -debug 0 | \ 74 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 75 | else 76 | # add the utterance_id as required by Mikolove's rnnlm 77 | paste $tempdir/ids $tempdir/text.nounk > $tempdir/id_text.nounk 78 | 79 | $rnnlm -independent -rnnlm $dir/rnnlm -test $tempdir/id_text.nounk -nbest -debug 0 | \ 80 | awk '{print $1*log(10);}' > $tempdir/loglikes.rnn 81 | fi 82 | 83 | [ `cat $tempdir/loglikes.rnn | wc -l` -ne `cat $tempdir/loglikes.oov | wc -l` ] && \ 84 | echo "rnnlm rescoring failed" && exit 1; 85 | 86 | paste $tempdir/loglikes.rnn $tempdir/loglikes.oov | awk '{print -($1+$2);}' >$tempdir/scores 87 | 88 | # scores out, with utterance-ids. 
89 | paste $tempdir/ids $tempdir/scores > $scores_out 90 | 91 | -------------------------------------------------------------------------------- /utils/sym2int.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) 3 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 11 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 12 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 13 | # MERCHANTABLITY OR NON-INFRINGEMENT. 14 | # See the Apache 2 License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | 18 | $ignore_oov = 0; 19 | 20 | for($x = 0; $x < 2; $x++) { 21 | if ($ARGV[0] eq "--map-oov") { 22 | shift @ARGV; 23 | $map_oov = shift @ARGV; 24 | if ($map_oov eq "-f" || $map_oov =~ m/words\.txt$/ || $map_oov eq "") { 25 | # disallow '-f', the empty string and anything ending in words.txt as the 26 | # OOV symbol because these are likely command-line errors. 27 | die "the --map-oov option requires an argument"; 28 | } 29 | } 30 | if ($ARGV[0] eq "-f") { 31 | shift @ARGV; 32 | $field_spec = shift @ARGV; 33 | if ($field_spec =~ m/^\d+$/) { 34 | $field_begin = $field_spec - 1; $field_end = $field_spec - 1; 35 | } 36 | if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) 37 | if ($1 ne "") { 38 | $field_begin = $1 - 1; # Change to zero-based indexing. 39 | } 40 | if ($2 ne "") { 41 | $field_end = $2 - 1; # Change to zero-based indexing. 
42 | } 43 | } 44 | if (!defined $field_begin && !defined $field_end) { 45 | die "Bad argument to -f option: $field_spec"; 46 | } 47 | } 48 | } 49 | 50 | $symtab = shift @ARGV; 51 | if (!defined $symtab) { 52 | print STDERR "Usage: sym2int.pl [options] symtab [input transcriptions] > output transcriptions\n" . 53 | "options: [--map-oov ] [-f ]\n" . 54 | "note: can look like 4-5, or 4-, or 5-, or 1.\n"; 55 | } 56 | open(F, "<$symtab") || die "Error opening symbol table file $symtab"; 57 | while() { 58 | @A = split(" ", $_); 59 | @A == 2 || die "bad line in symbol table file: $_"; 60 | $sym2int{$A[0]} = $A[1] + 0; 61 | } 62 | 63 | if (defined $map_oov && $map_oov !~ m/^\d+$/) { # not numeric-> look it up 64 | if (!defined $sym2int{$map_oov}) { die "OOV symbol $map_oov not defined."; } 65 | $map_oov = $sym2int{$map_oov}; 66 | } 67 | 68 | $num_warning = 0; 69 | $max_warning = 20; 70 | 71 | while (<>) { 72 | @A = split(" ", $_); 73 | @B = (); 74 | for ($n = 0; $n < @A; $n++) { 75 | $a = $A[$n]; 76 | if ( (!defined $field_begin || $n >= $field_begin) 77 | && (!defined $field_end || $n <= $field_end)) { 78 | $i = $sym2int{$a}; 79 | if (!defined ($i)) { 80 | if (defined $map_oov) { 81 | if ($num_warning++ < $max_warning) { 82 | print STDERR "sym2int.pl: replacing $a with $map_oov\n"; 83 | if ($num_warning == $max_warning) { 84 | print STDERR "sym2int.pl: not warning for OOVs any more times\n"; 85 | } 86 | } 87 | $i = $map_oov; 88 | } else { 89 | $pos = $n+1; 90 | die "sym2int.pl: undefined symbol $a (in position $pos)\n"; 91 | } 92 | } 93 | $a = $i; 94 | } 95 | push @B, $a; 96 | } 97 | print join(" ", @B); 98 | print "\n"; 99 | } 100 | if ($num_warning > 0) { 101 | print STDERR "** Replaced $num_warning instances of OOVs with $map_oov\n"; 102 | } 103 | 104 | exit(0); 105 | -------------------------------------------------------------------------------- /utils/lang/internal/apply_unk_lm.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | 3 | # Copyright 2016 Johns Hopkins University (Author: Daniel Povey); 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | # Begin configuration section. 20 | 21 | # end configuration sections 22 | 23 | echo "$0 $@" # Print the command line for logging 24 | [ -f path.sh ] && . ./path.sh 25 | 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# -ne 2 ]; then 30 | echo "Usage: $0 [options] " 31 | echo "e.g.: $0 exp/make_unk/unk_fst.txt data/lang_unk" 32 | echo "" 33 | echo "This script, which is called from the end of prepare_lang.sh," 34 | echo "inserts the unknown-word LM FST into the lexicon FSTs" 35 | echo "/L.fst and /L_disambig.fst in place of" 36 | echo "the special disambiguation symbol #2 (which was inserted by" 37 | echo "add_lex_disambig.pl as a placeholder for this FST)." 38 | echo "" 39 | echo " : A text-form FST, typically with the name" 40 | echo " unk_fst.txt. We will remove all symbols from the" 41 | echo " output before applying it." 42 | echo " : A partially built lang/ directory. We modify" 43 | echo " L.fst and L_disambig.fst, and read only words.txt." 44 | exit 1; 45 | fi 46 | 47 | 48 | unk_lm_fst=$1 49 | lang=$2 50 | 51 | set -e 52 | 53 | for f in "$unk_lm_fst" $lang/L.fst $lang/L_disambig.fst $lang/words.txt $lang/oov.int; do 54 | [ ! 
-f $f ] && echo "$0: expected file $f to exist" && exit 1; 55 | done 56 | 57 | unused_phone_label=$(tail -n 1 $lang/phones.txt | awk '{print $2 + 1}') 58 | label_to_replace=$(awk '{if ($1 == "#2") {print $2;}}' <$lang/phones.txt) 59 | ! [ "$unused_phone_label" -eq "$unused_phone_label" -a "$label_to_replace" -eq "$label_to_replace" ] && \ 60 | echo "$0: error getting unused phone label or label for #2" && exit 1 61 | 62 | 63 | # OK, now fstreplace works based on olabels, but we actually want to deal with ilabels, 64 | # so we need to invert all the FSTs before and after doing fstreplace. 65 | awk '{if(NF>=4) $4 = ""; print }' <$unk_lm_fst | \ 66 | fstcompile --isymbols=$lang/phones.txt --osymbols=$lang/words.txt | \ 67 | fstinvert > $lang/unk_temp.fst 68 | 69 | num_states_unk=$(fstinfo $lang/unk_temp.fst | grep '# of states' | awk '{print $NF}') 70 | 71 | # fstreplace usage is: 72 | # Usage: fstreplace root.fst rootlabel [rule1.fst label1 ...] [out.fst] 73 | # ... the rootlabel should just be an otherwise unused symbol. 74 | # all the labels are olabels (word labels).. that is hardcoded in fstreplace. 75 | 76 | for f in L.fst L_disambig.fst; do 77 | 78 | # with OpenFst tools, to refer to the standard input/output you need to use 79 | # the empty string '' and not '-'. 80 | fstinvert $lang/$f | fstreplace '' "$unused_phone_label" $lang/unk_temp.fst "$label_to_replace" | fstinvert > $lang/${f}.temp 81 | 82 | num_states_old=$(fstinfo $lang/$f | grep '# of states' | awk '{print $NF}') 83 | num_states_new=$(fstinfo $lang/${f}.temp | grep '# of states' | awk '{print $NF}') 84 | num_states_added=$[$num_states_new-$num_states_old] 85 | echo "$0: in $f, substituting in the unknown-word LM (which had $num_states_unk states) added $num_states_added new FST states." 
86 | mv -f $lang/${f}.temp $lang/$f 87 | done 88 | 89 | rm $lang/unk_temp.fst 90 | 91 | exit 0; 92 | -------------------------------------------------------------------------------- /utils/format_lm_sri.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2012 Arnab Ghoshal 4 | # Copyright 2010-2011 Microsoft Corporation 5 | 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 14 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 15 | # MERCHANTABLITY OR NON-INFRINGEMENT. 16 | # See the Apache 2 License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | # Begin configuration section. 20 | srilm_opts="-subset -prune-lowprobs -unk -tolower" 21 | # end configuration sections 22 | 23 | 24 | . utils/parse_options.sh 25 | 26 | if [ $# -ne 4 ] && [ $# -ne 3 ]; then 27 | echo "Usage: $0 [options] [] " 28 | echo "The argument is no longer needed but is supported for back compatibility" 29 | echo "E.g.: utils/format_lm_sri.sh data/lang data/local/lm/foo.kn.gz data/local/dict/lexicon.txt data/lang_test" 30 | echo "Converts ARPA-format language models to FSTs. Change the LM vocabulary using SRILM." 
31 | echo "Note: if you want to just convert ARPA LMs to FSTs, there is a simpler way to do this" 32 | echo "that doesn't require SRILM: see utils/format_lm.sh" 33 | echo "options:" 34 | echo " --help # print this message and exit" 35 | echo " --srilm-opts STRING # options to pass to SRILM tools (default: '$srilm_opts')" 36 | exit 1; 37 | fi 38 | 39 | 40 | if [ $# -eq 4 ] ; then 41 | lang_dir=$1 42 | lm=$2 43 | lexicon=$3 44 | out_dir=$4 45 | else 46 | lang_dir=$1 47 | lm=$2 48 | out_dir=$3 49 | fi 50 | 51 | mkdir -p $out_dir 52 | 53 | for f in $lm $lang_dir/words.txt; do 54 | if [ ! -f $f ]; then 55 | echo "$0: expected input file $f to exist." 56 | exit 1; 57 | fi 58 | done 59 | 60 | [ -f ./path.sh ] && . ./path.sh 61 | 62 | loc=`which change-lm-vocab` 63 | if [ -z $loc ]; then 64 | if uname -a | grep 64 >/dev/null; then # some kind of 64 bit... 65 | sdir=`pwd`/../../../tools/srilm/bin/i686-m64 66 | else 67 | sdir=`pwd`/../../../tools/srilm/bin/i686 68 | fi 69 | if [ -f $sdir/../change-lm-vocab ]; then 70 | echo Using SRILM tools from $sdir 71 | export PATH=$PATH:$sdir:$sdir/.. 72 | else 73 | echo You appear to not have SRILM tools installed, either on your path, 74 | echo or installed in $sdir. cd to ../../../tools and run 75 | echo extras/install_srilm.sh. 76 | exit 1 77 | fi 78 | fi 79 | 80 | echo "Converting '$lm' to FST" 81 | tmpdir=$(mktemp -d /tmp/kaldi.XXXX); 82 | trap 'rm -rf "$tmpdir"' EXIT 83 | 84 | mkdir -p $out_dir 85 | cp -r $lang_dir/* $out_dir || exit 1; 86 | 87 | lm_base=$(basename $lm '.gz') 88 | awk '{print $1}' $out_dir/words.txt > $tmpdir/voc || exit 1; 89 | 90 | # Change the LM vocabulary to be the intersection of the current LM vocabulary 91 | # and the set of words in the pronunciation lexicon. This also renormalizes the 92 | # LM by recomputing the backoff weights, and remove those ngrams whose 93 | # probabilities are lower than the backed-off estimates. 
change-lm-vocab -vocab $tmpdir/voc -lm $lm -write-lm - $srilm_opts | \
  arpa2fst --disambig-symbol=#0 \
           --read-symbol-table=$out_dir/words.txt - $out_dir/G.fst || exit 1

# Sanity check: G.fst should be close to stochastic (see note below).
fstisstochastic $out_dir/G.fst

# The output is like:
# 9.14233e-05 -0.259833
# we do expect the first of these 2 numbers to be close to zero (the second is
# nonzero because the backoff weights make the states sum to >1).

echo "Succeeded in formatting LM '$lm' -> '$out_dir/G.fst'"
-------------------------------------------------------------------------------- /utils/parse_options.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
#                Arnab Ghoshal, Karel Vesely

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).


###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###

# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
  if [ "${!argpos}" == "--config" ]; then
    argpos_plus1=$((argpos+1))
    config=${!argpos_plus1}
    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
    . $config  # source the config file.
  fi
done


###
### Now we process the command line options
###
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    # If the enclosing script is called with --help option, print the help
    # message and exit.  Scripts should put help messages in $help_message
    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
      else printf "$help_message\n" 1>&2 ; fi;
      exit 0 ;;
    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
      exit 1 ;;
    # If the first command-line argument begins with "--" (e.g. --foo-bar),
    # then work out the variable name as $name, which will equal "foo_bar".
    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
      # Next we test whether the variable in question is undefined -- if so it's
      # an invalid option and we die.  Note: $0 evaluates to the name of the
      # enclosing script.
      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
      # is undefined.  We then have to wrap this test inside "eval" because
      # foo_bar is itself inside a variable ($name).
      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      oldval="`eval echo \\$$name`";
      # Work out whether we seem to be expecting a Boolean argument.
      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval $name=\"$2\";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;
    *) break;
  esac
done


# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;


true; # so this script returns exit code 0.
-------------------------------------------------------------------------------- /main.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Author: Yang Zhang
# Mail: zyziszy@foxmail.com
# Apache 2.0.

import os
import numpy as np
import tensorflow as tf  # tf-gpu 1.8

from model.vae import *
from model.model_utils import *


'''flags'''

# Hyper-parameters and data paths, settable on the command line through
# tf.app.flags (TensorFlow 1.x).
# NOTE(review): 'KL_weigth' is a misspelling of 'KL_weight'; it is kept as-is
# because it is the public CLI flag name and also a keyword argument of VAE.
tf.app.flags.DEFINE_integer('epoch', 50, 'epoch num')

tf.app.flags.DEFINE_integer('batch_size', 200, 'batch size')

tf.app.flags.DEFINE_integer('n_hidden', 1800, 'dim of hidden')

tf.app.flags.DEFINE_integer('z_dim', 200, 'dim of z')

tf.app.flags.DEFINE_float('learn_rate', 0.00001, 'learn rate')

tf.app.flags.DEFINE_float('beta1', 0.5, 'beta1 for AdamOptimizer')

tf.app.flags.DEFINE_float('KL_weigth', 0.04, 'KL_weigth')

tf.app.flags.DEFINE_float('cohesive_weight', 0., 'cohesive loss')

tf.app.flags.DEFINE_string('dataset_path', './data/voxceleb_combined_200000/xvector.npz',
                           'x vector dataset path (npz format)')

tf.app.flags.DEFINE_string('spk_path', './data/voxceleb_combined_200000/spk.npz',
                           'utt2spk label dataset path (npz format)')

tf.app.flags.DEFINE_integer('is_training', 1, 'Training/Testing.')

params = tf.app.flags.FLAGS  # store flag

'''model's log and checkpoints paths'''
# The experiment directory name encodes the main hyper-parameters
# (e.g. experiments/z200_h1800_kl0.04_c0.0) and is anchored at this
# file's own directory so runs are reproducible regardless of cwd.
experiment_dir = '/experiments/'+'z' + \
    str(params.z_dim)+'_h' + str(params.n_hidden) + \
    '_kl'+str(params.KL_weigth)+'_c'+str(params.cohesive_weight)

experiment_dir = os.path.dirname(os.path.abspath(__file__))+experiment_dir
checkpoint_dir = experiment_dir+'/checkpoint'
log_dir = experiment_dir+'/train_log'
print('model/checkpoint/logs will save in {}.'.format(experiment_dir))


'''build the model and train'''
with tf.Session() as sess:
    # VAE comes from model/vae.py via the star-import above.
    vae_model = VAE(
        sess=sess,
        epoch=params.epoch,
        batch_size=params.batch_size,
        z_dim=params.z_dim,
        dataset_path=params.dataset_path,
        checkpoint_dir=checkpoint_dir,
        log_dir=log_dir,
        n_hidden=params.n_hidden,
        KL_weigth=params.KL_weigth,
cohesive_weight=params.cohesive_weight, 67 | learning_rate=params.learn_rate, 68 | beta1=params.beta1, 69 | spk_path=params.spk_path 70 | ) 71 | if params.is_training: 72 | vae_model.train() 73 | print('model / checkpoint / logs will save in {}.'.format(experiment_dir)) 74 | 75 | else: 76 | paths = ["./data/voxceleb_combined_200000/xvector", 77 | "./data/sitw_dev/enroll/xvector", 78 | "./data/sitw_dev/test/xvector", 79 | "./data/sitw_eval/enroll/xvector", 80 | "./data/sitw_eval/test/xvector" 81 | ] 82 | for path in paths: 83 | if os.path.exists(path+'.ark') == True: 84 | os.remove(path+'.ark') 85 | print('delete {}.ark'.format(path)) 86 | 87 | for path in paths: 88 | # load data 89 | vector = np.load(path+'.npz')['vector'] 90 | labels = np.load(path+'.npz')['utt'] 91 | 92 | # predict 93 | predict_mu = vae_model.predict(vector) 94 | print(path) 95 | print(predict_mu.shape) 96 | # get_skew_and_kurt(predict_mu) 97 | with open(path+'.ark', 'w') as f: 98 | for i in range(predict_mu.shape[0]): 99 | f.write(str(labels[i])) 100 | f.write(' [ ') 101 | for j in predict_mu[i]: 102 | f.write(str(j)) 103 | f.write(' ') 104 | f.write(']') 105 | f.write('\n') 106 | print('{}.ark is done!'.format(path)) 107 | print('\nall done!') 108 | 109 | print('done') 110 | -------------------------------------------------------------------------------- /utils/map_arpa_lm.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | use strict; 8 | use warnings; 9 | use Getopt::Long; 10 | 11 | my $Usage = < < input-arpa >output-arpa 22 | e.g.: utils/map_arpa_lm.pl words.txt arpa_lm.int 23 | 24 | Allowed options: 25 | --sym2int : If true, maps words to integers, other wise maps integers to 26 | words. 
(boolean, default = true) 27 | 28 | EOU 29 | 30 | my $sym2int = "true"; 31 | GetOptions('sym2int=s' => \$sym2int); 32 | 33 | ($sym2int eq "true" || $sym2int eq "false") || 34 | die "$0: Bad value for option --sym2int\n"; 35 | 36 | if (@ARGV != 1) { 37 | die $Usage; 38 | } 39 | 40 | # Gets parameters. 41 | my $symtab = shift @ARGV; 42 | my $arpa_in = shift @ARGV; 43 | my $arpa_out = shift @ARGV; 44 | 45 | # Opens files. 46 | open(M, "<$symtab") || die "$0: Fail to open $symtab\n"; 47 | 48 | # Reads in the mapper. 49 | my %mapper; 50 | while () { 51 | chomp; 52 | my @col = split(/[\s]+/, $_); 53 | @col == 2 || die "$0: Bad line in mapper file \"$_\"\n"; 54 | if ($sym2int eq "true") { 55 | if (defined($mapper{$col[0]})) { 56 | die "$0: Duplicate entry \"$col[0]\"\n"; 57 | } 58 | $mapper{$col[0]} = $col[1]; 59 | } else { 60 | if (defined($mapper{$col[1]})) { 61 | die "$0: Duplicate entry \"$col[1]\"\n"; 62 | } 63 | $mapper{$col[1]} = $col[0]; 64 | } 65 | } 66 | 67 | my $num_oov_lines = 0; 68 | my $max_oov_warn = 20; 69 | 70 | # Parses Arpa n-gram language model. 71 | my $arpa = ""; 72 | my $current_order = -1; 73 | my %head_ngram_count; 74 | my %actual_ngram_count; 75 | while () { 76 | chomp; 77 | my @col = split(" ", $_); 78 | 79 | if ($current_order == -1 and ! m/^\\data\\$/) { 80 | next; 81 | } 82 | 83 | if (m/^\\data\\$/) { 84 | print STDERR "$0: Processing \"\\data\\\"\n"; 85 | print "$_\n"; 86 | $current_order = 0; 87 | } elsif (m/^\\[0-9]*-grams:$/) { 88 | $current_order = $_; 89 | $current_order =~ s/-grams:$//g; 90 | $current_order =~ s/^\\//g; 91 | print "$_\n"; 92 | print STDERR "$0: Processing \"\\$current_order-grams:\\\"\n"; 93 | } elsif (m/^\\end\\/) { 94 | print "$_\n"; 95 | } elsif ($_ eq "") { 96 | if ($current_order >= 1) { 97 | print "\n"; 98 | } 99 | } else { 100 | if ($current_order == 0) { 101 | # echo head section. 102 | print "$_\n"; 103 | } else { 104 | # Parses n-gram section. 
105 | if (@col > 2 + $current_order || @col < 1 + $current_order) { 106 | die "$0: Bad line in arpa lm \"$_\"\n"; 107 | } 108 | my $prob = shift @col; 109 | my $is_oov = 0; 110 | for (my $i = 0; $i < $current_order; $i++) { 111 | my $temp = $mapper{$col[$i]}; 112 | if (!defined($temp)) { 113 | $is_oov = 1; 114 | $num_oov_lines++; 115 | last; 116 | } else { 117 | $col[$i] = $temp; 118 | } 119 | } 120 | if (!$is_oov) { 121 | my $rest_of_line = join(" ", @col); 122 | print "$prob\t$rest_of_line\n"; 123 | } else { 124 | if ($num_oov_lines < $max_oov_warn) { 125 | print STDERR "$0: Warning: OOV line $_\n"; 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | if ($num_oov_lines > 0) { 133 | print STDERR "$0: $num_oov_lines lines of the Arpa file contained OOVs and "; 134 | print STDERR "were not printed.\n"; 135 | } 136 | 137 | close(M); 138 | -------------------------------------------------------------------------------- /utils/pinyin_map.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use warnings; #sed replacement for -w perl parameter 3 | 4 | $num_args = $#ARGV + 1; 5 | if ($num_args != 1) { 6 | print "\nUsage: pinyin2phone.pl pinyin2phone\n"; 7 | exit; 8 | } 9 | 10 | open(MAPS, $ARGV[0]) or die("Could not open pinyin map file."); 11 | my %py2ph; foreach $line () { @A = split(" ", $line); 12 | $py = shift(@A); 13 | $py2ph{$py} = [@A]; 14 | } 15 | 16 | #foreach $word ( keys %py2ph ) { 17 | #foreach $i ( 0 .. 
$#{ $py2ph{$word} } ) { 18 | # print " $word = $py2ph{$word}[$i]"; 19 | #} 20 | #print " $#{ $py2ph{$word} }"; 21 | #print "\n"; 22 | #} 23 | 24 | my @entry; 25 | 26 | while () { 27 | @A = split(" ", $_); 28 | @entry = (); 29 | $W = shift(@A); 30 | push(@entry, $W); 31 | for($i = 0; $i < @A; $i++) { 32 | $initial= $A[$i]; $final = $A[$i]; 33 | #print $initial, " ", $final, "\n"; 34 | if ($A[$i] =~ /^CH[A-Z0-9]+$/) {$initial =~ s:(CH)[A-Z0-9]+:$1:; $final =~ s:CH([A-Z0-9]+):$1:;} 35 | elsif ($A[$i] =~ /^SH[A-Z0-9]+$/) {$initial =~ s:(SH)[A-Z0-9]+:$1:; $final =~ s:SH([A-Z0-9]+):$1:;} 36 | elsif ($A[$i] =~ /^ZH[A-Z0-9]+$/) {$initial =~ s:(ZH)[A-Z0-9]+:$1:; $final =~ s:ZH([A-Z0-9]+):$1:;} 37 | elsif ($A[$i] =~ /^B[A-Z0-9]+$/) {$initial =~ s:(B)[A-Z0-9]+:$1:; $final =~ s:B([A-Z0-9]+):$1:;} 38 | elsif ($A[$i] =~ /^C[A-Z0-9]+$/) {$initial =~ s:(C)[A-Z0-9]+:$1:; $final =~ s:C([A-Z0-9]+):$1:;} 39 | elsif ($A[$i] =~ /^D[A-Z0-9]+$/) {$initial =~ s:(D)[A-Z0-9]+:$1:; $final =~ s:D([A-Z0-9]+):$1:;} 40 | elsif ($A[$i] =~ /^F[A-Z0-9]+$/) {$initial =~ s:(F)[A-Z0-9]+:$1:; $final =~ s:F([A-Z0-9]+):$1:;} 41 | elsif ($A[$i] =~ /^G[A-Z0-9]+$/) {$initial =~ s:(G)[A-Z0-9]+:$1:; $final =~ s:G([A-Z0-9]+):$1:;} 42 | elsif ($A[$i] =~ /^H[A-Z0-9]+$/) {$initial =~ s:(H)[A-Z0-9]+:$1:; $final =~ s:H([A-Z0-9]+):$1:;} 43 | elsif ($A[$i] =~ /^J[A-Z0-9]+$/) {$initial =~ s:(J)[A-Z0-9]+:$1:; $final =~ s:J([A-Z0-9]+):$1:;} 44 | elsif ($A[$i] =~ /^K[A-Z0-9]+$/) {$initial =~ s:(K)[A-Z0-9]+:$1:; $final =~ s:K([A-Z0-9]+):$1:;} 45 | elsif ($A[$i] =~ /^L[A-Z0-9]+$/) {$initial =~ s:(L)[A-Z0-9]+:$1:; $final =~ s:L([A-Z0-9]+):$1:;} 46 | elsif ($A[$i] =~ /^M[A-Z0-9]+$/) {$initial =~ s:(M)[A-Z0-9]+:$1:; $final =~ s:M([A-Z0-9]+):$1:;} 47 | elsif ($A[$i] =~ /^N[A-Z0-9]+$/) {$initial =~ s:(N)[A-Z0-9]+:$1:; $final =~ s:N([A-Z0-9]+):$1:;} 48 | elsif ($A[$i] =~ /^P[A-Z0-9]+$/) {$initial =~ s:(P)[A-Z0-9]+:$1:; $final =~ s:P([A-Z0-9]+):$1:;} 49 | elsif ($A[$i] =~ /^Q[A-Z0-9]+$/) {$initial =~ s:(Q)[A-Z0-9]+:$1:; $final =~ 
s:Q([A-Z0-9]+):$1:;} 50 | elsif ($A[$i] =~ /^R[A-Z0-9]+$/) {$initial =~ s:(R)[A-Z0-9]+:$1:; $final =~ s:R([A-Z0-9]+):$1:;} 51 | elsif ($A[$i] =~ /^S[A-Z0-9]+$/) {$initial =~ s:(S)[A-Z0-9]+:$1:; $final =~ s:S([A-Z0-9]+):$1:;} 52 | elsif ($A[$i] =~ /^T[A-Z0-9]+$/) {$initial =~ s:(T)[A-Z0-9]+:$1:; $final =~ s:T([A-Z0-9]+):$1:;} 53 | elsif ($A[$i] =~ /^W[A-Z0-9]+$/) {$initial =~ s:(W)[A-Z0-9]+:$1:; $final =~ s:W([A-Z0-9]+):$1:;} 54 | elsif ($A[$i] =~ /^X[A-Z0-9]+$/) {$initial =~ s:(X)[A-Z0-9]+:$1:; $final =~ s:X([A-Z0-9]+):$1:;} 55 | elsif ($A[$i] =~ /^Y[A-Z0-9]+$/) {$initial =~ s:(Y)[A-Z0-9]+:$1:; $final =~ s:Y([A-Z0-9]+):$1:;} 56 | elsif ($A[$i] =~ /^Z[A-Z0-9]+$/) {$initial =~ s:(Z)[A-Z0-9]+:$1:; $final =~ s:Z([A-Z0-9]+):$1:;} 57 | if ($initial ne $A[$i]) { 58 | $tone = $final; 59 | $final =~ s:([A-Z]+)[0-9]:$1:; 60 | $tone =~ s:[A-Z]+([0-9]):$1:; 61 | if (!(exists $py2ph{$initial}) or !(exists $py2ph{$final})) { print "1: no entry find for ", $A[$i], " ", $initial, " ", $final; exit;} 62 | push(@entry, @{$py2ph{$initial}}); 63 | @tmp = @{$py2ph{$final}}; 64 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 65 | push(@entry, @tmp); 66 | } 67 | else { 68 | $tone = $A[$i]; 69 | $A[$i] =~ s:([A-Z]+)[0-9]:$1:; 70 | $tone =~ s:[A-Z]+([0-9]):$1:; 71 | if (!(exists $py2ph{$A[$i]})) { print "2: no entry find for ", $A[$i]; exit;} 72 | @tmp = @{$py2ph{$A[$i]}}; 73 | for($j = 0; $j < @tmp ; $j++) {$tmp[$j] = $tmp[$j].$tone;} 74 | push(@entry, @tmp); 75 | } 76 | } 77 | print "@entry"; 78 | print "\n"; 79 | } 80 | -------------------------------------------------------------------------------- /utils/subset_data_dir_tr_cv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2013 Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin); 3 | # Brno University of Technology (Author: Karel Vesely); 4 | # Johns Hopkins University (Author: Daniel Povey); 5 | # Apache 2.0 6 | 7 | # This script 
splits dataset to two parts : 8 | # training set from (100-P)% of speakers/utterances and 9 | # held-out set (or cross-validation) from P% of remaining speakers/remaining utterances, 10 | # which will be later on used for neural network training 11 | # 12 | # There are two options for choosing held-out (or cross-validation) set, either by 13 | # --cv-spk-percent P , which will give you CV set based on random chosen P% of speakers, or 14 | # --cv-utt-percent P , which will give you CV set based on last P% utterances in the dataset 15 | # 16 | # If you don't apply the above two options, by default the script will use --cv-utt-percent option, 17 | # and the default cross validation percentage portion is equal to 10% (i.e. P=10) 18 | # 19 | # The --cv-spk-percent option is useful if you would like to have subset chosen from random speakers order, 20 | # especially for the cases where dataset contains multiple different corpora, 21 | # where type of speakers or recording channels may be quite different 22 | 23 | # Begin configuration. 24 | cv_spk_percent= # % of speakers is parsed by option 25 | cv_utt_percent=10 # default 10% of total utterances 26 | seed=777 # use seed for speaker shuffling 27 | # End configuration. 28 | 29 | echo "$0 $@" # Print the command line for logging 30 | 31 | uttbase=true; # by default, we choose last 10% utterances for CV 32 | 33 | if [ "$1" == "--cv-spk-percent" ]; then 34 | uttbase=false; 35 | spkbase=true; 36 | fi 37 | 38 | [ -f path.sh ] && . ./path.sh; 39 | 40 | . parse_options.sh || exit 1; 41 | 42 | if [ $# != 3 ]; then 43 | echo "Usage: $0 [--cv-spk-percent P|--cv-utt-percent P] " 44 | echo " --cv-spk-percent P Cross Validation portion of the total speakers, recommend value is 10% (i.e. P=10)" 45 | echo " --cv-utt-percent P Cross Validation portion of the total utterances, default is 10% (i.e. 
P=10)" 46 | echo " " 47 | exit 1; 48 | fi 49 | 50 | srcdir=$1 51 | trndir=$2 52 | cvdir=$3 53 | 54 | ## use simple last P% utterance for CV 55 | if $uttbase; then 56 | if [ ! -f $srcdir/utt2spk ]; then 57 | echo "$0: no such file $srcdir/utt2spk" 58 | exit 1; 59 | fi 60 | 61 | #total number of lines 62 | N=$(cat $srcdir/utt2spk | wc -l) 63 | #get line number where (100-P)% of the data lies 64 | P_utt=$((N * cv_utt_percent / 100)) 65 | N_head=$((N -P_utt)) 66 | #move the boundary so it is located on speaker change 67 | N_head=$(cat $srcdir/utt2spk | uniq -f1 -c | awk 'BEGIN{n=0} { if(n+$1<='$N_head') { n += $1 } else { nextfile } } END{if(n==0)n='$N_head'; print n }') 68 | #the rest of the data will be that big 69 | N_tail=$((N-N_head)) 70 | 71 | #now call the subset_data_dir.sh and fix the directories 72 | subset_data_dir.sh --first $srcdir $N_head $trndir 73 | subset_data_dir.sh --last $srcdir $N_tail $cvdir 74 | 75 | exit 0; 76 | fi 77 | 78 | ## use random chosen P% speakers for CV 79 | if [ ! 
-f $srcdir/spk2utt ]; then 80 | echo "$0: no such file $srcdir/spk2utt" 81 | exit 1; 82 | fi 83 | 84 | #total, cv, train number of speakers 85 | N=$(cat $srcdir/spk2utt | wc -l) 86 | N_spk_cv=$((N * cv_spk_percent / 100)) 87 | N_spk_trn=$((N - N_spk_cv)) 88 | 89 | mkdir -p $cvdir $trndir 90 | 91 | #shuffle the speaker list 92 | awk '{print $1}' $srcdir/spk2utt | shuffle_list.pl --srand $seed > $trndir/_tmpf_randspk 93 | 94 | #split the train/cv 95 | head -n $N_spk_cv $trndir/_tmpf_randspk > $cvdir/_tmpf_cvspk 96 | tail -n $N_spk_trn $trndir/_tmpf_randspk > $trndir/_tmpf_trainspk 97 | 98 | #now call the subset_data_dir.sh 99 | subset_data_dir.sh --spk-list $trndir/_tmpf_trainspk $srcdir $trndir 100 | subset_data_dir.sh --spk-list $cvdir/_tmpf_cvspk $srcdir $cvdir 101 | 102 | #clean-up 103 | rm -f $trndir/_tmpf_randspk $trndir/_tmpf_trainspk $cvdir/_tmpf_cvspk 104 | 105 | -------------------------------------------------------------------------------- /utils/combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 3 | # 2014 David Snyder 4 | 5 | # This script combines the data from multiple source directories into 6 | # a single destination directory. 7 | 8 | # See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information 9 | # about what these directories contain. 10 | 11 | # Begin configuration section. 12 | extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." 13 | skip_fix=false # skip the fix_data_dir.sh in the end 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | if [ -f path.sh ]; then . ./path.sh; fi 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -lt 2 ]; then 22 | echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." 
23 | echo "Note, files that don't appear in all source dirs will not be combined," 24 | echo "with the exception of utt2uniq and segments, which are created where necessary." 25 | exit 1 26 | fi 27 | 28 | dest=$1; 29 | shift; 30 | 31 | first_src=$1; 32 | 33 | rm -r $dest 2>/dev/null 34 | mkdir -p $dest; 35 | 36 | export LC_ALL=C 37 | 38 | for dir in $*; do 39 | if [ ! -f $dir/utt2spk ]; then 40 | echo "$0: no such file $dir/utt2spk" 41 | exit 1; 42 | fi 43 | done 44 | 45 | # W.r.t. utt2uniq file the script has different behavior compared to other files 46 | # it is not compulsary for it to exist in src directories, but if it exists in 47 | # even one it should exist in all. We will create the files where necessary 48 | has_utt2uniq=false 49 | for in_dir in $*; do 50 | if [ -f $in_dir/utt2uniq ]; then 51 | has_utt2uniq=true 52 | break 53 | fi 54 | done 55 | 56 | if $has_utt2uniq; then 57 | # we are going to create an utt2uniq file in the destdir 58 | for in_dir in $*; do 59 | if [ ! -f $in_dir/utt2uniq ]; then 60 | # we assume that utt2uniq is a one to one mapping 61 | cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' 62 | else 63 | cat $in_dir/utt2uniq 64 | fi 65 | done | sort -k1 > $dest/utt2uniq 66 | echo "$0: combined utt2uniq" 67 | else 68 | echo "$0 [info]: not combining utt2uniq as it does not exist" 69 | fi 70 | # some of the old scripts might provide utt2uniq as an extrafile, so just remove it 71 | extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") 72 | 73 | # segments are treated similarly to utt2uniq. If it exists in some, but not all 74 | # src directories, then we generate segments where necessary. 75 | has_segments=false 76 | for in_dir in $*; do 77 | if [ -f $in_dir/segments ]; then 78 | has_segments=true 79 | break 80 | fi 81 | done 82 | 83 | if $has_segments; then 84 | for in_dir in $*; do 85 | if [ ! 
-f $in_dir/segments ]; then 86 | echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 87 | utils/data/get_segments_for_data.sh $in_dir 88 | else 89 | cat $in_dir/segments 90 | fi 91 | done | sort -k1 > $dest/segments 92 | echo "$0: combined segments" 93 | else 94 | echo "$0 [info]: not combining segments as it does not exist" 95 | fi 96 | 97 | for file in utt2spk utt2lang utt2dur feats.scp vad.scp ali.scp text cmvn.scp reco2file_and_channel wav.scp spk2gender $extra_files; do 98 | exists_somewhere=false 99 | absent_somewhere=false 100 | for d in $*; do 101 | if [ -f $d/$file ]; then 102 | exists_somewhere=true 103 | else 104 | absent_somewhere=true 105 | fi 106 | done 107 | 108 | if ! $absent_somewhere; then 109 | set -o pipefail 110 | ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; 111 | set +o pipefail 112 | echo "$0: combined $file" 113 | else 114 | if ! $exists_somewhere; then 115 | echo "$0 [info]: not combining $file as it does not exist" 116 | else 117 | echo "$0 [info]: **not combining $file as it does not exist everywhere**" 118 | fi 119 | fi 120 | done 121 | 122 | utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt 123 | 124 | if ! $skip_fix ; then 125 | utils/fix_data_dir.sh $dest || exit 1; 126 | fi 127 | 128 | exit 0 129 | -------------------------------------------------------------------------------- /utils/data/combine_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. 3 | # 2014 David Snyder 4 | 5 | # This script combines the data from multiple source directories into 6 | # a single destination directory. 7 | 8 | # See http://kaldi-asr.org/doc/data_prep.html#data_prep_data for information 9 | # about what these directories contain. 10 | 11 | # Begin configuration section. 12 | extra_files= # specify additional files in 'src-data-dir' to merge, ex. "file1 file2 ..." 
13 | skip_fix=false # skip the fix_data_dir.sh in the end 14 | # End configuration section. 15 | 16 | echo "$0 $@" # Print the command line for logging 17 | 18 | if [ -f path.sh ]; then . ./path.sh; fi 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -lt 2 ]; then 22 | echo "Usage: combine_data.sh [--extra-files 'file1 file2'] ..." 23 | echo "Note, files that don't appear in all source dirs will not be combined," 24 | echo "with the exception of utt2uniq and segments, which are created where necessary." 25 | exit 1 26 | fi 27 | 28 | dest=$1; 29 | shift; 30 | 31 | first_src=$1; 32 | 33 | rm -r $dest 2>/dev/null 34 | mkdir -p $dest; 35 | 36 | export LC_ALL=C 37 | 38 | for dir in $*; do 39 | if [ ! -f $dir/utt2spk ]; then 40 | echo "$0: no such file $dir/utt2spk" 41 | exit 1; 42 | fi 43 | done 44 | 45 | # W.r.t. utt2uniq file the script has different behavior compared to other files 46 | # it is not compulsary for it to exist in src directories, but if it exists in 47 | # even one it should exist in all. We will create the files where necessary 48 | has_utt2uniq=false 49 | for in_dir in $*; do 50 | if [ -f $in_dir/utt2uniq ]; then 51 | has_utt2uniq=true 52 | break 53 | fi 54 | done 55 | 56 | if $has_utt2uniq; then 57 | # we are going to create an utt2uniq file in the destdir 58 | for in_dir in $*; do 59 | if [ ! -f $in_dir/utt2uniq ]; then 60 | # we assume that utt2uniq is a one to one mapping 61 | cat $in_dir/utt2spk | awk '{printf("%s %s\n", $1, $1);}' 62 | else 63 | cat $in_dir/utt2uniq 64 | fi 65 | done | sort -k1 > $dest/utt2uniq 66 | echo "$0: combined utt2uniq" 67 | else 68 | echo "$0 [info]: not combining utt2uniq as it does not exist" 69 | fi 70 | # some of the old scripts might provide utt2uniq as an extrafile, so just remove it 71 | extra_files=$(echo "$extra_files"|sed -e "s/utt2uniq//g") 72 | 73 | # segments are treated similarly to utt2uniq. If it exists in some, but not all 74 | # src directories, then we generate segments where necessary. 
75 | has_segments=false 76 | for in_dir in $*; do 77 | if [ -f $in_dir/segments ]; then 78 | has_segments=true 79 | break 80 | fi 81 | done 82 | 83 | if $has_segments; then 84 | for in_dir in $*; do 85 | if [ ! -f $in_dir/segments ]; then 86 | echo "$0 [info]: will generate missing segments for $in_dir" 1>&2 87 | utils/data/get_segments_for_data.sh $in_dir 88 | else 89 | cat $in_dir/segments 90 | fi 91 | done | sort -k1 > $dest/segments 92 | echo "$0: combined segments" 93 | else 94 | echo "$0 [info]: not combining segments as it does not exist" 95 | fi 96 | 97 | for file in utt2spk utt2lang utt2dur feats.scp vad.scp ali.scp text cmvn.scp reco2file_and_channel wav.scp spk2gender $extra_files; do 98 | exists_somewhere=false 99 | absent_somewhere=false 100 | for d in $*; do 101 | if [ -f $d/$file ]; then 102 | exists_somewhere=true 103 | else 104 | absent_somewhere=true 105 | fi 106 | done 107 | 108 | if ! $absent_somewhere; then 109 | set -o pipefail 110 | ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; 111 | set +o pipefail 112 | echo "$0: combined $file" 113 | else 114 | if ! $exists_somewhere; then 115 | echo "$0 [info]: not combining $file as it does not exist" 116 | else 117 | echo "$0 [info]: **not combining $file as it does not exist everywhere**" 118 | fi 119 | fi 120 | done 121 | 122 | utils/utt2spk_to_spk2utt.pl <$dest/utt2spk >$dest/spk2utt 123 | 124 | if ! $skip_fix ; then 125 | utils/fix_data_dir.sh $dest || exit 1; 126 | fi 127 | 128 | exit 0 129 | -------------------------------------------------------------------------------- /utils/create_data_link.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Guoguo Chen 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0. 6 | # 7 | # This script distributes data onto different file systems by making symbolic 8 | # links. 
It is supposed to use together with utils/create_split_dir.pl, which 9 | # creates a "storage" directory that links to different file systems. 10 | # 11 | # If a sub-directory egs/storage does not exist, it does nothing. If it exists, 12 | # then it selects pseudo-randomly a number from those available in egs/storage/* 13 | # creates a link such as 14 | # 15 | # egs/egs.3.4.ark -> storage/4/egs.3.4.ark 16 | # 17 | use strict; 18 | use warnings; 19 | use File::Basename; 20 | use File::Spec; 21 | use Getopt::Long; 22 | 23 | sub GetGCD { 24 | my ($a, $b) = @_; 25 | while ($a != $b) { 26 | if ($a > $b) { 27 | $a = $a - $b; 28 | } else { 29 | $b = $b - $a; 30 | } 31 | } 32 | return $a; 33 | } 34 | 35 | my $Usage = < storage/4/egs.3.4.ark 46 | 47 | Usage: utils/create_data_link.pl [ ... ] 48 | e.g.: utils/create_data_link.pl foo/bar/egs.3.4.ark foo/bar/egs.3.5.ark 49 | (note: the dirname, e.g. foo/bar/, must be the same in all cases). 50 | 51 | See also utils/remove_data_links.sh 52 | EOU 53 | 54 | GetOptions(); 55 | 56 | if (@ARGV == 0) { 57 | die $Usage; 58 | } 59 | 60 | my $example_fullpath = $ARGV[0]; 61 | 62 | # Check if the storage has been created. If so, do nothing. 63 | my $dirname = dirname($example_fullpath); 64 | if (! -d "$dirname/storage") { 65 | exit(0); 66 | } 67 | 68 | # Storage exists, create symbolic links in the next few steps. 69 | 70 | # First, get a list of the available storage directories, and check if they are 71 | # properly created. 72 | opendir(my $dh, "$dirname/storage/") || die "$0: Fail to open $dirname/storage/\n"; 73 | my @storage_dirs = grep(/^[0-9]*$/, readdir($dh)); 74 | closedir($dh); 75 | my $num_storage = scalar(@storage_dirs); 76 | for (my $x = 1; $x <= $num_storage; $x++) { 77 | (-d "$dirname/storage/$x") || die "$0: $dirname/storage/$x does not exist\n"; 78 | } 79 | 80 | # Second, get the coprime list. 
81 | my @coprimes; 82 | for (my $n = 1; $n < $num_storage; $n++) { 83 | if (GetGCD($n, $num_storage) == 1) { 84 | push(@coprimes, $n); 85 | } 86 | } 87 | 88 | my $ret = 0; 89 | 90 | foreach my $fullpath (@ARGV) { 91 | if ($dirname ne dirname($fullpath)) { 92 | die "Mismatch in directory names of arguments: $example_fullpath versus $fullpath"; 93 | } 94 | 95 | # Finally, work out the directory index where we should put the data to. 96 | my $basename = basename($fullpath); 97 | my $filename_numbers = $basename; 98 | $filename_numbers =~ s/[^0-9]+/ /g; 99 | my @filename_numbers = split(" ", $filename_numbers); 100 | my $total = 0; 101 | my $index = 0; 102 | foreach my $x (@filename_numbers) { 103 | if ($index >= scalar(@coprimes)) { 104 | $index = 0; 105 | } 106 | $total += $x * $coprimes[$index]; 107 | $index++; 108 | } 109 | my $dir_index = $total % $num_storage + 1; 110 | 111 | # Make the symbolic link. 112 | if (-e $fullpath) { 113 | unlink($fullpath); 114 | } 115 | if (symlink("storage/$dir_index/$basename", $fullpath) != 1) { # failure 116 | $ret = 1; # will exit with error status. 
117 | } 118 | } 119 | 120 | exit($ret); 121 | 122 | ## testing: 123 | # rm -rf foo bar 124 | # mkdir -p bar/{1,2,3,4} 125 | # mkdir -p foo/storage 126 | # for x in 1 2 3 4; do ln -s ../../bar/$x foo/storage/$x; done 127 | # utils/create_data_link.pl utils/create_data_link.pl foo/1.3.ark foo/2.3.ark 128 | # ls -l foo 129 | # total 0 130 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 1.3.ark -> storage/3/1.3.ark 131 | # lrwxrwxrwx 1 dpovey fax 17 Sep 2 17:41 2.3.ark -> storage/4/2.3.ark 132 | # drwxr-xr-x 2 dpovey fax 38 Sep 2 17:40 storage 133 | -------------------------------------------------------------------------------- /utils/nnet/make_lstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015-2016 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 
19 | 20 | import sys 21 | 22 | ### 23 | ### Parse options 24 | ### 25 | from optparse import OptionParser 26 | usage="%prog [options] >nnet-proto-file" 27 | parser = OptionParser(usage) 28 | # Required, 29 | parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320, 30 | help='Number of cells for one direction in LSTM [default: %default]'); 31 | parser.add_option('--proj-dim', dest='proj_dim', type='int', default=400, 32 | help='Number of LSTM recurrent units [default: %default]'); 33 | parser.add_option('--num-layers', dest='num_layers', type='int', default=2, 34 | help='Number of LSTM layers [default: %default]'); 35 | # Optional (default == 'None'), 36 | parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float', 37 | help='Range of initial LSTM parameters [default: %default]'); 38 | parser.add_option('--param-stddev', dest='param_stddev', type='float', 39 | help='Standard deviation for initial weights of Softmax layer [default: %default]'); 40 | parser.add_option('--cell-clip', dest='cell_clip', type='float', 41 | help='Clipping cell values during propagation (per-frame) [default: %default]'); 42 | parser.add_option('--diff-clip', dest='diff_clip', type='float', 43 | help='Clipping partial-derivatives during BPTT (per-frame) [default: %default]'); 44 | parser.add_option('--cell-diff-clip', dest='cell_diff_clip', type='float', 45 | help='Clipping partial-derivatives of "cells" during BPTT (per-frame, those accumulated by CEC) [default: %default]'); 46 | parser.add_option('--grad-clip', dest='grad_clip', type='float', 47 | help='Clipping the accumulated gradients (per-updates) [default: %default]'); 48 | # 49 | 50 | (o,args) = parser.parse_args() 51 | if len(args) != 2 : 52 | parser.print_help() 53 | sys.exit(1) 54 | 55 | (feat_dim, num_leaves) = map(int,args); 56 | 57 | # Original prototype from Jiayu, 58 | # 59 | # 40 40 60 | # 40 512 800 0.01 4 61 | # 512 8000 0.000000 0.000000 0.04 62 | # 8000 8000 63 | # 64 | 65 | 
lstm_extra_opts="" 66 | if None != o.lstm_param_range: lstm_extra_opts += " %f " % o.lstm_param_range 67 | if None != o.cell_clip: lstm_extra_opts += " %f " % o.cell_clip 68 | if None != o.diff_clip: lstm_extra_opts += " %f " % o.diff_clip 69 | if None != o.cell_diff_clip: lstm_extra_opts += " %f " % o.cell_diff_clip 70 | if None != o.grad_clip: lstm_extra_opts += " %f " % o.grad_clip 71 | 72 | softmax_affine_opts="" 73 | if None != o.param_stddev: softmax_affine_opts += " %f " % o.param_stddev 74 | 75 | # The LSTM layers, 76 | print " %d %d %s" % (feat_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts 77 | for l in range(o.num_layers - 1): 78 | print " %d %d %s" % (o.proj_dim, o.proj_dim, o.cell_dim) + lstm_extra_opts 79 | 80 | # Adding for more stability, 81 | print " %d %d" % (o.proj_dim, o.proj_dim) 82 | 83 | # Softmax layer, 84 | print " %d %d 0.0 0.0" % (o.proj_dim, num_leaves) + softmax_affine_opts 85 | print " %d %d" % (num_leaves, num_leaves) 86 | 87 | -------------------------------------------------------------------------------- /utils/lang/internal/modify_unk_pron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2016 Johns Hopkins University (Author: Daniel Povey) 4 | # Apache 2.0. 5 | 6 | from __future__ import print_function 7 | import sys 8 | import os 9 | import argparse 10 | from collections import defaultdict 11 | 12 | # note, this was originally based 13 | 14 | parser = argparse.ArgumentParser(description=""" 15 | This script replaces the existing pronunciation of the 16 | unknown word in the provided lexicon, with a pronunciation 17 | consisting of three disambiguation symbols: #1 followed by #2 18 | followed by #3. 19 | The #2 will later be replaced by a phone-level LM by 20 | apply_unk_lm.sh (called later on by prepare_lang.sh). 
21 | Caution: this script is sensitive to the basename of the 22 | lexicon: it should be called either lexiconp.txt, in which 23 | case the format is 'word pron-prob p1 p2 p3 ...' 24 | or lexiconp_silprob.txt, in which case the format is 25 | 'word pron-prob sil-prob1 sil-prob2 sil-prob3 p1 p2 p3....'. 26 | It is an error if there is not exactly one pronunciation of 27 | the unknown word in the lexicon.""", 28 | epilog="""E.g.: modify_unk_pron.py data/local/lang/lexiconp.txt ''. 29 | This script is called from prepare_lang.sh.""") 30 | 31 | parser.add_argument('lexicon_file', type = str, 32 | help = 'Filename of the lexicon file to operate on (this is ' 33 | 'both an input and output of this script).') 34 | parser.add_argument('unk_word', type = str, 35 | help = "The printed form of the unknown/OOV word, normally ''.") 36 | 37 | args = parser.parse_args() 38 | 39 | if len(args.unk_word.split()) != 1: 40 | sys.exit("{0}: invalid unknown-word '{1}'".format( 41 | sys.argv[0], args.unk_word)) 42 | 43 | basename = os.path.basename(args.lexicon_file) 44 | if basename != 'lexiconp.txt' and basename != 'lexiconp_silprob.txt': 45 | sys.exit("{0}: expected the basename of the lexicon file to be either " 46 | "'lexiconp.txt' or 'lexiconp_silprob.txt', got: {1}".format( 47 | sys.argv[0], args.lexicon_file)) 48 | # the lexiconp.txt format is: word pron-prob p1 p2 p3... 49 | # lexiconp_silprob.txt has 3 extra real-valued fields after the pron-prob. 
50 | num_fields_before_pron = 2 if basename == 'lexiconp.txt' else 5 51 | 52 | print(' '.join(sys.argv), file = sys.stderr) 53 | 54 | try: 55 | lexicon_in = open(args.lexicon_file, 'r') 56 | except: 57 | sys.exit("{0}: failed to open lexicon file {1}".format( 58 | sys.argv[0], args.lexicon_file)) 59 | 60 | split_lines = [] 61 | unk_index = -1 62 | while True: 63 | line = lexicon_in.readline() 64 | if line == '': 65 | break 66 | this_split_line = line.split() 67 | if this_split_line[0] == args.unk_word: 68 | if unk_index != -1: 69 | sys.exit("{0}: expected there to be exactly one pronunciation of the " 70 | "unknown word {1} in {2}, but there are more than one.".format( 71 | sys.argv[0], args.lexicon_file, args.unk_word)) 72 | unk_index = len(split_lines) 73 | if len(this_split_line) <= num_fields_before_pron: 74 | sys.exit("{0}: input file {1} had a bad line (too few fields): {2}".format( 75 | sys.argv[0], args.lexicon_file, line[:-1])) 76 | split_lines.append(this_split_line) 77 | 78 | if len(split_lines) == 0: 79 | sys.exit("{0}: read no data from lexicon file {1}.".format( 80 | sys.argv[0], args.lexicon_file)) 81 | 82 | 83 | if unk_index == -1: 84 | sys.exit("{0}: expected there to be exactly one pronunciation of the " 85 | "unknown word {1} in {2}, but there are none.".format( 86 | sys.argv[0], args.unk_word, args.lexicon_file)) 87 | 88 | lexicon_in.close() 89 | 90 | # now modify the pron. 91 | split_lines[unk_index] = split_lines[unk_index][0:num_fields_before_pron] + [ '#1', '#2', '#3' ] 92 | 93 | 94 | try: 95 | # write to the same file. 
96 | lexicon_out = open(args.lexicon_file, 'w') 97 | except: 98 | sys.exit("{0}: failed to open lexicon file {1} for writing (permissions probleM?)".format( 99 | sys.argv[0], args.lexicon_file)) 100 | 101 | for split_line in split_lines: 102 | print(' '.join(split_line), file = lexicon_out) 103 | 104 | try: 105 | lexicon_out.close() 106 | except: 107 | sys.exit("{0}: failed to close lexicon file {1} after writing (disk full?)".format( 108 | sys.argv[0], args.lexicon_file)) 109 | -------------------------------------------------------------------------------- /utils/copy_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script operates on a directory, such as in data/train/, 7 | # that contains some subset of the following files: 8 | # feats.scp 9 | # wav.scp 10 | # spk2utt 11 | # utt2spk 12 | # text 13 | # 14 | # It copies to another directory, possibly adding a specified prefix or a suffix 15 | # to the utterance and/or speaker names. Note, the recording-ids stay the same. 16 | # 17 | 18 | 19 | # begin configuration section 20 | spk_prefix= 21 | utt_prefix= 22 | spk_suffix= 23 | utt_suffix= 24 | validate_opts= # should rarely be needed. 25 | # end configuration section 26 | 27 | . utils/parse_options.sh 28 | 29 | if [ $# != 2 ]; then 30 | echo "Usage: " 31 | echo " $0 [options] " 32 | echo "e.g.:" 33 | echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1" 34 | echo "Options" 35 | echo " --spk-prefix= # Prefix for speaker ids, default empty" 36 | echo " --utt-prefix= # Prefix for utterance ids, default empty" 37 | echo " --spk-suffix= # Suffix for speaker ids, default empty" 38 | echo " --utt-suffix= # Suffix for utterance ids, default empty" 39 | exit 1; 40 | fi 41 | 42 | 43 | export LC_ALL=C 44 | 45 | srcdir=$1 46 | destdir=$2 47 | 48 | if [ ! 
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  feats.scp
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It copies to another directory, possibly adding a specified prefix or a suffix
# to the utterance and/or speaker names.  Note, the recording-ids stay the same.
#

# begin configuration section
spk_prefix=
utt_prefix=
spk_suffix=
utt_suffix=
validate_opts=   # should rarely be needed.
# end configuration section

. utils/parse_options.sh

if [ $# != 2 ]; then
  # NOTE(review): the <srcdir>/<destdir>/<prefix>/<suffix> placeholders below had
  # been stripped from the usage messages; restored here.
  echo "Usage: "
  echo "  $0 [options] <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
  echo "Options"
  echo "   --spk-prefix=<prefix>        # Prefix for speaker ids, default empty"
  echo "   --utt-prefix=<prefix>        # Prefix for utterance ids, default empty"
  echo "   --spk-suffix=<suffix>        # Suffix for speaker ids, default empty"
  echo "   --utt-suffix=<suffix>        # Suffix for utterance ids, default empty"
  exit 1;
fi

export LC_ALL=C

srcdir=$1
destdir=$2

if [ ! -f $srcdir/utt2spk ]; then
  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;

mkdir -p $destdir

# Build old->new maps for utterance-ids and speaker-ids.
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map

# utt2uniq maps (possibly renamed) utterances back to the original utterance-id,
# so that held-out-set logic can recognize perturbed copies of the same data.
if [ ! -f $srcdir/utt2uniq ]; then
  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
  fi
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
fi

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/feats.scp ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
fi

if [ -f $srcdir/segments ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
  cp $srcdir/wav.scp $destdir
  if [ -f $srcdir/reco2file_and_channel ]; then
    cp $srcdir/reco2file_and_channel $destdir/
  fi
else # no segments->wav indexed by utt.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/utt2dur ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur
fi
# spk2gender and cmvn.scp are indexed by speaker, so use spk_map.
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi
if [ -f $srcdir/cmvn.scp ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
fi
# Scoring-related files are copied verbatim (they are indexed by recording).
for f in stm glm ctm; do
  if [ -f $srcdir/$f ]; then
    cp $srcdir/$f $destdir
  fi
done

rm $destdir/spk_map $destdir/utt_map

echo "$0: copied data from $srcdir to $destdir"

# Files present in the destination but not the source are stale leftovers from a
# previous copy; move them aside so validation does not see inconsistent data.
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do
  if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
    echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
    echo " ... $destdir/.backup/$f"
    mkdir -p $destdir/.backup
    mv $destdir/$f $destdir/.backup/
  fi
done

[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"

utils/validate_data_dir.sh $validate_opts $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# (This is a duplicate of utils/copy_data_dir.sh, kept in utils/data/ as well.)
#
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  feats.scp
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It copies to another directory, possibly adding a specified prefix or a suffix
# to the utterance and/or speaker names.  Note, the recording-ids stay the same.
#

# begin configuration section
spk_prefix=
utt_prefix=
spk_suffix=
utt_suffix=
validate_opts=   # should rarely be needed.
# end configuration section

. utils/parse_options.sh

if [ $# != 2 ]; then
  # NOTE(review): restored the <srcdir>/<destdir>/<prefix>/<suffix> placeholders
  # that had been stripped from the usage messages.
  echo "Usage: "
  echo "  $0 [options] <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 --spk-prefix=1- --utt-prefix=1- data/train data/train_1"
  echo "Options"
  echo "   --spk-prefix=<prefix>        # Prefix for speaker ids, default empty"
  echo "   --utt-prefix=<prefix>        # Prefix for utterance ids, default empty"
  echo "   --spk-suffix=<suffix>        # Suffix for speaker ids, default empty"
  echo "   --utt-suffix=<suffix>        # Suffix for utterance ids, default empty"
  exit 1;
fi

export LC_ALL=C

srcdir=$1
destdir=$2

if [ ! -f $srcdir/utt2spk ]; then
  echo "copy_data_dir.sh: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;

mkdir -p $destdir

# Build old->new maps for utterance-ids and speaker-ids.
cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix -v s=$spk_suffix '{printf("%s %s%s%s\n", $1, p, $1, s);}' > $destdir/spk_map

# utt2uniq maps renamed utterances back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  if [[ ! -z $utt_prefix || ! -z $utt_suffix ]]; then
    cat $srcdir/utt2spk | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $1);}' > $destdir/utt2uniq
  fi
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix -v s=$utt_suffix '{printf("%s%s%s %s\n", p, $1, s, $2);}' > $destdir/utt2uniq
fi

cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/feats.scp ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/feats.scp >$destdir/feats.scp
fi

if [ -f $srcdir/segments ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
  cp $srcdir/wav.scp $destdir
  if [ -f $srcdir/reco2file_and_channel ]; then
    cp $srcdir/reco2file_and_channel $destdir/
  fi
else # no segments->wav indexed by utt.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp >$destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/utt2dur ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/utt2dur >$destdir/utt2dur
fi
# spk2gender and cmvn.scp are indexed by speaker, so use spk_map.
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi
if [ -f $srcdir/cmvn.scp ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/cmvn.scp >$destdir/cmvn.scp
fi
for f in stm glm ctm; do
  if [ -f $srcdir/$f ]; then
    cp $srcdir/$f $destdir
  fi
done

rm $destdir/spk_map $destdir/utt_map

echo "$0: copied data from $srcdir to $destdir"

# Move aside stale files left over in the destination from a previous copy.
for f in feats.scp cmvn.scp vad.scp utt2lang utt2uniq utt2dur utt2num_frames text wav.scp reco2file_and_channel stm glm ctm; do
  if [ -f $destdir/$f ] && [ ! -f $srcdir/$f ]; then
    echo "$0: file $f exists in dest $destdir but not in src $srcdir. Moving it to"
    echo " ... $destdir/.backup/$f"
    mkdir -p $destdir/.backup
    mv $destdir/$f $destdir/.backup/
  fi
done

[ ! -f $srcdir/feats.scp ] && validate_opts="$validate_opts --no-feats"
[ ! -f $srcdir/text ] && validate_opts="$validate_opts --no-text"

utils/validate_data_dir.sh $validate_opts $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It generates the files which are used for perturbing the speed of the original data.

. utils/parse_options.sh

if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <warping-factor>/<srcdir>/<destdir>
  # placeholders in the usage message.
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 0.9 data/train_si284 data/train_si284p"
  exit 1
fi

export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
# All new utterance/speaker ids get a "sp<factor>-" prefix, e.g. "sp0.9-".
spk_prefix=$label$factor"-"
utt_prefix=$label$factor"-"

# check that sox is on the path.
which sox &>/dev/null
! [ $? -eq 0 ] && echo "sox: command not found" && exit 1;

if [ ! -f $srcdir/utt2spk ]; then
  echo "$0: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;
set -o pipefail

mkdir -p $destdir

cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
# utt2uniq maps each perturbed utterance back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $2);}' > $destdir/utt2uniq
fi


cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
  # also apply the spk_prefix to the recording-ids.
  cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

  # Segment start/end times shrink by the warping factor (audio sped up by
  # 'factor' is 1/factor times as long).
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
    utils/apply_map.pl -f 2 $destdir/reco_map | \
    awk -v factor=$factor \
    '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments

  utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
  # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
   awk -v factor=$factor \
   '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
     else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
     else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  if [ -f $srcdir/reco2file_and_channel ]; then
    utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
  fi

  rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
    # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
    awk -v factor=$factor \
    '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
      else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
      else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi

if [ ! -f $srcdir/utt2dur ]; then
  # generate utt2dur if it does not exist in srcdir
  utils/data/get_utt2dur.sh $srcdir
fi

# Durations shrink by the warping factor too.
cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \
  awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur

rm $destdir/spk_map $destdir/utt_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats $destdir
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
# Apache 2.0

# (This is a duplicate of utils/perturb_data_dir_speed.sh, kept in utils/data/.)
#
# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#
# It generates the files which are used for perturbing the speed of the original data.

. utils/parse_options.sh

if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <warping-factor>/<srcdir>/<destdir>
  # placeholders in the usage message.
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>"
  echo "e.g.:"
  echo " $0 0.9 data/train_si284 data/train_si284p"
  exit 1
fi

export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
# All new utterance/speaker ids get a "sp<factor>-" prefix, e.g. "sp0.9-".
spk_prefix=$label$factor"-"
utt_prefix=$label$factor"-"

# check that sox is on the path.
which sox &>/dev/null
! [ $? -eq 0 ] && echo "sox: command not found" && exit 1;

if [ ! -f $srcdir/utt2spk ]; then
  echo "$0: no such file $srcdir/utt2spk"
  exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different."
  exit 1
fi

set -e;
set -o pipefail

mkdir -p $destdir

cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/utt_map
cat $srcdir/spk2utt | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/spk_map
# utt2uniq maps each perturbed utterance back to the original utterance-id.
if [ ! -f $srcdir/utt2uniq ]; then
  cat $srcdir/utt2spk | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $1);}' > $destdir/utt2uniq
else
  cat $srcdir/utt2uniq | awk -v p=$utt_prefix '{printf("%s%s %s\n", p, $1, $2);}' > $destdir/utt2uniq
fi


cat $srcdir/utt2spk | utils/apply_map.pl -f 1 $destdir/utt_map | \
  utils/apply_map.pl -f 2 $destdir/spk_map >$destdir/utt2spk

utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt

if [ -f $srcdir/segments ]; then
  # also apply the spk_prefix to the recording-ids.
  cat $srcdir/wav.scp | awk -v p=$spk_prefix '{printf("%s %s%s\n", $1, p, $1);}' > $destdir/reco_map

  # Segment start/end times shrink by the warping factor.
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments | \
    utils/apply_map.pl -f 2 $destdir/reco_map | \
    awk -v factor=$factor \
    '{printf("%s %s %.2f %.2f\n", $1, $2, $3/factor, $4/factor);}' >$destdir/segments

  utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
  # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
   awk -v factor=$factor \
   '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
     else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
     else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  if [ -f $srcdir/reco2file_and_channel ]; then
    utils/apply_map.pl -f 1 $destdir/reco_map <$srcdir/reco2file_and_channel >$destdir/reco2file_and_channel
  fi

  rm $destdir/reco_map 2>/dev/null
else # no segments->wav indexed by utterance.
  if [ -f $srcdir/wav.scp ]; then
    utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/wav.scp | sed 's/| *$/ |/' | \
    # Handle three cases of rxfilenames appropriately; "input piped command", "file offset" and "filename"
    awk -v factor=$factor \
    '{wid=$1; $1=""; if ($NF=="|") {print wid $_ " sox -t wav - -t wav - speed " factor " |"}
      else if (match($0, /:[0-9]+$/)) {print wid " wav-copy" $_ " - | sox -t wav - -t wav - speed " factor " |" }
      else {print wid " sox -t wav" $_ " -t wav - speed " factor " |"}}' > $destdir/wav.scp
  fi
fi

if [ -f $srcdir/text ]; then
  utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/text >$destdir/text
fi
if [ -f $srcdir/spk2gender ]; then
  utils/apply_map.pl -f 1 $destdir/spk_map <$srcdir/spk2gender >$destdir/spk2gender
fi

if [ ! -f $srcdir/utt2dur ]; then
  # generate utt2dur if it does not exist in srcdir
  utils/data/get_utt2dur.sh $srcdir
fi

# Durations shrink by the warping factor too.
cat $srcdir/utt2dur | utils/apply_map.pl -f 1 $destdir/utt_map | \
  awk -v factor=$factor '{print $1, $2/factor;}' >$destdir/utt2dur

rm $destdir/spk_map $destdir/utt_map 2>/dev/null
echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"
utils/validate_data_dir.sh --no-feats $destdir
#!/bin/bash

# Copyright 2016  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a data directory, such as in data/train/, and adds the
# utt2dur file if it does not already exist.  The file 'utt2dur' maps from
# utterance to the duration of the utterance in seconds.  This script works it
# out from the 'segments' file, or, if not present, from the wav.scp file (it
# first tries interrogating the headers, and if this fails, it reads the wave
# files in entirely.)

frame_shift=0.01

. utils/parse_options.sh
. ./path.sh

if [ $# != 1 ]; then
  echo "Usage: $0 [options] <datadir>"
  echo "e.g.:"
  echo " $0 data/train"
  echo " Options:"
  echo " --frame-shift      # frame shift in seconds. Only relevant when we are"
  echo "                    # getting duration from feats.scp (default: 0.01). "
  exit 1
fi

export LC_ALL=C

data=$1

# If utt2dur is present, non-empty and consistent with utt2spk, keep it.
if [ -s $data/utt2dur ] && \
  [ $(cat $data/utt2spk | wc -l) -eq $(cat $data/utt2dur | wc -l) ]; then
  echo "$0: $data/utt2dur already exists with the expected length. We won't recompute it."
  exit 0;
fi

if [ -f $data/segments ]; then
  echo "$0: working out $data/utt2dur from $data/segments"
  cat $data/segments | awk '{len=$4-$3; print $1, len;}' > $data/utt2dur
elif [ -f $data/wav.scp ]; then
  echo "$0: segments file does not exist so getting durations from wave files"

  # if the wav.scp contains only lines of the form
  # utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph |
  # then we can work out the durations from the sphere-file headers directly.
  if cat $data/wav.scp | perl -e '
    while (<>) { s/\|\s*$/ |/;  # make sure final | is preceded by space.
      @A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ &&
                        $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); }
      $utt = $A[0]; $sphere_file = $A[4];

      if (!open(F, "<$sphere_file")) { die "Error opening sphere file $sphere_file"; }
      $sample_rate = -1; $sample_count = -1;
      for ($n = 0; $n <= 30; $n++) {
        $line = <F>;   # BUGFIX: the <F> filehandle read had been lost, making this assign undef.
        if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; }
        if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; }
        if ($line =~ m/end_head/) { last; }   # BUGFIX: perl loop-exit is "last", not "break".
      }
      close(F);
      if ($sample_rate == -1 || $sample_count == -1) {
        die "could not parse sphere header from $sphere_file";
      }
      $duration = $sample_count * 1.0 / $sample_rate;
      print "$utt $duration\n";
    } ' > $data/utt2dur; then
    echo "$0: successfully obtained utterance lengths from sphere-file headers"
  else
    echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration"
    if ! command -v wav-to-duration >/dev/null; then
      echo "$0: wav-to-duration is not on your path"
      exit 1;
    fi

    read_entire_file=false
    if cat $data/wav.scp | grep -q 'sox.*speed'; then
      # sox "speed" pipes report the pre-perturbation duration in the header,
      # so the whole file has to be read to get the true length.
      read_entire_file=true
      echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow."
      echo "... It is much faster if you call get_utt2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or "
      echo "... perturb_data_dir_speed_3way.sh."
    fi

    if ! wav-to-duration --read-entire-file=$read_entire_file scp:$data/wav.scp ark,t:$data/utt2dur 2>&1 | grep -v 'nonzero return status'; then
      echo "$0: there was a problem getting the durations; moving $data/utt2dur to $data/.backup/"
      mkdir -p $data/.backup/
      mv $data/utt2dur $data/.backup/
    fi
  fi
elif [ -f $data/feats.scp ]; then
  echo "$0: wave file does not exist so getting durations from feats files"
  feat-to-len scp:$data/feats.scp ark,t:- | awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' >$data/utt2dur
else
  echo "$0: Expected $data/wav.scp, $data/segments or $data/feats.scp to exist"
  exit 1
fi

len1=$(cat $data/utt2spk | wc -l)
len2=$(cat $data/utt2dur | wc -l)
if [ "$len1" != "$len2" ]; then
  echo "$0: warning: length of utt2dur does not equal that of utt2spk, $len2 != $len1"
  # Use POSIX $(( )) arithmetic instead of the deprecated $[ ] form.
  if [ $len1 -gt $((len2 * 2)) ]; then
    echo "$0: less than half of utterances got a duration: failing."
    exit 1
  fi
fi

echo "$0: computed $data/utt2dur"

exit 0
#!/bin/bash

# Apache 2.0.  Copyright 2012, Johns Hopkins University (author: Daniel Povey)

# This script creates a "lang" directory of the "testing" type (including G.fst)
# given an existing "alignment" directory and an existing "lang" directory.
# The directory contains only single-phone words, and a bigram language model that
# is built without smoothing, on top of single phones.  The point of no smoothing
# is to limit the number of transitions, so we can decode reasonably fast, and the
# graph won't blow up.  This is probably going to be most useful for things like
# language-id.


# We might later have options here; if not, I'll remove this.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;


if [ $# != 3 ]; then
  # NOTE(review): restored the stripped <lang-dir>/<ali-dir>/<lang-out> placeholders.
  echo "Usage: $0: [options] <lang-dir> <ali-dir> <lang-out>"
  echo "e.g.: $0: data/lang exp/tri3b_ali data/lang_phone_bg"
  exit 1;
fi

lang=$1
alidir=$2
lang_out=$3

for f in $lang/phones.txt $alidir/ali.1.gz; do
  [ ! -f $f ] && echo "Expected file $f to exist" && exit 1;
done

mkdir -p $lang_out || exit 1;

grep -v '#' $lang/phones.txt > $lang_out/phones.txt # no disambig symbols
 # needed; G and L . G will be deterministic.
cp $lang/topo $lang_out
rm -r $lang_out/phones 2>/dev/null
cp -r $lang/phones/ $lang_out/
rm $lang_out/phones/word_boundary.* 2>/dev/null # these would
 # no longer be valid.
rm $lang_out/phones/wdisambig* 2>/dev/null # ditto this.

# List of disambig symbols will be empty: not needed, since G.fst and L.fst * G.fst
# are determinizable without any.
echo -n > $lang_out/phones/disambig.txt
echo -n > $lang_out/phones/disambig.int
echo -n > $lang_out/phones/disambig.csl
echo -n > $lang_out/phones/wdisambig.txt
echo -n > $lang_out/phones/wdisambig_phones.int
echo -n > $lang_out/phones/wdisambig_words.int

# Let OOV symbol be the first phone.  This is arbitrary, it's just
# so that validate_lang.pl succeeds.  We should never actually use
# this.
oov_sym=$(tail -n +2 $lang_out/phones.txt | head -n 1 | awk '{print $1}')
oov_int=$(tail -n +2 $lang_out/phones.txt | head -n 1 | awk '{print $2}')
echo $oov_sym > $lang_out/oov.txt
echo $oov_int > $lang_out/oov.int


# Get phone-level transcripts of training data and create a
# language model.
# NOTE(review): the "<s>", "</s>" and "<eps>" symbols below had been stripped
# from this file (HTML-sanitizer artifact); restored from context.
ali-to-phones $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz|" ark,t:- | \
  perl -e 'while(<>) {
     @A = split(" ", $_);
     shift @A; # Remove the utterance-id.
     foreach $p ( @A ) { $phones{$p} = 1; } # assoc. array of phones.
     unshift @A, "<s>";
     push @A, "</s>";
     for ($n = 0; $n+1 < @A; $n++) {
       $p = $A[$n]; $q = $A[$n+1];
       $count{$p,$q}++;
       $histcount{$p}++;
     }
   }
   @phones = keys %phones;
   unshift @phones, "<s>";
   # @phones is now all real phones, plus <s>.
   for ($n = 0; $n < @phones; $n++) {
     $phn2state{$phones[$n]} = $n;
   }
   foreach $p (@phones) {
     $src = $phn2state{$p};
     $hist = $histcount{$p};
     $hist > 0 || die;
     foreach $q (@phones) {
       $c = $count{$p,$q};
       if (defined $c) {
         $cost = -log($c / $hist); # cost on FST arc.
         $dest = $phn2state{$q};
         print "$src $dest $q $cost\n"; # Note: q is actually numeric.
       }
     }
     $c = $count{$p,"</s>"};
     if (defined $c) {
       $cost = -log($c / $hist); # cost on FST arc.
       print "$src $cost\n"; # final-prob.
     }
   } ' | fstcompile --acceptor=true | \
   fstarcsort --sort_type=ilabel > $lang_out/G.fst

# symbols for phones and words are the same.
# Neither has disambig symbols.
cp $lang_out/phones.txt $lang_out/words.txt

grep -v '<eps>' $lang_out/phones.txt | awk '{printf("0 0 %s %s\n", $2, $2);} END{print("0 0.0");}' | \
  fstcompile > $lang_out/L.fst

# note: first two fields of align_lexicon.txt are interpreted as the word; the remaining
# fields are the phones that are in the pron of the word.  These are all the same, for us.
for p in $(grep -v '<eps>' $lang_out/phones.txt | awk '{print $1}'); do echo $p $p $p; done > $lang_out/phones/align_lexicon.txt

# just use one sym2int.pl command, since phones.txt and words.txt are identical.
utils/sym2int.pl $lang_out/phones.txt <$lang_out/phones/align_lexicon.txt >$lang_out/phones/align_lexicon.int

# L and L_disambig are the same.
cp $lang_out/L.fst $lang_out/L_disambig.fst

utils/validate_lang.pl --skip-disambig-check $lang_out || exit 1;
27 | spk2utt = defaultdict(lambda: []) 28 | 29 | while True: 30 | line = sys.stdin.readline() 31 | if line == '': 32 | break; 33 | a = line.split() 34 | if len(a) != 2: 35 | sys.exit("modify_speaker_info.py: bad utt2spk line from standard input (expected two fields): " + 36 | line) 37 | [ utt, spk ] = a 38 | utt2spk[utt] = spk 39 | spk2utt[spk].append(utt) 40 | 41 | if args.seconds_per_spk_max > 0: 42 | utt2dur = dict() 43 | try: 44 | f = open(args.utt2dur) 45 | while True: 46 | line = f.readline() 47 | if line == '': 48 | break 49 | a = line.split() 50 | if len(a) != 2: 51 | sys.exit("modify_speaker_info.py: bad utt2dur line from standard input (expected two fields): " + 52 | line) 53 | [ utt, dur ] = a 54 | utt2dur[utt] = float(dur) 55 | for utt in utt2spk: 56 | if not utt in utt2dur: 57 | sys.exit("modify_speaker_info.py: utterance {0} not in utt2dur file {1}".format( 58 | utt, args.utt2dur)) 59 | except Exception as e: 60 | sys.exit("modify_speaker_info.py: problem reading utt2dur info: " + str(e)) 61 | 62 | # splits a list of utts into a list of lists, based on constraints from the 63 | # command line args. Note: the last list will tend to be shorter than the others, 64 | # we make no attempt to fix this. 65 | def SplitIntoGroups(uttlist): 66 | ans = [] # list of lists. 67 | cur_uttlist = [] 68 | cur_dur = 0.0 69 | for utt in uttlist: 70 | if ((args.utts_per_spk_max > 0 and len(cur_uttlist) == args.utts_per_spk_max) or 71 | (args.seconds_per_spk_max > 0 and len(cur_uttlist) > 0 and 72 | cur_dur + utt2dur[utt] > args.seconds_per_spk_max)): 73 | ans.append(cur_uttlist) 74 | cur_uttlist = [] 75 | cur_dur = 0.0 76 | cur_uttlist.append(utt) 77 | if args.seconds_per_spk_max > 0: 78 | cur_dur += utt2dur[utt] 79 | if len(cur_uttlist) > 0: 80 | ans.append(cur_uttlist) 81 | return ans 82 | 83 | 84 | # This function will return '%01d' if d < 10, '%02d' if d < 100, and so on. 
85 | # It's for printf printing of numbers in such a way that sorted order will be 86 | # correct. 87 | def GetFormatString(d): 88 | ans = 1 89 | while (d >= 10): 90 | d //= 10 # integer division 91 | ans += 1 92 | # e.g. we might return the string '%01d' or '%02d' 93 | return '%0{0}d'.format(ans) 94 | 95 | 96 | if args.respect_speaker_info == 'true': 97 | for spk in sorted(spk2utt.keys()): 98 | uttlists = SplitIntoGroups(spk2utt[spk]) 99 | format_string = '%s-' + GetFormatString(len(uttlists)) 100 | for i in range(len(uttlists)): 101 | # the following might look like: '%s-%02d'.format('john_smith' 9 + 1), 102 | # giving 'john_smith-10'. 103 | this_spk = format_string % (spk, i + 1) 104 | for utt in uttlists[i]: 105 | print(utt, this_spk) 106 | else: 107 | uttlists = SplitIntoGroups(sorted(utt2spk.keys())) 108 | format_string = 'speaker-' + GetFormatString(len(uttlists)) 109 | for i in range(len(uttlists)): 110 | # the following might look like: 'speaker-%04d'.format(105 + 1), 111 | # giving 'speaker-0106'. 112 | this_spk = format_string % (i + 1) 113 | for utt in uttlists[i]: 114 | print(utt, this_spk) 115 | 116 | -------------------------------------------------------------------------------- /utils/nnet/make_blstm_proto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2015-2016 Brno University of Technology (author: Karel Vesely) 4 | 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 13 | # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 14 | # MERCHANTABLITY OR NON-INFRINGEMENT. 
15 | # See the Apache 2 License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # Generated Nnet prototype, to be initialized by 'nnet-initialize'. 19 | 20 | import sys 21 | 22 | ### 23 | ### Parse options 24 | ### 25 | from optparse import OptionParser 26 | usage="%prog [options] >nnet-proto-file" 27 | parser = OptionParser(usage) 28 | # Required, 29 | parser.add_option('--cell-dim', dest='cell_dim', type='int', default=320, 30 | help='Number of cells for one direction in BLSTM [default: %default]'); 31 | parser.add_option('--proj-dim', dest='proj_dim', type='int', default=200, 32 | help='Dim reduction for one direction in BLSTM [default: %default]'); 33 | parser.add_option('--proj-dim-last', dest='proj_dim_last', type='int', default=320, 34 | help='Dim reduction for one direction in BLSTM (last BLSTM component) [default: %default]'); 35 | parser.add_option('--num-layers', dest='num_layers', type='int', default=2, 36 | help='Number of BLSTM layers [default: %default]'); 37 | # Optional (default == 'None'), 38 | parser.add_option('--lstm-param-range', dest='lstm_param_range', type='float', 39 | help='Range of initial BLSTM parameters [default: %default]'); 40 | parser.add_option('--param-stddev', dest='param_stddev', type='float', 41 | help='Standard deviation for initial weights of Softmax layer [default: %default]'); 42 | parser.add_option('--cell-clip', dest='cell_clip', type='float', 43 | help='Clipping cell values during propagation (per-frame) [default: %default]'); 44 | parser.add_option('--diff-clip', dest='diff_clip', type='float', 45 | help='Clipping partial-derivatives during BPTT (per-frame) [default: %default]'); 46 | parser.add_option('--cell-diff-clip', dest='cell_diff_clip', type='float', 47 | help='Clipping partial-derivatives of "cells" during BPTT (per-frame, those accumulated by CEC) [default: %default]'); 48 | parser.add_option('--grad-clip', dest='grad_clip', type='float', 49 | help='Clipping 
the accumulated gradients (per-updates) [default: %default]'); 50 | # 51 | 52 | (o,args) = parser.parse_args() 53 | if len(args) != 2 : 54 | parser.print_help() 55 | sys.exit(1) 56 | 57 | (feat_dim, num_leaves) = map(int,args); 58 | 59 | # Original prototype from Jiayu, 60 | # 61 | # 40 40 62 | # 40 512 800 0.01 4 63 | # 512 8000 0.000000 0.000000 0.04 64 | # 8000 8000 65 | # 66 | 67 | lstm_extra_opts="" 68 | if None != o.lstm_param_range: lstm_extra_opts += " %f " % o.lstm_param_range 69 | if None != o.cell_clip: lstm_extra_opts += " %f " % o.cell_clip 70 | if None != o.diff_clip: lstm_extra_opts += " %f " % o.diff_clip 71 | if None != o.cell_diff_clip: lstm_extra_opts += " %f " % o.cell_diff_clip 72 | if None != o.grad_clip: lstm_extra_opts += " %f " % o.grad_clip 73 | 74 | softmax_affine_opts="" 75 | if None != o.param_stddev: softmax_affine_opts += " %f " % o.param_stddev 76 | 77 | # The BLSTM layers, 78 | if o.num_layers == 1: 79 | # Single BLSTM, 80 | print " %d %d %s" % (feat_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts 81 | else: 82 | # >1 BLSTM, 83 | print " %d %d %s" % (feat_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts 84 | for l in range(o.num_layers - 2): 85 | print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim, o.cell_dim) + lstm_extra_opts 86 | print " %d %d %s" % (2*o.proj_dim, 2*o.proj_dim_last, o.cell_dim) + lstm_extra_opts 87 | 88 | # Adding for more stability, 89 | print " %d %d" % (2*o.proj_dim_last, 2*o.proj_dim_last) 90 | 91 | # Softmax layer, 92 | print " %d %d 0.0 0.0" % (2*o.proj_dim_last, num_leaves) + softmax_affine_opts 93 | print " %d %d" % (num_leaves, num_leaves) 94 | 95 | -------------------------------------------------------------------------------- /utils/data/modify_speaker_info.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2013-2016 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | # This script copies a data directory 
(like utils/copy_data_dir.sh) while
52 | echo " --seconds-per-spk-max # number of seconds per speaker maximum," 53 | echo " # default -1 (meaning no maximum). E.g. 60." 54 | echo " --respect-speaker-info # If true, respect the" 55 | echo " # existing speaker map (i.e. do not" 56 | echo " # assign utterances from different" 57 | echo " # speakers to the same generated speaker)." 58 | echo " # Default: true." 59 | echo "Note: one or both of the --utts-per-spk-max or --seconds-per-spk-max" 60 | echo "options is required." 61 | exit 1; 62 | fi 63 | 64 | export LC_ALL=C 65 | 66 | srcdir=$1 67 | destdir=$2 68 | 69 | if [ "$destdir" == "$srcdir" ]; then 70 | echo "$0: must be different from ." 71 | exit 1 72 | fi 73 | 74 | if [ "$seconds_per_spk_max" == "-1" ] && ! [ "$utts_per_spk_max" -gt 0 ]; then 75 | echo "$0: one or both of the --utts-per-spk-max or --seconds-per-spk-max options must be provided." 76 | fi 77 | 78 | if [ ! -f $srcdir/utt2spk ]; then 79 | echo "$0: no such file $srcdir/utt2spk" 80 | exit 1; 81 | fi 82 | 83 | set -e; 84 | set -o pipefail 85 | 86 | mkdir -p $destdir 87 | 88 | if [ "$seconds_per_spk_max" != -1 ]; then 89 | # we need the utt2dur file. 90 | utils/data/get_utt2dur.sh $srcdir 91 | utt2dur_opt="--utt2dur=$srcdir/utt2dur" 92 | else 93 | utt2dur_opt= 94 | fi 95 | 96 | utils/data/internal/modify_speaker_info.py \ 97 | $utt2dur_opt --respect-speaker-info=$respect_speaker_info \ 98 | --utts-per-spk-max=$utts_per_spk_max --seconds-per-spk-max=$seconds_per_spk_max \ 99 | <$srcdir/utt2spk >$destdir/utt2spk 100 | 101 | utils/utt2spk_to_spk2utt.pl <$destdir/utt2spk >$destdir/spk2utt 102 | 103 | # This script won't create the new cmvn.scp, it should be recomputed. 104 | if [ -f $destdir/cmvn.scp ]; then 105 | mkdir -p $destdir/.backup 106 | mv $destdir/cmvn.scp $destdir/.backup 107 | echo "$0: moving $destdir/cmvn.scp to $destdir/.backup/cmvn.scp" 108 | fi 109 | 110 | # these things won't be affected by the change of speaker mapping. 
111 | for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do 112 | [ -f $srcdir/$f ] && cp $srcdir/$f $destdir/ 113 | done 114 | 115 | 116 | orig_num_spk=$(wc -l <$srcdir/spk2utt) 117 | new_num_spk=$(wc -l <$destdir/spk2utt) 118 | 119 | echo "$0: copied data from $srcdir to $destdir, number of speakers changed from $orig_num_spk to $new_num_spk" 120 | opts= 121 | [ ! -f $srcdir/feats.scp ] && opts="--no-feats" 122 | [ ! -f $srcdir/text ] && opts="$opts --no-text" 123 | [ ! -f $srcdir/wav.scp ] && opts="$opts --no-wav" 124 | 125 | utils/validate_data_dir.sh $opts $destdir 126 | -------------------------------------------------------------------------------- /utils/data/extend_segment_times.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import sys 5 | import argparse 6 | from collections import defaultdict 7 | 8 | 9 | parser = argparse.ArgumentParser(description=""" 10 | Usage: extend_segment_times.py [options] output-segments 11 | This program pads the times in a 'segments' file (e.g. 
data/train/segments) 12 | with specified left and right context (for cases where there was no 13 | silence padding in the original segments file)""") 14 | 15 | parser.add_argument("--start-padding", type = float, default = 0.1, 16 | help="Amount of padding, in seconds, for the start time of " 17 | "each segment (start times <0 will be set to zero).") 18 | parser.add_argument("--end-padding", type = float, default = 0.1, 19 | help="Amount of padding, in seconds, for the end time of " 20 | "each segment.") 21 | parser.add_argument("--last-segment-end-padding", type = float, default = 0.1, 22 | help="Amount of padding, in seconds, for the end time of " 23 | "the last segment of each file (maximum allowed).") 24 | parser.add_argument("--fix-overlapping-segments", type = str, 25 | default = 'true', choices=['true', 'false'], 26 | help="If true, prevent segments from overlapping as a result " 27 | "of the padding (or that were already overlapping)") 28 | args = parser.parse_args() 29 | 30 | 31 | # the input file will be a sequence of lines which are each of the form: 32 | # 33 | # e.g. 34 | # utt-1 recording-1 0.62 5.40 35 | # The output will be in the same format and in the same 36 | # order, except wiht modified times. 37 | 38 | # This variable maps from a recording-id to a listof the utterance 39 | # indexes (as integer indexes into 'entries'] 40 | # that are part of that recording. 
41 | recording_to_utt_indexes = defaultdict(list) 42 | 43 | # This is an array of the entries in the segments file, in the fomrat: 44 | # (utterance-id as astring, recording-id as string, 45 | # start-time as float, end-time as float) 46 | entries = [] 47 | 48 | 49 | while True: 50 | line = sys.stdin.readline() 51 | if line == '': 52 | break 53 | try: 54 | [ utt_id, recording_id, start_time, end_time ] = line.split() 55 | start_time = float(start_time) 56 | end_time = float(end_time) 57 | except: 58 | sys.exit("extend_segment_times.py: could not interpret line: " + line) 59 | if not end_time > start_time: 60 | print("extend_segment_times.py: bad segment (ignoring): " + line, 61 | file = sys.stderr) 62 | recording_to_utt_indexes[recording_id].append(len(entries)) 63 | entries.append([utt_id, recording_id, start_time, end_time]) 64 | 65 | num_times_fixed = 0 66 | 67 | for recording, utt_indexes in recording_to_utt_indexes.items(): 68 | # this_entries is a list of lists, sorted on mid-time. 69 | # Notice: because lists are objects, when we change 'this_entries' 70 | # we change the underlying entries. 
71 | this_entries = sorted([ entries[x] for x in utt_indexes ], 72 | key = lambda x : 0.5 * (x[2] + x[3])) 73 | min_time = 0 74 | max_time = max([ x[3] for x in this_entries ]) + args.last_segment_end_padding 75 | start_padding = args.start_padding 76 | end_padding = args.end_padding 77 | for n in range(len(this_entries)): 78 | this_entries[n][2] = max(min_time, this_entries[n][2] - start_padding) 79 | this_entries[n][3] = min(max_time, this_entries[n][3] + end_padding) 80 | 81 | for n in range(len(this_entries) - 1): 82 | this_end_time = this_entries[n][3] 83 | next_start_time = this_entries[n+1][2] 84 | if this_end_time > next_start_time and args.fix_overlapping_segments == 'true': 85 | midpoint = 0.5 * (this_end_time + next_start_time) 86 | this_entries[n][3] = midpoint 87 | this_entries[n+1][2] = midpoint 88 | num_times_fixed += 1 89 | 90 | 91 | # this prints a number with a certain number of digits after 92 | # the point, while removing trailing zeros. 93 | def FloatToString(f): 94 | num_digits = 6 # we want to print 6 digits after the zero 95 | g = f 96 | while abs(g) > 1.0: 97 | g *= 0.1 98 | num_digits += 1 99 | format_str = '%.{0}g'.format(num_digits) 100 | return format_str % f 101 | 102 | for entry in entries: 103 | [ utt_id, recording_id, start_time, end_time ] = entry 104 | if not start_time < end_time: 105 | print("extend_segment_times.py: bad segment after processing (ignoring): " + 106 | ' '.join(entry), file = sys.stderr) 107 | continue 108 | print(utt_id, recording_id, FloatToString(start_time), FloatToString(end_time)) 109 | 110 | 111 | print("extend_segment_times.py: extended {0} segments; fixed {1} " 112 | "overlapping segments".format(len(entries), num_times_fixed), 113 | file = sys.stderr) 114 | 115 | ## test: 116 | # (echo utt1 reco1 0.2 6.2; echo utt2 reco1 6.3 9.8 )| extend_segment_times.py 117 | # and also try the above with the options --last-segment-end-padding=0.0 --fix-overlapping-segments=false 118 | 119 | 
-------------------------------------------------------------------------------- /utils/data/normalize_data_range.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # This script is intended to read and write scp files possibly containing indexes for 4 | # sub-ranges of features, like 5 | # foo-123 bar.ark:431423[78:89] 6 | # meaning rows 78 through 89 of the matrix located at bar.ark:431423. 7 | # 8 | # Its purpose is to normalize lines which have ranges on top of ranges, like 9 | # 10 | # foo-123 bar.ark:431423[78:89][3:4] 11 | # 12 | # This program interprets the later [] expression as a sub-range of the matrix returned by the first [] 13 | # expression; in this case, we'd get 14 | # 15 | # foo-123 bar.ark:431423[81:82] 16 | # 17 | # Note that these ranges are based on zero-indexing, and have a 'first:last' 18 | # interpretation, so the range [0:0] is a matrix with one row. And also note 19 | # that column ranges are permitted, after row ranges, and the row range may be 20 | # empty, e.g. 21 | 22 | # foo-123 bar.ark:431423[81:82,0:13] 23 | # or 24 | # foo-123 bar.ark:431423[81:82,0:13] 25 | # 26 | 27 | # This program reads from the standard input (or command-line file or files), 28 | # and writes to the standard output. 29 | 30 | 31 | # This function combines ranges, either row or column ranges. start1 and end1 32 | # are the first range, and start2 and end2 are interpreted as a sub-range of the 33 | # first range. It is acceptable for either start1 and end1, or start2 and end2, to 34 | # be empty. 35 | # This function returns the start and end of the range, as an array. 
36 | sub combine_ranges { 37 | ($row_or_column, $start1, $end1, $start2, $end2) = @_; 38 | 39 | if ($start1 eq "" && $end1 eq "") { 40 | return ($start2, $end2); 41 | } elsif ($start2 eq "" && $end2 eq "") { 42 | return ($start1, $end1); 43 | } else { 44 | # For now this script doesn't support the case of ranges like [20:], even 45 | # though they are supported at the C++ level. 46 | if ($start1 eq "" || $start2 eq "" || $end1 eq "" || $end2 == "") { 47 | chop $line; 48 | print("normalize_data_range.pl: could not make sense of line $line\n"); 49 | exit(1) 50 | } 51 | if ($start1 + $end2 > $end1) { 52 | chop $line; 53 | print("normalize_data_range.pl: could not make sense of line $line " . 54 | "[second $row_or_column range too large vs first range, $start1 + $end2 > $end1]\n"); 55 | exit(1); 56 | } 57 | return ($start2+$start1, $end2+$start1); 58 | } 59 | } 60 | 61 | 62 | while (<>) { 63 | $line = $_; 64 | # we only need to do something if we detect two of these ranges. 65 | # The following regexp matches strings of the form ...[foo][bar] 66 | # where foo and bar have no square brackets in them. 67 | if (m/\[([^][]*)\]\[([^][]*)\]\s*$/) { 68 | $before_range = $`; 69 | $first_range = $1; # e.g. '0:500,20:21', or '0:500', or ',0:13'. 70 | $second_range = $2; # has same general format as first_range. 71 | if ($_ =~ m/concat-feats /) { 72 | # sometimes in scp files, we use the command concat-feats to splice together 73 | # two feature matrices. Handling this correctly is complicated and we don't 74 | # anticipate needing it, so we just refuse to process this type of data. 75 | print "normalize_data_range.pl: this script cannot [yet] normalize the data ranges " . 
76 | "if concat-feats was in the input data\n"; 77 | exit(1); 78 | } 79 | print STDERR "matched: $before_range $first_range $second_range\n"; 80 | if ($first_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) { 81 | print STDERR "normalize_data_range.pl: could not make sense of input line $_"; 82 | exit(1); 83 | } 84 | $row_start1 = $2; 85 | $row_end1 = $3; 86 | $col_start1 = $5; 87 | $col_end1 = $6; 88 | 89 | if ($second_range !~ m/^((\d*):(\d*)|)(,(\d*):(\d*)|)$/) { 90 | print STDERR "normalize_data_range.pl: could not make sense of input line $_"; 91 | exit(1); 92 | } 93 | $row_start2 = $2; 94 | $row_end2 = $3; 95 | $col_start2 = $5; 96 | $col_end2 = $6; 97 | 98 | ($row_start, $row_end) = combine_ranges("row", $row_start1, $row_end1, $row_start2, $row_end2); 99 | ($col_start, $col_end) = combine_ranges("column", $col_start1, $col_end1, $col_start2, $col_end2); 100 | 101 | 102 | if ($row_start ne "") { 103 | $range = "$row_start:$row_end"; 104 | } else { 105 | $range = ""; 106 | } 107 | if ($col_start ne "") { 108 | $range .= ",$col_start:$col_end"; 109 | } 110 | print $before_range . "[" . $range . 
"]\n"; 111 | } else { 112 | print; 113 | } 114 | } 115 | 116 | __END__ 117 | 118 | # Testing 119 | # echo foo | utils/data/normalize_data_range.pl -> foo 120 | # echo 'foo[bar:baz]' | utils/data/normalize_data_range.pl -> foo[bar:baz] 121 | # echo 'foo[bar:baz][bin:bang]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of input line foo[bar:baz][bin:bang] 122 | # echo 'foo[10:20][0:5]' | utils/data/normalize_data_range.pl -> foo[10:15] 123 | # echo 'foo[,10:20][,0:5]' | utils/data/normalize_data_range.pl -> foo[,10:15] 124 | # echo 'foo[,0:100][1:15]' | utils/data/normalize_data_range.pl -> foo[1:15,0:100] 125 | # echo 'foo[1:15][,0:100]' | utils/data/normalize_data_range.pl -> foo[1:15,0:100] 126 | # echo 'foo[10:20][0:11]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of line foo[10:20][0:11] [second row range too large vs first range, 10 + 11 > 20] 127 | # echo 'foo[,10:20][,0:11]' | utils/data/normalize_data_range.pl -> normalize_data_range.pl: could not make sense of line foo[,10:20][,0:11] [second column range too large vs first range, 10 + 11 > 20] 128 | --------------------------------------------------------------------------------