├── run.result
├── conf
└── mfcc.conf
├── LICENSE
├── README.md
├── run.sh
└── local
└── make_voxceleb1_sv.pl
/run.result:
--------------------------------------------------------------------------------
1 | CDS eer : 15.39
2 | LDA+CDS eer : 8.103
3 | PLDA eer : 5.446
4 |
--------------------------------------------------------------------------------
/conf/mfcc.conf:
--------------------------------------------------------------------------------
1 | --sample-frequency=16000
2 | --frame-length=25 # the default is 25
3 | --low-freq=20 # the default.
4 | --high-freq=6955 # the default is zero meaning use the Nyquist (8k in this case).
5 | --num-ceps=20 # higher than the default which is 12.
6 | --snip-edges=false
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Suwon Shon
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Speaker Verification task in Voxceleb1 dataset
3 | This repository contains simple scripts for training an i-vector speaker recognition system on the Voxceleb1 [1] dataset using Kaldi. It is adapted from the run.sh script in Kaldi/egs/sre10.
4 |
5 | # Requirement
6 | * Kaldi Toolkit
7 |
8 | # How to use
9 | 1. Move all files to the {kaldi_root}/egs/sre10 folder.
10 | 2. Modify the dataset directories and parameters in run.sh to fit your machine.
11 | 3. Run run.sh.
12 |
13 | # Result
14 |
15 | The 2048-component GMM-UBM and the 600-dimensional i-vector extractor were trained on the Voxceleb1 training data for the verification task. The training parameters are almost the same as those of the sre10 baseline in the Kaldi egs.
16 |
17 | GMM-2048 CDS eer : 15.39%
18 | GMM-2048 LDA+CDS eer : 8.103%
19 | GMM-2048 PLDA eer : 5.446%
20 |
21 | # Note
22 | The Voxceleb1 dataset, a large-scale speaker identification dataset, was published in 2017 together with a speaker embedding baseline [1]; the i-vector system reported there shows 8.8% EER. That i-vector system used a 1024-component GMM-UBM, so its EER is noticeably worse than the result above.
23 |
24 |
25 | # Reference
26 | [1] A. Nagrani, J. S. Chung, and A. Zisserman, “VoxCeleb: A large-scale speaker identification dataset,” in Interspeech, 2017, pp. 2616–2620.
27 |
28 | * The CSV files in the data folder were created with this notebook:
29 | (https://github.com/pyannote/pyannote-db-voxceleb/blob/master/scripts/prepare_data.ipynb)
30 |
31 |
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # i-vector speaker verification recipe for VoxCeleb1, run stage by stage.
3 | # Written by Suwon Shon, 2018
4 | # swshon@mit.edu
5 | # Each "if [ $stage -le N ]" section below runs when stage <= N.
6 | stage=0
7 | # Source the helpers with an explicit ./ so they are not looked up via $PATH.
8 | . ./cmd.sh
9 | . ./path.sh
10 | set -e
11 | mfccdir="$(pwd)/mfcc"
12 | vaddir="$(pwd)/mfcc"  # same directory as mfccdir
13 | trials=data/voxceleb1_trials/voxceleb1_trials_sv
14 | num_components=2048 # Larger than this doesn't make much of a difference.
15 |
16 |
17 | if [ "$stage" -le 0 ]; then
18 |   # Prepare the data directories. The VoxCeleb1 root must contain the
19 |   # voxceleb1_wav subdirectory holding the wav files; set voxceleb1_root in
20 |   # the environment to override the default location below.
21 |   voxceleb1_root=${voxceleb1_root:-/data/sls/scratch/swshon/dataset/voxceleb1/}
22 |   ./local/make_voxceleb1_sv.pl "$voxceleb1_root" data
23 |
24 |   # Extract speaker recognition features.
25 |   for name in voxceleb1_train voxceleb1_test voxceleb1_test_1utt; do
26 |     steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
27 |       "data/${name}" exp/make_mfcc "$mfccdir"
28 |   done
29 |
30 |   for name in voxceleb1_train voxceleb1_test voxceleb1_test_1utt; do
31 |     utils/fix_data_dir.sh "data/${name}"
32 |   done
33 | fi
34 |
35 | if [ "$stage" -le 1 ]; then
36 |
37 |   # VAD decision for the train, test and test_1utt sets, in that order.
38 |   # "$vaddir" is quoted so a working directory containing spaces still works.
39 |   for name in voxceleb1_train voxceleb1_test voxceleb1_test_1utt; do
40 |     sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
41 |       "data/${name}" exp/make_vad "$vaddir"
42 |   done
43 |
44 |   # Re-run utils/fix_data_dir.sh on every data directory.
45 |   for name in voxceleb1_train voxceleb1_test voxceleb1_test_1utt; do
46 |     utils/fix_data_dir.sh "data/${name}"
47 |   done
48 |
49 | fi
50 |
51 |
52 |
53 | if [ "$stage" -le 2 ]; then
54 |   # Train UBM and i-vector extractor: the diagonal UBM first, then the full
55 |   # UBM (which takes the diagonal UBM directory as input), then the extractor.
56 |   sid/train_diag_ubm.sh --nj 40 --num-threads 8 --cmd "$train_cmd --mem 20G" \
57 |     data/voxceleb1_train "$num_components" \
58 |     "exp/diag_ubm_${num_components}"
59 |
60 |   sid/train_full_ubm.sh --nj 40 --remove-low-count-gaussians false \
61 |     --cmd "$train_cmd --mem 25G" data/voxceleb1_train \
62 |     "exp/diag_ubm_${num_components}" "exp/full_ubm_${num_components}"
63 |
64 |   sid/train_ivector_extractor.sh --num-threads 7 --nj 20 --num_processes 2 --cmd "$train_cmd --mem 16G" \
65 |     --ivector-dim 600 \
66 |     --num-iters 5 "exp/full_ubm_${num_components}/final.ubm" data/voxceleb1_train \
67 |     exp/extractor
68 |
69 | fi
70 |
71 | if [ $stage -le 3 ]; then
72 |
73 | # Extract i-vectors.
74 | sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 300 \
75 | exp/extractor data/voxceleb1_train \
76 | exp/ivectors_voxceleb1_train
77 |
78 | sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 40 \
79 | exp/extractor data/voxceleb1_test \
80 | exp/ivectors_voxceleb1_test
81 |
82 | sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 100 \
83 | exp/extractor data/voxceleb1_test_1utt \
84 | exp/ivectors_voxceleb1_test_1utt
85 |
86 | fi
87 |
88 |
89 |
90 | if [ "$stage" -le 4 ]; then
91 |   # Shorthands for the directories shared by the three scoring back-ends below.
92 |   scores=local/scores_voxceleb1
93 |   tst_data=data/voxceleb1_test_1utt
94 |   tst_ivec=exp/ivectors_voxceleb1_test_1utt
95 |   trn_data=data/voxceleb1_train
96 |   trn_ivec=exp/ivectors_voxceleb1_train
97 |
98 |   # cosine distance scoring
99 |   local/cosine_scoring.sh "$tst_data" "$tst_data" \
100 |     "$tst_ivec" "$tst_ivec" "$trials" "$scores"
101 |   eer=$(compute-eer <(python local/prepare_for_eer.py "$trials" "$scores/cosine_scores") 2> /dev/null)
102 |   echo "CDS eer : $eer"
103 |
104 |   # LDA+cosine distance scoring
105 |   local/lda_scoring.sh "$trn_data" "$tst_data" "$tst_data" \
106 |     "$trn_ivec" "$tst_ivec" "$tst_ivec" "$trials" "$scores"
107 |   eer=$(compute-eer <(python local/prepare_for_eer.py "$trials" "$scores/lda_scores") 2> /dev/null)
108 |   echo "LDA+CDS eer : $eer"
109 |
110 |   # PLDA scoring (the mean of the training i-vectors is written first)
111 |   ivector-mean "scp:$trn_ivec/ivector.scp" "$trn_ivec/mean.vec"
112 |   local/plda_scoring.sh "$trn_data" "$tst_data" "$tst_data" \
113 |     "$trn_ivec" "$tst_ivec" "$tst_ivec" "$trials" "$scores"
114 |   eer=$(compute-eer <(python local/prepare_for_eer.py "$trials" "$scores/plda_scores") 2> /dev/null)
115 |   echo "PLDA eer : $eer"
116 | fi
117 |
118 |
119 | #GMM-2048 CDS eer : 15.39
120 | #GMM-2048 LDA+CDS eer : 8.103
121 | #GMM-2048 PLDA eer : 5.446
122 |
123 |
124 |
--------------------------------------------------------------------------------
/local/make_voxceleb1_sv.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # Writes the VoxCeleb1 verification trial list and data directories.
3 | # Copyright 2018 Suwon Shon
4 | # Usage: make_voxceleb1_sv.pl /voxceleb1/ data/.
5 | # Exactly two arguments are required: the dataset root and the output base directory.
6 | if (@ARGV != 2) {
7 |   print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-output>\n";
8 |   print STDERR "e.g. $0 /voxceleb1/ data\n";
9 |   exit(1);
10 | }
11 | ($db_base, $out_base_dir) = @ARGV;
12 |
13 | # Convert the verification trial CSV into a trial list with one
14 | # "<enrollment> <test> target|nontarget" line per trial.
15 | $out_dir = "$out_base_dir/voxceleb1_trials";
16 | $tmp_dir = "$out_dir";
17 | if (system("mkdir -p $tmp_dir") != 0) {
18 |   die "Error making directory $tmp_dir";
19 | }
20 | open(IN_TRIALS, "<", "$db_base/voxceleb1.verification.test.csv") or die "cannot open trials list";
21 | open(OUT_TRIALS,">", "$out_dir/voxceleb1_trials_sv") or die "Could not open the output file $out_dir/voxceleb1_trials_sv";
22 | # Discard the first line of the CSV (presumably a header row -- confirm).
23 | $dummy = <IN_TRIALS>;
24 | while(<IN_TRIALS>) {
25 |   chomp;
26 |   ($is_target,$enrollment,$test) = split(",", $_);
27 |   $target='nontarget';
28 |   if ($is_target eq 1) {
29 |     $target='target';
30 |   }
31 |   print OUT_TRIALS "$enrollment $test $target\n";
32 | }
33 | close(IN_TRIALS) || die;
34 | close(OUT_TRIALS) || die;
35 |
36 |
37 |
38 |
39 |
40 | # Training set: every row of voxceleb1.csv whose verification-split column
41 | # ($is_sv) is 'dev' becomes one utterance in $out_base_dir/voxceleb1_train.
42 | $out_dir = "$out_base_dir/voxceleb1_train";
43 | $tmp_dir = "$out_dir/tmp";
44 | if (system("mkdir -p $tmp_dir") != 0) {
45 |   die "Error making directory $tmp_dir";
46 | }
47 | open(IN_TRIALS, "<", "$db_base/voxceleb1.csv") or die "cannot open $db_base/voxceleb1.csv: $!";
48 | open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
49 | open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
50 | open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
51 |
52 | while(<IN_TRIALS>) {
53 |   chomp;
54 |   ($filename,$utt,$start,$end,$spkr,$is_sv,$is_sid) = split(",", $_);
55 |   if ($is_sv eq 'dev') {
56 |     # The "/" is explicit so $db_base works with or without a trailing slash.
57 |     print WAV "$filename"," ${db_base}/voxceleb1_wav/${filename}.wav\n";
58 |     print SPKR "$filename $spkr\n";
59 |     # The columns read above carry no gender, so every speaker is written as 'm'.
60 |     print GNDR "$spkr m\n";
61 |   }
62 | }
63 |
64 | close(IN_TRIALS) || die;
65 | close(GNDR) || die;
66 | close(SPKR) || die;
67 | close(WAV) || die;
68 | if (system(
69 |   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
70 |   die "Error creating spk2utt file in directory $out_dir";
71 | }
72 | system("utils/fix_data_dir.sh $out_dir");
73 | if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
74 |   die "Error validating directory $out_dir";
75 | }
76 |
77 |
78 | # Test set: every row of voxceleb1.csv whose verification-split column
79 | # ($is_sv) is 'tst' becomes one utterance in $out_base_dir/voxceleb1_test.
80 | $out_dir = "$out_base_dir/voxceleb1_test";
81 | $tmp_dir = "$out_dir/tmp";
82 | if (system("mkdir -p $tmp_dir") != 0) {
83 |   die "Error making directory $tmp_dir";
84 | }
85 | open(IN_TRIALS, "<", "$db_base/voxceleb1.csv") or die "cannot open $db_base/voxceleb1.csv: $!";
86 | open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
87 | open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
88 | open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
89 |
90 | while(<IN_TRIALS>) {
91 |   chomp;
92 |   ($filename,$utt,$start,$end,$spkr,$is_sv,$is_sid) = split(",", $_);
93 |   if ($is_sv eq 'tst') {
94 |     # The "/" is explicit so $db_base works with or without a trailing slash.
95 |     print WAV "$filename"," ${db_base}/voxceleb1_wav/${filename}.wav\n";
96 |     print SPKR "$filename $spkr\n";
97 |     # The columns read above carry no gender, so every speaker is written as 'm'.
98 |     print GNDR "$spkr m\n";
99 |   }
100 | }
101 |
102 | close(IN_TRIALS) || die;
103 | close(GNDR) || die;
104 | close(SPKR) || die;
105 | close(WAV) || die;
106 | if (system(
107 |   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
108 |   die "Error creating spk2utt file in directory $out_dir";
109 | }
110 | system("utils/fix_data_dir.sh $out_dir");
111 | if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
112 |   die "Error validating directory $out_dir";
113 | }
114 |
115 |
116 |
117 |
118 | # Per-utterance test set: the same 'tst' rows as voxceleb1_test, but written
119 | # to $out_base_dir/voxceleb1_test_1utt with the utterance id as speaker id.
120 | $out_dir = "$out_base_dir/voxceleb1_test_1utt";
121 | $tmp_dir = "$out_dir/tmp";
122 | if (system("mkdir -p $tmp_dir") != 0) {
123 |   die "Error making directory $tmp_dir";
124 | }
125 | open(IN_TRIALS, "<", "$db_base/voxceleb1.csv") or die "cannot open $db_base/voxceleb1.csv: $!";
126 | open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
127 | open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
128 | open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
129 |
130 | while(<IN_TRIALS>) {
131 |   chomp;
132 |   ($filename,$utt,$start,$end,$spkr,$is_sv,$is_sid) = split(",", $_);
133 |   if ($is_sv eq 'tst') {
134 |     # The "/" is explicit so $db_base works with or without a trailing slash.
135 |     print WAV "$filename"," ${db_base}/voxceleb1_wav/${filename}.wav\n";
136 |     # Each utterance is its own speaker here; gender is the fixed value 'm'.
137 |     print SPKR "$filename $filename\n";
138 |     print GNDR "$filename m\n";
139 |   }
140 | }
141 |
142 | close(IN_TRIALS) || die;
143 | close(GNDR) || die;
144 | close(SPKR) || die;
145 | close(WAV) || die;
146 | if (system(
147 |   "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
148 |   die "Error creating spk2utt file in directory $out_dir";
149 | }
150 | system("utils/fix_data_dir.sh $out_dir");
151 | if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
152 |   die "Error validating directory $out_dir";
153 | }
154 |
--------------------------------------------------------------------------------