├── run.result ├── conf └── mfcc.conf ├── LICENSE ├── README.md ├── run.sh └── local └── make_voxceleb1_sv.pl /run.result: -------------------------------------------------------------------------------- 1 | CDS eer : 15.39 2 | LDA+CDS eer : 8.103 3 | PLDA eer : 5.446 4 | -------------------------------------------------------------------------------- /conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=6955 # the default is zero meaning use the Nyquist (8k in this case, since the sample frequency is 16k). 5 | --num-ceps=20 # higher than the default which is 12. 6 | --snip-edges=false 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Suwon Shon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Speaker Verification task in Voxceleb1 dataset 3 | This repository contains simple scripts for training an i-vector speaker recognition system on the Voxceleb1[1] dataset using Kaldi. It is adapted from the run.sh file in Kaldi/egs/sre10. 4 | 5 | # Requirement 6 | * Kaldi Toolkit 7 | 8 | # How to use 9 | 1. Move all files to {kaldi_root}/egs/sre10 folder 10 | 2. Modify the dataset directories and parameters in the run.sh file to fit your machine. 11 | 3. Run run.sh file 12 | 13 | # Result 14 | 15 | The 2048 component GMM-UBM and 600-dimensional i-vector extractor were trained using the Voxceleb1 training data for the verification task. The training parameters are almost the same as those of the sre10 baseline in the Kaldi egs. 16 | 17 | GMM-2048 CDS eer : 15.39%
18 | GMM-2048 LDA+CDS eer : 8.103%
19 | GMM-2048 PLDA eer : 5.446%
20 | 21 | # Note 22 | The Voxceleb1 dataset, a large-scale speaker identification dataset was published in 2017 with speaker embedding baseline[1] and reported i-vector shows 8.8% EER. The i-vector was extracted using 1024 component GMM-UBM, so the EER is fairly worse compared to the result above. 23 | 24 | 25 | # Reference 26 | [1] A. Nagraniy, J. S. Chung, and A. Zisserman, “VoxCeleb: A large-scale speaker identification dataset,” in Interspeech, 2017, pp. 2616–2620. 27 | 28 | * CSV file in data folder created from here 29 | (https://github.com/pyannote/pyannote-db-voxceleb/blob/master/scripts/prepare_data.ipynb) 30 | 31 | -------------------------------------------------------------------------------- /run.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Written by Suwon Shon, 2018
# swshon@mit.edu
#
# i-vector speaker verification recipe for VoxCeleb1, organised in stages:
#   0: data preparation and MFCC extraction
#   1: VAD decisions
#   2: diagonal/full UBM and i-vector extractor training
#   3: i-vector extraction
#   4: scoring (cosine, LDA+cosine, PLDA) and EER reporting
# Every stage whose number is >= $stage is executed; raise `stage` below to
# resume from a later stage.

stage=0

# NOTE(review): $train_cmd is not set in this file; presumably cmd.sh defines it.
. cmd.sh
. path.sh
set -e

# Root of the VoxCeleb1 data. It must contain the subdirectory voxceleb1_wav
# with the wav files. Override with the VOXCELEB1_ROOT environment variable;
# the default is the path the recipe was originally written for.
voxceleb1_root=${VOXCELEB1_ROOT:-/data/sls/scratch/swshon/dataset/voxceleb1/}

mfccdir=$(pwd)/mfcc
vaddir=$(pwd)/mfcc
trials=data/voxceleb1_trials/voxceleb1_trials_sv
num_components=2048 # Larger than this doesn't make much of a difference.

# Data directories (under data/) that receive features and VAD decisions.
datasets=(voxceleb1_train voxceleb1_test voxceleb1_test_1utt)

# Locations shared by the extraction and scoring stages.
train_ivectors=exp/ivectors_voxceleb1_train
test_ivectors=exp/ivectors_voxceleb1_test_1utt
scores_dir=local/scores_voxceleb1

#######################################
# Compute the EER for one score file and print it.
# Globals:   trials (read)
# Arguments: $1 - label printed in front of the result
#            $2 - score file handed to local/prepare_for_eer.py
# Outputs:   writes "<label> eer : <value>" to stdout
#######################################
report_eer() {
  local label=$1
  local scores=$2
  local eer
  # stderr of compute-eer is discarded on purpose, as in the original recipe.
  eer=$(compute-eer <(python local/prepare_for_eer.py "$trials" "$scores") 2> /dev/null)
  echo "$label eer : $eer"
}


if [ "$stage" -le 0 ]; then
  # Preparing dataset folder voxceleb1. The voxceleb1 folder should have
  # subdir voxceleb1_wav which contain wav files.
  ./local/make_voxceleb1_sv.pl "$voxceleb1_root" data

  # Extract speaker recognition features.
  for name in "${datasets[@]}"; do
    steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 100 --cmd "$train_cmd" \
      "data/${name}" exp/make_mfcc "$mfccdir"
  done

  for name in "${datasets[@]}"; do
    utils/fix_data_dir.sh "data/${name}"
  done
fi

if [ "$stage" -le 1 ]; then
  # VAD decision
  for name in "${datasets[@]}"; do
    sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
      "data/${name}" exp/make_vad "$vaddir"
  done

  for name in "${datasets[@]}"; do
    utils/fix_data_dir.sh "data/${name}"
  done
fi

if [ "$stage" -le 2 ]; then
  # Train UBM and i-vector extractor.
  sid/train_diag_ubm.sh --nj 40 --num-threads 8 --cmd "$train_cmd --mem 20G" \
    data/voxceleb1_train "$num_components" \
    "exp/diag_ubm_${num_components}"

  sid/train_full_ubm.sh --nj 40 --remove-low-count-gaussians false \
    --cmd "$train_cmd --mem 25G" data/voxceleb1_train \
    "exp/diag_ubm_${num_components}" "exp/full_ubm_${num_components}"

  sid/train_ivector_extractor.sh --num-threads 7 --nj 20 --num_processes 2 --cmd "$train_cmd --mem 16G" \
    --ivector-dim 600 \
    --num-iters 5 "exp/full_ubm_${num_components}/final.ubm" data/voxceleb1_train \
    exp/extractor
fi

if [ "$stage" -le 3 ]; then
  # Extract i-vectors. The job count differs per set, so the calls stay explicit.
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 300 \
    exp/extractor data/voxceleb1_train \
    "$train_ivectors"

  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 40 \
    exp/extractor data/voxceleb1_test \
    exp/ivectors_voxceleb1_test

  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G " --nj 100 \
    exp/extractor data/voxceleb1_test_1utt \
    "$test_ivectors"
fi

if [ "$stage" -le 4 ]; then
  # cosine distance scoring
  local/cosine_scoring.sh data/voxceleb1_test_1utt data/voxceleb1_test_1utt \
    "$test_ivectors" "$test_ivectors" "$trials" "$scores_dir"
  report_eer "CDS" "$scores_dir/cosine_scores"

  # LDA+cosine distance scoring
  local/lda_scoring.sh data/voxceleb1_train data/voxceleb1_test_1utt data/voxceleb1_test_1utt \
    "$train_ivectors" "$test_ivectors" "$test_ivectors" "$trials" \
    "$scores_dir"
  report_eer "LDA+CDS" "$scores_dir/lda_scores"

  # PLDA scoring
  ivector-mean "scp:${train_ivectors}/ivector.scp" "${train_ivectors}/mean.vec"
  local/plda_scoring.sh data/voxceleb1_train data/voxceleb1_test_1utt data/voxceleb1_test_1utt \
    "$train_ivectors" "$test_ivectors" "$test_ivectors" \
    "$trials" "$scores_dir"
  report_eer "PLDA" "$scores_dir/plda_scores"
fi


#GMM-2048 CDS eer : 15.39
#GMM-2048 LDA+CDS eer : 8.103
#GMM-2048 PLDA eer : 5.446
-------------------------------------------------------------------------------- /local/make_voxceleb1_sv.pl: --------------------------------------------------------------------------------
#!/usr/bin/perl
#
# Copyright 2018 Suwon Shon
# Usage: make_voxceleb1_sv.pl /voxceleb1/ data/.
#
# Reads voxceleb1.verification.test.csv and voxceleb1.csv from the dataset
# directory and writes, under the output directory:
#   voxceleb1_trials/voxceleb1_trials_sv  - "<enrollment> <test> <target|nontarget>"
#   voxceleb1_train/                      - rows whose 6th CSV field is 'dev'
#   voxceleb1_test/                       - rows whose 6th CSV field is 'tst'
#   voxceleb1_test_1utt/                  - same rows as voxceleb1_test, but every
#                                           utterance is its own speaker
# Each data directory gets wav.scp, utt2spk, spk2gender and spk2utt.

use strict;
use warnings;

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-output>\n";
  print STDERR "e.g. $0 /voxceleb1/ data\n";
  exit(1);
}
my ($db_base, $out_base_dir) = @ARGV;

# Drop trailing slashes so that "$db_base/..." is well-formed whether or not
# the caller passed one (wav paths used to require a trailing slash).
$db_base =~ s{(?<=.)/+$}{};

write_trials("$db_base/voxceleb1.verification.test.csv",
             "$out_base_dir/voxceleb1_trials");
write_data_dir("$out_base_dir/voxceleb1_train", 'dev', 0);
write_data_dir("$out_base_dir/voxceleb1_test", 'tst', 0);
write_data_dir("$out_base_dir/voxceleb1_test_1utt", 'tst', 1);

# Create $dir (and parents); dies on failure. The list form of system()
# bypasses the shell, so unusual characters in $dir are passed through as-is.
sub make_dir {
  my ($dir) = @_;
  if (system("mkdir", "-p", $dir) != 0) {
    die "Error making directory $dir";
  }
}

# Convert the verification trial list $csv into $out_dir/voxceleb1_trials_sv.
# The first line of $csv is skipped; every other line is read as
# "<is_target>,<enrollment>,<test>".
sub write_trials {
  my ($csv, $out_dir) = @_;
  make_dir($out_dir);

  open(my $in, "<", $csv) or die "cannot open trials list $csv: $!";
  open(my $out, ">", "$out_dir/voxceleb1_trials_sv")
    or die "Could not open the output file $out_dir/voxceleb1_trials_sv";

  my $header = <$in>;  # first line is discarded
  while (my $line = <$in>) {
    $line =~ s/\r?\n$//;
    next if $line eq '';
    my ($is_target, $enrollment, $test) = split(",", $line);
    my $target = ($is_target eq '1') ? 'target' : 'nontarget';
    print $out "$enrollment $test $target\n";
  }
  close($in) || die;
  close($out) || die;
}

# Write one data directory from the rows of voxceleb1.csv whose 6th field
# equals $split. When $utt_as_spk is true, the utterance id is used as the
# speaker id (one speaker per utterance); otherwise the 5th CSV field is used.
sub write_data_dir {
  my ($out_dir, $split, $utt_as_spk) = @_;
  make_dir("$out_dir/tmp");

  my $csv = "$db_base/voxceleb1.csv";
  open(my $in, "<", $csv) or die "cannot open utterance list $csv: $!";
  open(my $gndr, ">", "$out_dir/spk2gender")
    or die "Could not open the output file $out_dir/spk2gender";
  open(my $spkr_fh, ">", "$out_dir/utt2spk")
    or die "Could not open the output file $out_dir/utt2spk";
  open(my $wav, ">", "$out_dir/wav.scp")
    or die "Could not open the output file $out_dir/wav.scp";

  my %seen_spk;
  while (my $line = <$in>) {
    $line =~ s/\r?\n$//;
    # Fields: filename, utt, start, end, speaker, sv split, sid split.
    my ($filename, undef, undef, undef, $spkr, $is_sv) = split(",", $line);
    next unless defined $is_sv && $is_sv eq $split;

    my $spk_id = $utt_as_spk ? $filename : $spkr;
    print $wav "$filename $db_base/voxceleb1_wav/${filename}.wav\n";
    print $spkr_fh "$filename $spk_id\n";
    # Every speaker is written with gender 'm', as in the original script;
    # NOTE(review): looks like a placeholder rather than real metadata.
    # Emit each speaker once instead of once per utterance.
    print $gndr "$spk_id m\n" unless $seen_spk{$spk_id}++;
  }

  close($in) || die;
  close($gndr) || die;
  close($spkr_fh) || die;
  close($wav) || die;

  if (system(
    "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
    die "Error creating spk2utt file in directory $out_dir";
  }
  # Exit status intentionally not checked (unchanged from the original);
  # the validation step below is what gates success.
  system("utils/fix_data_dir.sh $out_dir");
  if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
    die "Error validating directory $out_dir";
  }
}
--------------------------------------------------------------------------------