├── baseline
├── conf
│   ├── pitch.conf
│   ├── mspec.conf
│   ├── online_cmvn.conf
│   ├── vad.conf
│   ├── mfcc.conf
│   └── mfcc_hires.conf
├── sid
├── steps
├── utils
├── local
│   ├── featex
│   │   ├── f0_yaapt
│   │   │   ├── amfm_decompy
│   │   │   │   ├── __init__.py
│   │   │   │   ├── sample.wav
│   │   │   │   ├── LICENSE.txt
│   │   │   │   └── basic_tools.py
│   │   │   ├── 00_batch.sh
│   │   │   └── get_f0.py
│   │   ├── make_pitch_yaapt.sh
│   │   ├── 02_extract_pitch.sh
│   │   ├── create_ppg_data.py
│   │   ├── create_melspec_data.py
│   │   ├── compute_ivect.sh
│   │   ├── compute_hires.sh
│   │   ├── extract_melspec.sh
│   │   ├── extract_bn_nat.sh
│   │   ├── 01_extract_xvectors.sh
│   │   ├── 00_make_am_nsf_data.sh
│   │   ├── split_test_data.py
│   │   ├── compute_xvect.sh
│   │   ├── create_xvector_f0_data.py
│   │   ├── extract_ppg.sh
│   │   ├── extract_bn.sh
│   │   ├── check_pitch_feats.py
│   │   ├── nnet3_compute.sh
│   │   ├── 03_make_am_nsf_netcdf_data.sh
│   │   └── split_am_nsf_data.py
│   ├── chain
│   │   ├── run_cnn_tdnn.sh
│   │   ├── run_chain_common.sh
│   │   └── compare_wer.sh
│   ├── vc
│   │   ├── nsf
│   │   │   ├── 00_run.sh
│   │   │   ├── init.sh
│   │   │   └── 01_gen.sh
│   │   └── am
│   │   │   ├── 00_run.sh
│   │   │   ├── init.sh
│   │   │   └── 01_gen.sh
│   ├── download_models.sh
│   ├── prepare_for_eer.py
│   ├── similarity_matrices
│   │   ├── compute_DeID.py
│   │   ├── compute_Gvd.py
│   │   ├── scores_calibration.py
│   │   ├── create_trial.py
│   │   ├── compute_similarity_matrix.py
│   │   └── compute_similarity_matrices_metrics.sh
│   ├── make_eval2.sh
│   ├── anon
│   │   ├── make_netcdf.sh
│   │   ├── compute_spk_pool_affinity.sh
│   │   ├── compute_spk_pool_cosine.py
│   │   ├── make_pseudospeaker.sh
│   │   ├── anonymize_data_dir.sh
│   │   ├── anonymise_dir_mcadams.py
│   │   └── gen_pseudo_xvecs.py
│   ├── download_data.sh
│   ├── train_model_ss_am.sh
│   ├── train_model_nsf.sh
│   ├── plot
│   │   ├── plot_spk_dur.py
│   │   ├── plot_spk_xvectors.py
│   │   └── plot_spk_xvectors_voxceleb.py
│   ├── scoring
│   │   └── linkability
│   │   │   └── compute_linkability.py
│   ├── asr_eval.sh
│   ├── score.sh
│   ├── download_and_untar.sh
│   ├── run_cleanup_segmentation.sh
│   ├── data_prep_libritts.sh
│   ├── data_prep_adv.sh
│   ├── run_prepfeats_am_nsf.sh
│   ├── create_uniform_segments.py
│   ├── asv_eval.sh
│   ├── nnet3
│   │   ├── tuning
│   │   │   ├── run_tdnn_1a.sh
│   │   │   └── run_tdnn_1b.sh
│   │   └── run_tdnn.sh
│   ├── fix_eval2.py
│   └── split_long_utterance.sh
├── local_librispeech
├── fig
│   ├── data_dir.jpg
│   └── baseline_git.jpg
├── path.sh
├── cmd.sh
├── run_asr_eval_train.sh
├── run_xvector.sh
├── run_asv_eval_train.sh
├── RESULTS_mcadams
├── RESULTS_baseline
├── RESULTS_baseline_cosine
└── cleanup.sh
├── requirements.txt
├── .gitignore
├── .gitmodules
├── nii_cmake
└── CMakeLists.txt
└── install.sh

/baseline/conf/pitch.conf:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/baseline/sid:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/sre08/v1/sid
--------------------------------------------------------------------------------
/baseline/steps:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/wsj/s5/steps
--------------------------------------------------------------------------------
/baseline/utils:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/wsj/s5/utils
--------------------------------------------------------------------------------
/baseline/local/featex/f0_yaapt/amfm_decompy/__init__.py:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baseline/local_librispeech: -------------------------------------------------------------------------------- 1 | ../kaldi/egs/librispeech/s5/local -------------------------------------------------------------------------------- /baseline/local/chain/run_cnn_tdnn.sh: -------------------------------------------------------------------------------- 1 | tuning/run_cnn_tdnn_1a.sh 2 | -------------------------------------------------------------------------------- /baseline/conf/mspec.conf: -------------------------------------------------------------------------------- 1 | --num-mel-bins=80 2 | --allow-downsample=true 3 | -------------------------------------------------------------------------------- /baseline/conf/online_cmvn.conf: -------------------------------------------------------------------------------- 1 | # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh 2 | -------------------------------------------------------------------------------- /baseline/fig/data_dir.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/fig/data_dir.jpg -------------------------------------------------------------------------------- /baseline/conf/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /baseline/fig/baseline_git.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/fig/baseline_git.jpg -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/sample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/local/featex/f0_yaapt/amfm_decompy/sample.wav -------------------------------------------------------------------------------- /baseline/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case). 5 | --num-mel-bins=30 6 | --num-ceps=30 7 | --snip-edges=false 8 | --allow-downsample=true 9 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/00_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | . 
local/vc/nsf/init.sh 5 | 6 | export AM_NSF_FEAT_OUT="$1" 7 | 8 | proj_dir=${nii_scripts}/waveform-modeling/project-NSF 9 | 10 | # preparing data 11 | python ${proj_dir}/../SCRIPTS/00_prepare_data.py config_libri_nsf || exit 1 12 | 13 | # model training 14 | python ${proj_dir}/../SCRIPTS/01_train_network.py config_libri_nsf || exit 1 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backports.functools-lru-cache==1.6.1 2 | cycler==0.10.0 3 | Cython==0.29.14 4 | kaldiio==2.15.1 5 | kiwisolver==1.1.0 6 | matplotlib==3.3.0 7 | numba==0.48 8 | numpy==1.16.5 9 | pandas==1.0.1 10 | pyparsing==2.4.6 11 | python-dateutil==2.8.1 12 | pytz==2019.3 13 | scipy==1.5.2 14 | seaborn==0.10.1 15 | six==1.13.0 16 | subprocess32==3.5.4 17 | librosa==0.7.1 18 | tabulate 19 | tikzplotlib 20 | -------------------------------------------------------------------------------- /baseline/local/vc/am/00_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | . local/vc/am/init.sh 5 | 6 | export AM_NSF_FEAT_OUT="$1" 7 | 8 | proj_dir=${nii_scripts}/acoustic-modeling/project-DAR-continuous 9 | 10 | 11 | # preparing the training data 12 | python ${proj_dir}/../SCRIPTS/01_prepare.py config_libri_am || exit 1 13 | 14 | # training the RNN model 15 | python ${proj_dir}/../SCRIPTS/02_train.py config_libri_am || exit 1 16 | 17 | -------------------------------------------------------------------------------- /baseline/local/featex/make_pitch_yaapt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wav_scp="$1" 4 | pitch_dir="$2" 5 | temp_wav="$3" 6 | 7 | echo $wav_scp, $pitch_dir, $temp_wav 8 | 9 | while read line; do 10 | echo $line 11 | utid=$(echo $line | cut -d' ' -f1) 12 | rspec=$(echo $line | cut -d' ' -f2-) 13 | wav-copy "$rspec" $temp_wav 14 | python local/featex/f0_yaapt/get_f0.py $temp_wav $pitch_dir/${utid}.f0 15 | done < ${wav_scp} 16 | 17 | 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /venv/ 2 | .nfs* 3 | *.pyc 4 | __pycache__/ 5 | *.swp 6 | *.tar.gz 7 | *.tar.xz 8 | netcdf*/ 9 | /.done-* 10 | /boost_*/ 11 | /flac-*/ 12 | /env.sh 13 | /baseline/exp/ 14 | /baseline/LibriSpeech/ 15 | /baseline/data/ 16 | /baseline/mfcc/ 17 | /baseline/corpora 18 | /baseline/corpora/ 19 | /baseline/run_temp.sh 20 | /baseline/run_nt_test.sh 21 | /Miniconda* 22 | temp*/ 23 | /baseline/libri* 24 | /baseline/vctk_dev_enrolls_anon* 25 | -------------------------------------------------------------------------------- /baseline/local/featex/02_extract_pitch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | nj=20 7 | 8 | . 
utils/parse_options.sh 9 | 10 | if [ $# != 1 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --nj=40 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | data_dir=$1 19 | pitch_dir=${data_dir}/pitch 20 | 21 | local/featex/make_pitch.sh --nj $nj --cmd "$train_cmd" ${data_dir} \ 22 | exp/make_pitch ${pitch_dir} 23 | -------------------------------------------------------------------------------- /baseline/local/featex/create_ppg_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import join, basename 3 | 4 | from ioTools import readwrite 5 | from kaldiio import ReadHelper 6 | 7 | args = sys.argv 8 | ppg_file = args[1] 9 | out_dir = args[2] 10 | 11 | ppg_out_dir = join(out_dir, "ppg") 12 | 13 | print("Writing PPG feats.....") 14 | # Write ppg features 15 | with ReadHelper('scp:'+ppg_file) as reader: 16 | for key, mat in reader: 17 | readwrite.write_raw_mat(mat, join(ppg_out_dir, key+'.ppg')) 18 | print("Finished writing PPG feats.") 19 | 20 | -------------------------------------------------------------------------------- /baseline/path.sh: -------------------------------------------------------------------------------- 1 | export KALDI_ROOT=$(realpath ../kaldi) 2 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH 3 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 4 | . $KALDI_ROOT/tools/config/common_path.sh 5 | export LC_ALL=C 6 | 7 | . ../env.sh 8 | 9 | # based on https://stackoverflow.com/a/5947802/12499892 10 | export GREEN='\033[0;32m' 11 | export RED='\033[0;31m' 12 | export NC='\033[0m' # No Color 13 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/00_batch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---- batch script to extract F0 3 | # Usage: 4 | # 1. config INPUT_WAV_DIR and OUTPUT_F0_DIR 5 | # 2. 
run sh 00_batch.sh 6 | # No dependency required 7 | 8 | # Directory of input waveform 9 | INPUT_WAV_DIR=$PWD/../../../test_sample/ 10 | # Directory to store output F0 11 | OUTPUT_F0_DIR=$PWD/../../../test_sample/ 12 | 13 | mkdir ${OUTPUT_F0_DIR} 14 | ls ${INPUT_WAV_DIR} | grep wav > file.lst 15 | cat file.lst | parallel python3 get_f0.py ${INPUT_WAV_DIR}/{/.}.wav ${OUTPUT_F0_DIR}/{/.}.f0 16 | rm file.lst 17 | -------------------------------------------------------------------------------- /baseline/local/featex/create_melspec_data.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | from os.path import join, basename 4 | 5 | from ioTools import readwrite 6 | from kaldiio import WriteHelper, ReadHelper 7 | 8 | args = sys.argv 9 | mspec_file = args[1] 10 | out_dir = args[2] 11 | 12 | mspec_out_dir = join(out_dir, "mel") 13 | 14 | print("Writing MEL feats.....") 15 | # Write mspec features 16 | with ReadHelper('scp:'+mspec_file) as reader: 17 | for key, mat in reader: 18 | #print key, mat.shape 19 | readwrite.write_raw_mat(mat, join(mspec_out_dir, key+'.mel')) 20 | print("Finished writing MEL feats.") 21 | -------------------------------------------------------------------------------- /baseline/local/featex/compute_ivect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . ./cmd.sh 8 | . ./path.sh 9 | 10 | nj=20 11 | model=exp/nnet3_cleaned 12 | dsets= 13 | 14 | . parse_options.sh 15 | 16 | for dset in $dsets; do 17 | expo=$model/ivectors_${dset}_hires 18 | mark=$expo/.done 19 | if [ ! -f $mark ]; then 20 | [ -d $expo ] && rm -r $expo 21 | steps/online/nnet2/extract_ivectors_online.sh \ 22 | --cmd "$train_cmd" --nj $nj data/${dset}_hires \ 23 | $model/extractor $expo || exit 1 24 | touch $mark 25 | fi 26 | done 27 | 28 | echo Done 29 | -------------------------------------------------------------------------------- /baseline/local/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | home=$PWD 6 | expo=exp 7 | check=$expo/models/asv_eval/xvect_01709_1/final.raw 8 | 9 | if [ ! -f $check ]; then 10 | mkdir -p $expo 11 | cd $expo 12 | if [ ! 
-f models.tar.gz ]; then 13 | echo " You will be prompted to enter password for getdata@voiceprivacychallenge.univ-avignon.fr" 14 | sftp getdata@voiceprivacychallenge.univ-avignon.fr < " 14 | echo "Options" 15 | echo " --nj=40 # Number of CPUs to use for feature extraction" 16 | echo " --mspec-config=config/mspec.conf # Melspectrogram config" 17 | exit 1; 18 | fi 19 | 20 | odata_dir=$1 21 | data_dir=$2 22 | mspec_dir=${data_dir}/mspec 23 | 24 | 25 | utils/copy_data_dir.sh ${odata_dir} ${data_dir} 26 | 27 | steps/make_fbank.sh --cmd "$train_cmd" --nj $nj \ 28 | --fbank-config ${mspec_config} ${data_dir} \ 29 | exp/make_fbank/${data_dir} $mspec_dir 30 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_DeID.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | 6 | 7 | def Ddiag(X): 8 | N = X.shape[0] #matrix dimension 9 | m = np.mean(X) #mean of all elements 10 | md = np.mean(np.diag(X)) #mean of diagonal elements 11 | mnd = (N/(N-1))*(m-(md/N)) #mean of off-diagonal elements 12 | return abs(md-mnd) 13 | 14 | 15 | 16 | if __name__=="__main__": 17 | 18 | parser = argparse.ArgumentParser(description='Compute De-Identification') 19 | parser.add_argument('Soo',help="npy file of the matrix Soo", type=str) 20 | parser.add_argument('Sop',help="npy file of the matrix Sop", type=str) 21 | args = parser.parse_args() 22 | 23 | Soo = np.load(args.Soo) 24 | Sop = np.load(args.Sop) 25 | 26 | print(1-(Ddiag(Sop)/Ddiag(Soo))) 27 | 28 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_bn_nat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Extract BNs using chain model 5 | 6 | . path.sh 7 | . cmd.sh 8 | 9 | nj=32 10 | stage=0 11 | 12 | . utils/parse_options.sh 13 | 14 | 15 | # remove layers after BN 16 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 17 | 18 | nj=1 19 | use_gpu=yes 20 | iv_root=exp/nnet3_cleaned 21 | md_name=prefinal-l.raw 22 | cmvn_op='--norm-means=false --norm-vars=false' 23 | dsets="librispeech_dev_clean train_clean_100" 24 | 25 | 26 | . 
parse_options.sh 27 | 28 | 29 | ./compute_hires.sh --nj $nj --dsets "$dsets" 30 | 31 | ./compute_ivect.sh --nj $nj --dsets "$dsets" --model $iv_root 32 | 33 | ./nnet3_compute.sh --nj 1 --use_gpu $use_gpu --iv_root $iv_root --md_name $md_name --dsets "$dsets" 34 | 35 | 36 | 37 | echo Done 38 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_Gvd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | import math 6 | 7 | 8 | def Ddiag(X): 9 | N = X.shape[0] #matrix dimension 10 | m = np.mean(X) #mean of all elements 11 | md = np.mean(np.diag(X)) #mean of diagonal elements 12 | mnd = (N/(N-1))*(m-(md/N)) #mean of off-diagonal elements 13 | return abs(md-mnd) 14 | 15 | if __name__=="__main__": 16 | 17 | parser = argparse.ArgumentParser(description='Compute Gain of Voice Uniqueness') 18 | parser.add_argument('Soo',help="npy file of the similarity matrix Soo", type=str) 19 | parser.add_argument('Spp',help="npy file of the similarity matrix Spp", type=str) 20 | args = parser.parse_args() 21 | 22 | Soo = np.load(args.Soo) 23 | Spp = np.load(args.Spp) 24 | 25 | print(10*np.log10(Ddiag(Spp)/Ddiag(Soo))) 26 | 27 | -------------------------------------------------------------------------------- /baseline/local/make_eval2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | 5 | proto_dir="$1" 6 | librispeech_corpus="$2" 7 | enroll_data="$3" 8 | trial_data="$4" 9 | 10 | local/data_prep_adv.sh ${librispeech_corpus}/dev-clean data/${enroll_data} 11 | local/data_prep_adv.sh ${librispeech_corpus}/dev-clean data/${trial_data} 12 | 13 | rm data/${enroll_data}/spk2utt 14 | rm data/${trial_data}/spk2utt 15 | 16 | python local/fix_eval2.py ${proto_dir} data/${enroll_data} data/${trial_data} || exit 1; 17 | 18 | utils/utt2spk_to_spk2utt.pl < data/${enroll_data}/utt2spk > data/${enroll_data}/spk2utt || exit 1 19 | utils/utt2spk_to_spk2utt.pl < data/${trial_data}/utt2spk > data/${trial_data}/spk2utt || exit 1 20 | 21 | utils/fix_data_dir.sh data/${enroll_data} 22 | utils/fix_data_dir.sh data/${trial_data} 23 | 24 | utils/validate_data_dir.sh --no-text --no-feats data/${enroll_data} 25 | utils/validate_data_dir.sh --no-text --no-feats data/${trial_data} 26 | -------------------------------------------------------------------------------- /baseline/cmd.sh: -------------------------------------------------------------------------------- 1 | # you can change cmd.sh depending on what type of queue you are using. 2 | # If you have no queueing system and want to run on a local machine, you 3 | # can change all instances 'queue.pl' to run.pl (but be careful and run 4 | # commands one by one: most recipes will exhaust the memory on your 5 | # machine). queue.pl works with GridEngine (qsub). slurm.pl works 6 | # with slurm. Different queues are configured differently, with different 7 | # queue names and different ways of specifying things like memory; 8 | # to account for these differences you can create and edit the file 9 | # conf/queue.conf to match your queue's configuration. Search for 10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, 11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
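# For example, on a SLURM cluster both commands could point at slurm.pl instead;
# the values here are hypothetical and should be adapted to your own system and
# queue configuration:
#   export train_cmd="slurm.pl --mem 4G"
#   export decode_cmd="slurm.pl --mem 4G"
# The defaults kept below run every job on the local machine with run.pl.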
12 | 13 | #export train_cmd="queue.pl --mem 4G" 14 | #export decode_cmd="queue.pl --mem 4G" 15 | 16 | export train_cmd=run.pl 17 | export decode_cmd=run.pl 18 | -------------------------------------------------------------------------------- /baseline/local/vc/am/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . path.sh 3 | 4 | # PATH to the pyTools 5 | export TEMP_CURRENNT_PROJECT_PYTOOLS_PATH=${nii_dir}/pyTools 6 | 7 | # PATH to currennt 8 | export TEMP_CURRENNT_PROJECT_CURRENNT_PATH=${nii_dir}/CURRENNT_codes/build/currennt 9 | 10 | # PATH to SOX (http://sox.sourceforge.net/sox.html) 11 | export TEMP_CURRENNT_PROJECT_SOX_PATH=/usr/bin/sox 12 | 13 | # PATH to SV56 (a software to normalize waveform amplitude. 14 | # https://www.itu.int/rec/T-REC-P.56 15 | # This software is not necessary, I used it because it is available in our lab. 16 | # You can use other tools to normalize the waveforms before put them into this project. 17 | # Then, you can set TEMP_CURRENNT_PROJECT_SV56_PATH=None) 18 | #export TEMP_CURRENNT_PROJECT_SV56_PATH=/home/smg/wang/WORK/WORK/TOOL/local/bin/sv56demo 19 | export TEMP_CURRENNT_PROJECT_SV56_PATH=None 20 | 21 | export localpath=`pwd`/local/vc/am 22 | 23 | # Add pyTools to PYTHONPATH 24 | export PYTHONPATH=${PYTHONPATH}:${TEMP_CURRENNT_PROJECT_PYTOOLS_PATH}:${localpath} 25 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | 5 | # PATH to the pyTools 6 | export TEMP_CURRENNT_PROJECT_PYTOOLS_PATH=${nii_dir}/pyTools 7 | 8 | # PATH to currennt 9 | export TEMP_CURRENNT_PROJECT_CURRENNT_PATH=${nii_dir}/CURRENNT_codes/build/currennt 10 | 11 | # PATH to SOX (http://sox.sourceforge.net/sox.html) 12 | export TEMP_CURRENNT_PROJECT_SOX_PATH=/usr/bin/sox 13 | 14 | # PATH to SV56 (a software to normalize waveform amplitude. 15 | # https://www.itu.int/rec/T-REC-P.56 16 | # This software is not necessary, I used it because it is available in our lab. 17 | # You can use other tools to normalize the waveforms before put them into this project. 18 | # Then, you can set TEMP_CURRENNT_PROJECT_SV56_PATH=None) 19 | #export TEMP_CURRENNT_PROJECT_SV56_PATH=/home/smg/wang/WORK/WORK/TOOL/local/bin/sv56demo 20 | export TEMP_CURRENNT_PROJECT_SV56_PATH=None 21 | 22 | export localpath=`pwd`/local/vc/nsf 23 | 24 | # Add pyTools to PYTHONPATH 25 | export PYTHONPATH=${PYTHONPATH}:${TEMP_CURRENNT_PROJECT_PYTOOLS_PATH}:${localpath} 26 | -------------------------------------------------------------------------------- /baseline/local/anon/make_netcdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | stage=0 7 | 8 | . utils/parse_options.sh 9 | 10 | if [ $# != 4 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --stage 0 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | src_data=$1 19 | 20 | ppg_file=$2 21 | xvector_file=$3 22 | 23 | out_dir=$4 24 | 25 | 26 | if [ $stage -le 0 ]; then 27 | mkdir -p $out_dir/scp $out_dir/xvector $out_dir/f0 $out_dir/ppg 28 | 29 | echo "Writing SCP file.." 30 | cut -f 1 -d' ' ${src_data}/utt2spk > ${out_dir}/scp/data.lst || exit 1; 31 | fi 32 | 33 | # initialize pytools 34 | . 
local/vc/am/init.sh 35 | 36 | if [ $stage -le 1 ]; then 37 | python local/featex/create_ppg_data.py ${ppg_file} ${out_dir} || exit 1; 38 | fi 39 | 40 | if [ $stage -le 2 ]; then 41 | echo "Writing xvector and F0 for train." 42 | python local/featex/create_xvector_f0_data.py ${src_data} ${xvector_file} ${out_dir} || exit 1; 43 | fi 44 | 45 | -------------------------------------------------------------------------------- /baseline/local/featex/01_extract_xvectors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . path.sh 6 | . cmd.sh 7 | 8 | nj=$(nproc) 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 3 ]; then 13 | echo "Usage: " 14 | echo " $0 [options] " 15 | echo "Options" 16 | echo " --nj=40 # Number of CPUs to use for feature extraction" 17 | exit 1; 18 | fi 19 | 20 | data_dir=$1 21 | nnet_dir=$2 22 | out_dir=$3 23 | 24 | mfccdir=`pwd`/mfcc 25 | vaddir=`pwd`/mfcc 26 | 27 | mkdir -p ${out_dir} 28 | dataname=$(basename $data_dir) 29 | 30 | steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf \ 31 | --nj $nj --cmd "$train_cmd" ${data_dir} exp/make_mfcc $mfccdir || exit 1 32 | 33 | utils/fix_data_dir.sh ${data_dir} || exit 1 34 | 35 | sid/compute_vad_decision.sh --nj $nj --cmd "$train_cmd" ${data_dir} exp/make_vad $vaddir || exit 1 36 | 37 | utils/fix_data_dir.sh ${data_dir} || exit 1 38 | 39 | sid/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd" --nj $nj \ 40 | $nnet_dir ${data_dir} $out_dir/xvectors_$dataname || exit 1 41 | -------------------------------------------------------------------------------- /baseline/local/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ $# != 1 ]; then 6 | echo "Usage: " 7 | echo " $0 [options] " 8 | exit 1; 9 | fi 10 | 11 | data_set=$1 12 | expo_dir=data/$data_set 13 | 14 | dir=$expo_dir 15 | if [ ! -f $dir/wav.scp ]; then 16 | [ -d $dir ] && rm -r $dir 17 | if [ ! -f $data_set.tar.gz ]; then 18 | echo " You will be prompted to enter password for getdata@voiceprivacychallenge.univ-avignon.fr" 19 | sftp getdata@voiceprivacychallenge.univ-avignon.fr < $dir/text1 29 | cut -d' ' -f2- $dir/text | sed -r 's/,|!|\?|\./ /g' | sed -r 's/ +/ /g' | awk '{print toupper($0)}' > $dir/text2 30 | paste -d' ' $dir/text1 $dir/text2 > $dir/text 31 | rm $dir/text1 $dir/text2 32 | utils/fix_data_dir.sh $dir || exit 1 33 | utils/validate_data_dir.sh --no-feats $dir || exit 1 34 | fi 35 | 36 | echo ' Done' 37 | -------------------------------------------------------------------------------- /baseline/local/train_model_ss_am.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Training speech synthesis acoustic model (see the trained model in /baseline/exp/models/3_ss_am/) LibriTTS-train-clean-100 4 | 5 | # TO CORRECT 6 | 7 | . ./cmd.sh 8 | . ./path.sh 9 | 10 | set -e 11 | 12 | libritts_corpus=$(realpath $corpora/LibriTTS) 13 | 14 | ppg_model=exp/models/1_asr_am/exp 15 | ppg_dir=${ppg_model}/nnet3_cleaned 16 | xvec_nnet_dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a 17 | 18 | am_nsf_train_data="libritts_train_clean_100" 19 | feats_out_dir=$(realpath exp/am_nsf_data) 20 | 21 | stage=0 22 | 23 | . 
utils/parse_options.sh 24 | 25 | if [ $stage -le 0 ]; then 26 | local/data_prep_libritts.sh ${libritts_corpus}/train-clean-100 data/${am_nsf_train_data} || exit 1; 27 | local/run_prepfeats_am_nsf.sh --ppg-model ${ppg_model} --ppg-dir ${ppg_dir} \ 28 | --xvec-nnet-dir ${xvec_nnet_dir} \ 29 | ${am_nsf_train_data} ${feats_out_dir} || exit 1; 30 | fi 31 | 32 | if [ $stage -le 1 ]; then 33 | local/vc/am/00_run.sh ${feats_out_dir} || exit 1; 34 | echo "Model is trained and stored at ${nii_scripts}/acoustic-modeling/project-DAR-continuous/MODELS/DAR_001/" 35 | fi 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /baseline/local/train_model_nsf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Training speech synthesis neural source filter (NSF) model (see the trained model in /baseline/exp/models/4_nsf_am/) on LibriTTS-train-clean-100 4 | # TO CORRECT 5 | 6 | . ./cmd.sh 7 | . ./path.sh 8 | 9 | set -e 10 | 11 | libritts_corpus=$(realpath $corpora/LibriTTS) 12 | 13 | ppg_model=exp/models/1_asr_am/exp 14 | ppg_dir=${ppg_model}/nnet3_cleaned 15 | xvec_nnet_dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a 16 | 17 | am_nsf_train_data="libritts_train_clean_100" 18 | feats_out_dir=$(realpath exp/am_nsf_data) 19 | 20 | stage=0 21 | 22 | . utils/parse_options.sh 23 | 24 | if [ $stage -le 0 ]; then 25 | local/data_prep_libritts.sh ${libritts_corpus}/train-clean-100 data/${am_nsf_train_data} || exit 1; 26 | local/run_prepfeats_am_nsf.sh --ppg-model ${ppg_model} --ppg-dir ${ppg_dir} \ 27 | --xvec-nnet-dir ${xvec_nnet_dir} \ 28 | ${am_nsf_train_data} ${feats_out_dir} || exit 1; 29 | fi 30 | 31 | if [ $stage -le 1 ]; then 32 | local/vc/nsf/00_run.sh ${feats_out_dir} || exit 1; 33 | echo "Model is trained and stored at ${nii_scripts}/waveform-modeling/project-NSF/MODELS/h-sinc-NSF/" 34 | fi 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /baseline/local/anon/compute_spk_pool_affinity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . 
cmd.sh 5 | 6 | set -e 7 | 8 | plda_dir=$1 9 | src_xvectors_dir=$2 10 | pool_xvectors_dir=$3 11 | src_spk=$4 12 | trial_scores=$5 13 | 14 | fake_trials_dir=${src_xvectors_dir}/fake_trials 15 | mkdir -p ${fake_trials_dir} 16 | fake_trials=${fake_trials_dir}/trial_${src_spk} 17 | 18 | # Creating the fake trials file 19 | cut -d' ' -f 1 ${pool_xvectors_dir}/spk_xvector.scp | awk -v a="${src_spk}" '{print a,$1}' - > ${fake_trials} 20 | 21 | $train_cmd exp/scores/log/libritts_pool_scoring.log \ 22 | ivector-plda-scoring --normalize-length=true \ 23 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 24 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:${src_xvectors_dir}/spk_xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 25 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:${pool_xvectors_dir}/spk_xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 26 | "cat '${fake_trials}' | cut -d\ --fields=1,2 |" ${trial_scores} || exit 1; 27 | 28 | 29 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Bernardo J. B. Schmitt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. -------------------------------------------------------------------------------- /baseline/local/featex/00_make_am_nsf_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | dev_spks=20 7 | test_spks=20 8 | 9 | . 
utils/parse_options.sh 10 | 11 | if [ $# != 2 ]; then 12 | echo "Usage: " 13 | echo " $0 [options] " 14 | echo "Options" 15 | echo " --dev-spks=40 # Number of speakers in dev dataset" 16 | echo " --test-spks=40 # Number of speakers in test dataset" 17 | exit 1; 18 | fi 19 | 20 | in_dir=$1 21 | out_dir=$2 22 | mkdir -p ${out_dir} 23 | 24 | python local/featex/split_am_nsf_data.py ${in_dir} ${out_dir} ${dev_spks} ${test_spks} 25 | 26 | # sort each file 27 | train_dir=$out_dir/$(basename $in_dir)_train 28 | dev_dir=$out_dir/$(basename $in_dir)_dev 29 | test_dir=$out_dir/$(basename $in_dir)_test 30 | 31 | echo "Sorting : ${train_dir}, ${dev_dir} and ${test_dir}" 32 | 33 | for f in `ls ${train_dir}`; do 34 | echo "Sorting $f" 35 | sort -u ${train_dir}/$f > ${train_dir}/${f%.*} 36 | rm ${train_dir}/$f 37 | done 38 | 39 | for f in `ls ${dev_dir}`; do 40 | echo "Sorting $f" 41 | sort -u ${dev_dir}/$f > ${dev_dir}/${f%.*} 42 | rm ${dev_dir}/$f 43 | done 44 | 45 | for f in `ls ${test_dir}`; do 46 | echo "Sorting $f" 47 | sort -u ${test_dir}/$f > ${test_dir}/${f%.*} 48 | rm ${test_dir}/$f 49 | done 50 | -------------------------------------------------------------------------------- /baseline/local/featex/split_test_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join, exists 3 | import sys 4 | 5 | args = sys.argv 6 | 7 | root_dir = args[1] 8 | test_file = join(root_dir, 'scp/test.lst') 9 | 10 | test_dir = args[2] 11 | 12 | xvector_dir = join(root_dir, 'xvector') 13 | f0_dir = join(root_dir, 'f0') 14 | mel_dir = join(root_dir, 'mel') 15 | ppg_dir = join(root_dir, 'ppg') 16 | 17 | out_xvector_dir = join(test_dir, 'xvector') 18 | out_f0_dir = join(test_dir, 'f0') 19 | out_mel_dir = join(test_dir, 'mel') 20 | out_ppg_dir = join(test_dir, 'ppg') 21 | 22 | if not exists(out_xvector_dir): 23 | os.makedirs(out_xvector_dir) 24 | if not exists(out_f0_dir): 25 | os.makedirs(out_f0_dir) 26 | if not exists(out_mel_dir): 27 | os.makedirs(out_mel_dir) 28 | if not exists(out_ppg_dir): 29 | os.makedirs(out_ppg_dir) 30 | 31 | with open(test_file) as f: 32 | for line in f.read().splitlines(): 33 | os.rename(join(xvector_dir, line+'.xvector'), join(out_xvector_dir, 34 | line+'.xvector')) 35 | os.rename(join(f0_dir, line+'.f0'), join(out_f0_dir, line+'.f0')) 36 | os.rename(join(mel_dir, line+'.mel'), join(out_mel_dir, line+'.mel')) 37 | os.rename(join(ppg_dir, line+'.ppg'), join(out_ppg_dir, line+'.ppg')) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_dur.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | 3 | import numpy as np 4 | import matplotlib as mpl 5 | mpl.use('Agg') 6 | import matplotlib.pyplot as plt 7 | import operator 8 | 9 | 10 | # Each data dir must contain an utt2dur 11 | data_dirs = ['data/test_clean', 'data/dev_clean', 'data/test_other', 'data/dev_other', 'data/train_960'] 12 | plot_file = 'data/spks_stats.png' 13 | 14 | 15 | spk2dur = {} 16 | for ddir in data_dirs: 17 | with open(join(ddir, 'utt2dur')) as f: 18 | for line in f.read().splitlines(): 19 | sp = line.split() 20 | spk = sp[0].split('-')[0] 21 | cdur = float(sp[1]) 22 | spk2dur[spk] = spk2dur.get(spk, 0.0) + cdur 23 | 24 | print(f"Found {len(spk2dur)} of speakers") 25 | 26 | sorted_spk2dur = sorted(spk2dur.items(), key=operator.itemgetter(1)) 27 | 28 | #ditems = spk2dur.items() 29 | spks = [x[0] for x in 
sorted_spk2dur] 30 | durs = [x[1] for x in sorted_spk2dur] 31 | 32 | mean_dur = round(np.mean(durs), 2) 33 | 34 | x_pos = np.arange(len(spks)) 35 | 36 | plt.bar(x_pos, durs, align='center') 37 | plt.axhline(y=mean_dur, color='r', linestyle='-') 38 | plt.annotate(f'Mean duration = {mean_dur}', xy=(20, mean_dur+10)) 39 | #plt.xticks(x_pos, spks) 40 | plt.ylabel('Duration (sec.)') 41 | plt.grid(True) 42 | 43 | plt.title(f'Durations of {len(spks)} speakers found in LibriSpeech') 44 | 45 | 46 | plt.savefig(plot_file, dpi=300) 47 | -------------------------------------------------------------------------------- /baseline/local/anon/compute_spk_pool_cosine.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from kaldiio import WriteHelper, ReadHelper 3 | import os 4 | from os.path import join, isdir 5 | 6 | from scipy.spatial import distance 7 | 8 | args = sys.argv 9 | 10 | src_xvec_dir = args[1] 11 | pool_xvec_dir = args[2] 12 | scores_dir = args[3] 13 | 14 | if not isdir(scores_dir): 15 | os.makedirs(scores_dir) 16 | 17 | src_xvec_file = join(src_xvec_dir, 'spk_xvector.scp') 18 | pool_xvec_file = join(pool_xvec_dir, 'spk_xvector.scp') 19 | 20 | pool_xvectors = {} 21 | c = 0 22 | with ReadHelper('scp:'+pool_xvec_file) as reader: 23 | for key, xvec in reader: 24 | #print key, mat.shape 25 | pool_xvectors[key] = xvec 26 | c += 1 27 | print("Read ", c, "pool xvectors") 28 | 29 | with ReadHelper('scp:'+src_xvec_file) as reader: 30 | for sspk, sxvec in reader: 31 | print("Computing cosine measure for " + sspk) 32 | with open(join(scores_dir, 'affinity_'+sspk), 'w') as sf: 33 | for pspk, pxvec in pool_xvectors.items(): 34 | # compute cosine distance between src and pool spk 35 | # Multiplying by -1 to ensure compatibility with affinity 36 | # Now lower value will indicate less affinity as compared 37 | # to original cosine distance 38 | dist = -1.0 * distance.cosine(sxvec, pxvec) 39 | sf.write(sspk + ' ' + pspk + ' ' + str(dist) + '\n') 40 | 41 | -------------------------------------------------------------------------------- /baseline/local/vc/am/01_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ------- 3 | # input feature directories 4 | # here, we use features in ../TESTDATA/vctk_vctk_anonymize for demonstration 5 | # 6 | . path.sh 7 | . local/vc/am/init.sh 8 | 9 | proj_dir=${nii_scripts}/acoustic-modeling/project-DAR-continuous 10 | test_data_dir=$1 11 | 12 | output_dir=${test_data_dir}/am_out_mel 13 | output_tmp_dir=${test_data_dir}/am_out_tmp 14 | export TEMP_ACOUSTIC_MODEL_INPUT_DIRS=${test_data_dir}/ppg,${test_data_dir}/xvector,${test_data_dir}/f0 15 | 16 | # where is the directory of the trained model 17 | export TEMP_ACOUSTIC_MODEL_DIRECTORY=exp/models/3_ss_am 18 | 19 | # where is the trained model? 20 | # here, we use network.jsn for demonstration. 21 | # of course, it will generate random noise only 22 | export TEMP_ACOUSTIC_NETWORK_PATH=${TEMP_ACOUSTIC_MODEL_DIRECTORY}/trained_network.jsn 23 | 24 | # where to store the features generated by the trained network? 
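# (Note: ${output_dir} was set above to ${test_data_dir}/am_out_mel; the NSF
# waveform generator local/vc/nsf/01_gen.sh later reads its mel input from the
# same am_out_mel directory, which is how the AM and NSF generation steps are
# chained.)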
25 | export TEMP_ACOUSTIC_OUTPUT_DIRECTORY=${output_dir} 26 | 27 | # directory to save intermediate files (it will be deleted after) 28 | export TEMP_ACOUSTIC_TEMP_OUTPUT_DIRECTORY=${output_tmp_dir} 29 | 30 | temp_dir="exp/tmp" 31 | mkdir -p $temp_dir 32 | export TEMP_ADDITIONAL_COMMAND="--cache_path $temp_dir" 33 | 34 | # 35 | python ${proj_dir}/../SCRIPTS/03_syn.py config_libri_am || exit 1 36 | # after running this scripts, the generated features should be in ${TEMP_ACOUSTIC_OUTPUT_DIRECTORY} 37 | 38 | rm -r ${TEMP_ACOUSTIC_TEMP_OUTPUT_DIRECTORY} 39 | -------------------------------------------------------------------------------- /baseline/local/featex/compute_xvect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #Compute x-vectors using x-vector extractor (trained on VoxCeleb-1,2 data) 9 | 10 | nj_mfcc=20 11 | nj_xvec=20 12 | use_gpu=false 13 | 14 | model=exp/xvector_nnet_1as 15 | #dsets='train_clean_100' 16 | #dsets='train_other_500' 17 | #dsets='librispeech_dev_clean' 18 | #dsets='librispeech_dev_clean_uniq' 19 | #dsets='vctk_dev' 20 | #dsets='vctk_test' 21 | #dsets='vctk_dev_mic1' 22 | #dsets='vctk_dev_mic2' 23 | dsets='librispeech_train_clean_360_uniq' 24 | 25 | 26 | 27 | for dset in $dsets; do 28 | data=data/${dset}_mfcc 29 | mark=$data/.done 30 | if [ ! -f $mark ]; then 31 | [ -d $data ] && rm -r $data 32 | utils/copy_data_dir.sh data/$dset $data 33 | steps/make_mfcc.sh \ 34 | --nj $nj_mfcc \ 35 | --cmd "$train_cmd" \ 36 | --write-utt2num-frames true \ 37 | --mfcc-config conf/mfcc.conf \ 38 | $data 39 | utils/fix_data_dir.sh $data 40 | sid/compute_vad_decision.sh \ 41 | --nj $nj_mfcc \ 42 | --cmd "$train_cmd" \ 43 | $data 44 | utils/fix_data_dir.sh $data 45 | touch $mark 46 | fi 47 | expo=$model/xvectors_$dset 48 | mark=$expo/.done 49 | if [ ! 
-f $mark ]; then 50 | [ -d $expo ] && rm -r $expo 51 | sid/nnet3/xvector/extract_xvectors.sh \ 52 | --nj $nj_xvec \ 53 | --cmd "$train_cmd --mem 4G" \ 54 | --use_gpu $use_gpu \ 55 | $model $data $expo 56 | touch $mark 57 | fi 58 | done 59 | 60 | echo Done 61 | -------------------------------------------------------------------------------- /baseline/local/featex/create_xvector_f0_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import join, basename 3 | 4 | from ioTools import readwrite 5 | from kaldiio import WriteHelper, ReadHelper 6 | import numpy as np 7 | 8 | args = sys.argv 9 | data_dir = args[1] 10 | xvector_file = args[2] 11 | out_dir = args[3] 12 | 13 | dataname = basename(data_dir) 14 | yaap_pitch_dir = join(data_dir, 'yaapt_pitch') 15 | xvec_out_dir = join(out_dir, "xvector") 16 | pitch_out_dir = join(out_dir, "f0") 17 | 18 | # Write pitch features 19 | pitch_file = join(data_dir, 'pitch.scp') 20 | pitch2shape = {} 21 | with ReadHelper('scp:'+pitch_file) as reader: 22 | for key, mat in reader: 23 | pitch2shape[key] = mat.shape[0] 24 | kaldi_f0 = mat[:, 1].squeeze().copy() 25 | yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key+'.f0'), 1) 26 | #unvoiced = np.where(yaapt_f0 == 0)[0] 27 | #kaldi_f0[unvoiced] = 0 28 | #readwrite.write_raw_mat(kaldi_f0, join(pitch_out_dir, key+'.f0')) 29 | f0 = np.zeros(kaldi_f0.shape) 30 | f0[:yaapt_f0.shape[0]] = yaapt_f0 31 | readwrite.write_raw_mat(f0, join(pitch_out_dir, key+'.f0')) 32 | 33 | 34 | # Write xvector features 35 | with ReadHelper('scp:'+xvector_file) as reader: 36 | for key, mat in reader: 37 | #print key, mat.shape 38 | plen = pitch2shape[key] 39 | mat = mat[np.newaxis] 40 | xvec = np.repeat(mat, plen, axis=0) 41 | readwrite.write_raw_mat(xvec, join(xvec_out_dir, key+'.xvector')) 42 | 43 | 44 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_ppg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Extract PPGs using chain model 5 | # This script extract word position dependent phonemes (346) posteriors and 256-bottleneck PPGs based on ppg-type option. 6 | # 7 | . path.sh 8 | . cmd.sh 9 | 10 | nj=32 11 | stage=0 12 | 13 | . 
utils/parse_options.sh 14 | 15 | if [ $# != 3 ]; then 16 | echo "Usage: " 17 | echo " $0 [options] " 18 | echo "Options" 19 | echo " --nj=40 # Number of CPUs to use for feature extraction" 20 | echo " --stage=0 # Extraction stage" 21 | exit 1; 22 | fi 23 | 24 | data=$1 25 | ppg_model=$2 26 | ppg_dir=$3 27 | 28 | original_data_dir=data/${data} 29 | data_dir=data/${data}_hires 30 | 31 | ivec_extractor=${ppg_model}/nnet3_cleaned/extractor 32 | ivec_data_dir=${ppg_model}/nnet3_cleaned/ivectors_${data}_hires 33 | 34 | model_dir=${ppg_model}/chain_cleaned/tdnn_1d_sp 35 | 36 | 37 | 38 | export LC_ALL=C 39 | if [ $stage -le 0 ]; then 40 | utils/copy_data_dir.sh ${original_data_dir} ${data_dir} 41 | steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ 42 | --cmd "$train_cmd" ${data_dir} 43 | 44 | steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ 45 | ${data_dir} ${ivec_extractor} ${ivec_data_dir} 46 | fi 47 | 48 | if [ $stage -le 1 ]; then 49 | # Keeping nj to 1 due to GPU memory issues 50 | local/featex/extract_bn.sh --cmd "$train_cmd" --nj 1 \ 51 | --iv-root ${ivec_data_dir} --model-dir ${model_dir} \ 52 | ${data} ${ppg_dir} || exit 1; 53 | fi 54 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/01_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copied from init.sh 3 | 4 | . path.sh 5 | . local/vc/nsf/init.sh 6 | 7 | # ----- Method 1 8 | # For generation, you can configure config.py and run 9 | #python ../SCRIPTS/02_genwaveform.py config 10 | 11 | # ----- Method 2 12 | # Equivalently, you can set the environment variables below 13 | # rather than manually changing config.py 14 | 15 | # Directories of the input features, which are separated by ',' 16 | #test_mel=/home/bsrivast/asr_data/LibriTTS/am_nsf_data/libritts/test/mel 17 | 18 | test_data_dir=$1 19 | 20 | proj_dir=${nii_scripts}/waveform-modeling/project-NSF 21 | 22 | test_mel=${test_data_dir}/am_out_mel 23 | test_xvector=${test_data_dir}/xvector 24 | test_f0=${test_data_dir}/f0 25 | export TEMP_WAVEFORM_MODEL_INPUT_DIRS=${test_mel},${test_xvector},${test_f0} 26 | 27 | # Path to the model directory 28 | export TEMP_WAVEFORM_MODEL_DIRECTORY=${proj_dir}/MODELS/h-sinc-NSF 29 | 30 | # Path to the directory that will save the generated waveforms 31 | export TEMP_WAVEFORM_OUTPUT_DIRECTORY="${test_data_dir}/nsf_output_wav" 32 | 33 | # Path to the trained_network.jsn (or epoch*.autosave) 34 | export TEMP_WAVEFORM_MODEL_NETWORK_PATH=exp/models/4_nsf/trained_network.jsn 35 | 36 | # Path to a temporary directory to save intermediate files (which will be deleted after generation) 37 | export TEMP_WAVEFORM_TEMP_OUTPUT_DIRECTORY="${test_data_dir}/output_tmp" 38 | 39 | temp_dir="exp/tmp" 40 | mkdir -p $temp_dir 41 | export TEMP_ADDITIONAL_COMMAND="--cache_path $temp_dir" 42 | 43 | # generating 44 | python ${proj_dir}/../SCRIPTS/02_genwaveform.py config_libri_nsf || exit 1 45 | 46 | rm -r ${TEMP_WAVEFORM_TEMP_OUTPUT_DIRECTORY} 47 | 48 | 49 | -------------------------------------------------------------------------------- /nii_cmake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(currennt) 2 | 3 | CMAKE_MINIMUM_REQUIRED (VERSION 2.8) 4 | 5 | # 6 | # CUDA 7 | # 8 | 9 | FIND_PACKAGE (CUDA REQUIRED) 10 | MESSAGE ("-- CUDA_VERSION: ${CUDA_VERSION}") 11 | MESSAGE ("-- CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}") 12 | MESSAGE ("-- CUDA_CUDA_LIBRARY: 
${CUDA_CUDA_LIBRARY}") 13 | MESSAGE ("-- CUDA_CUDART_LIBRARY: ${CUDA_CUDART_LIBRARY}") 14 | MESSAGE ("-- CUDA_cublas_LIBRARY: ${CUDA_cublas_LIBRARY}") 15 | MESSAGE ("-- CUDA_CUFFT_LIBRARIES: ${CUDA_CUFFT_LIBRARIES}") 16 | MESSAGE ("-- CUDA_curand_LIBRARY: ${CUDA_curand_LIBRARY}") 17 | SET (CUDA_ALL_LIBRARIES ${CUDA_CUDA_LIBRARY} ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY}) 18 | INCLUDE_DIRECTORIES ("${CUDA_INCLUDE_DIRS}") 19 | SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") 20 | 21 | # 22 | # BOOST 23 | # 24 | 25 | FIND_PACKAGE (Boost 1.59 REQUIRED COMPONENTS program_options system filesystem random thread) 26 | MESSAGE ("-- Boost_INCLUDE_DIRS: ${Boost_INCLUDE_DIRS}") 27 | MESSAGE ("-- Boost_LIBRARIES: ${Boost_LIBRARIES}") 28 | INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS}) 29 | 30 | # 31 | # NetCDF 32 | # 33 | 34 | LINK_DIRECTORIES (${NETCDF_LIB}) 35 | 36 | # 37 | # FLAGS 38 | # 39 | 40 | SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-security") 41 | 42 | # 43 | # PROJECTS 44 | # 45 | 46 | FILE (GLOB_RECURSE src_lib currennt_lib/*.cpp currennt_lib/*.hpp currennt_lib/*.h currennt_lib/*.cu currennt_lib/*.cuh) 47 | FILE (GLOB_RECURSE src_trainer currennt/*.cpp currennt/*.hpp currennt/*.h currennt/*.cu currennt/*.cuh) 48 | CUDA_ADD_EXECUTABLE (${PROJECT_NAME} ${src_lib} ${src_trainer}) 49 | TARGET_LINK_LIBRARIES (${PROJECT_NAME} ${Boost_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_CUFFT_LIBRARIES} netcdf) 50 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/scores_calibration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | sys.path.append('../cllr/') 5 | import argparse 6 | import numpy as np 7 | 8 | from performance import optimal_llr 9 | 10 | def readMat(ar): 11 | fh = open(ar) 12 | x = [] 13 | for line in fh.readlines(): 14 | y = [str(value) for value in line.split()] 15 | x.append(y) 16 | fh.close() 17 | return x 18 | 19 | if __name__=="__main__": 20 | 21 | parser = argparse.ArgumentParser(description="Scores calibration") 22 | parser.add_argument('scores',help="", type=str) 23 | parser.add_argument('spk',help="spk trials",type=str) 24 | args = parser.parse_args() 25 | 26 | S = np.array(readMat(args.scores)) 27 | SPK = np.array(readMat(args.spk)) 28 | S = S.astype(np.str) 29 | SPK = SPK.astype(np.str) 30 | 31 | NON = [] 32 | TAR = [] 33 | SPK_NON = [] 34 | SPK_TAR = [] 35 | 36 | for i in range(len(SPK)): 37 | if SPK[i,0] == SPK[i,1]: 38 | TAR.append(S[i,:]) 39 | SPK_TAR.append(SPK[i,:]) 40 | else: 41 | NON.append(S[i,:]) 42 | SPK_NON.append(SPK[i,:]) 43 | 44 | NON = np.array(NON) 45 | TAR = np.array(TAR) 46 | SPK_NON = np.array(SPK_NON) 47 | SPK_TAR = np.array(SPK_TAR) 48 | 49 | non = NON[:,2].astype(np.float) 50 | tar = TAR[:,2].astype(np.float) 51 | 52 | tar, non = optimal_llr(tar, non, laplace=True) 53 | 54 | non = non.astype(np.str) 55 | tar = tar.astype(np.str) 56 | 57 | NON = NON.astype(non.dtype) 58 | TAR = TAR.astype(tar.dtype) 59 | 60 | NON[:,2] = non 61 | TAR[:,2] = tar 62 | 63 | S = np.concatenate((TAR,NON)) 64 | SPK = np.concatenate((SPK_TAR,SPK_NON)) 65 | np.savetxt(args.scores+".calibrated",S,fmt="%s") 66 | np.savetxt(args.spk+".calibrated",SPK,fmt="%s") 67 | 68 | -------------------------------------------------------------------------------- /baseline/local/scoring/linkability/compute_linkability.py: -------------------------------------------------------------------------------- 1 
| from performance import linkability, draw_scores 2 | import argparse 3 | import pandas 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Computing the global linkability measure for a list of linkage function score') 7 | parser.add_argument('-s', dest='score_file', type=str, nargs=1, required=True, help='path to score file') 8 | parser.add_argument('-k', dest='key_file', type=str, nargs=1, required=True, help='path to key file') 9 | parser.add_argument('--omega', dest='omega', type=float, nargs=1, required=False, default=1, help='prior ratio (default is 1)') 10 | parser.add_argument('-d', dest='draw_scores', action='store_true', help='flag: draw the score distribution in a figure') 11 | parser.add_argument('-o', dest='output_file', type=str, nargs=1, required=False, help='output path of the png and pdf file (default is linkability_)') 12 | 13 | 14 | 15 | args = parser.parse_args() 16 | # args = parser.parse_args('-s scores.txt -k key.txt'.split(' ')) 17 | # args = parser.parse_args('-s scores.txt -k key.txt -e'.split(' ')) 18 | 19 | scr = pandas.read_csv(args.score_file[0], sep=' ', header=None).pivot_table(index=0, columns=1, values=2) 20 | key = pandas.read_csv(args.key_file[0], sep=' ', header=None).replace('nontarget', False).replace('target', True).pivot_table(index=0, columns=1, values=2) 21 | 22 | matedScores = scr.values[key.values == True] 23 | nonMatedScores = scr.values[key.values == False] 24 | 25 | Dsys, D, bin_centers, bin_edges = linkability(matedScores, nonMatedScores, args.omega) 26 | 27 | if args.draw_scores: 28 | output_file= "linkability_"+args.score_file[0] 29 | if args.output_file is not None: 30 | output_file = args.output_file[0] 31 | draw_scores(matedScores, nonMatedScores, Dsys, D, bin_centers, bin_edges, output_file) 32 | 33 | 34 | 35 | print("linkability: %f" % (Dsys)) 36 | print("") 37 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_bn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . cmd.sh 8 | . path.sh 9 | 10 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 11 | 12 | nj=1 13 | cmd=run.pl 14 | use_gpu=yes 15 | iv_root=exp/nnet3_cleaned 16 | model_dir=exp/chain_cleaned/tdnn_1d_sp 17 | md_name=prefinal-l.raw 18 | cmvn_op='--norm-means=false --norm-vars=false' 19 | 20 | . parse_options.sh 21 | 22 | dsets="$1" 23 | ppg_dir="$2" 24 | 25 | for dset in $dsets; do 26 | ivect=scp:$iv_root/ivector_online.scp 27 | expo=$ppg_dir 28 | mark=$expo/.done 29 | if [ ! -f $mark ]; then 30 | data=data/${dset}_hires 31 | for name in $data/feats.scp $model_dir/$md_name; do 32 | [ ! 
-f $name ] && echo "File $name does not exist" && exit 1 33 | done 34 | sdata=$data/split$nj 35 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 36 | feats="ark:apply-cmvn $cmvn_op --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" 37 | [ -d $expo ] && rm -r $expo 38 | mkdir -p $expo/log 39 | mkdir -p $expo/data 40 | $cmd JOB=1:$nj $expo/log/ppg256_${dset}.JOB.log \ 41 | nnet3-compute \ 42 | --extra-left-context=0 --extra-right-context=0 \ 43 | --extra-left-context-initial=-1 --extra-right-context-final=-1 \ 44 | --frames-per-chunk=50 --use-gpu=$use_gpu --online-ivector-period=10 \ 45 | --online-ivectors=$ivect $model_dir/$md_name "$feats" ark:- \| \ 46 | copy-feats --compress=true ark:- \ 47 | ark,scp:$expo/data/feats.JOB.ark,$expo/data/feats.JOB.scp || exit 1 48 | cat $expo/data/feats.*.scp | sort > $expo/phone_post.scp 49 | rm $expo/data/feats.*.scp 50 | touch $mark 51 | fi 52 | done 53 | 54 | echo Done 55 | -------------------------------------------------------------------------------- /baseline/local/featex/check_pitch_feats.py: -------------------------------------------------------------------------------- 1 | from kaldiio import WriteHelper, ReadHelper 2 | from ioTools import readwrite 3 | 4 | import numpy as np 5 | from os.path import join 6 | 7 | import matplotlib as mpl 8 | mpl.use('Agg') 9 | import matplotlib.pyplot as plt 10 | 11 | data_dir = 'data/eval1_enroll' 12 | yaap_pitch_dir = join(data_dir, 'yaapt_pitch') 13 | 14 | pitch_feats_file = join(data_dir, 'pitch.scp') 15 | pro_pitch_feats_file = join(data_dir, 'processed_pitch.scp') 16 | save_plot_pov = join(data_dir, 'pov.png') 17 | save_plot_nccf = join(data_dir, 'nccf.png') 18 | save_plot_pitch = join(data_dir, 'pitch.png') 19 | save_plot_ypitch = join(data_dir, 'yaapt_pitch.png') 20 | 21 | #with open(pitch_feats_file) as f: 22 | with ReadHelper('scp:'+pitch_feats_file) as reader: 23 | for key, mat in reader: 24 | print key, mat.shape 25 | nccf = mat[:, 0] 26 | pitch = mat[:, 1] 27 | break 28 | 29 | with ReadHelper('scp:'+pro_pitch_feats_file) as reader: 30 | for key, mat in reader: 31 | print key, mat.shape 32 | pov = mat[:, 0] 33 | yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key+'.f0'), 1) 34 | print "yaapt pitch: ", yaapt_f0.shape 35 | #pov = pov / np.sum(pov) 36 | #pitch = mat[:, 1] 37 | break 38 | 39 | x = np.arange(nccf.shape[0]) 40 | x1 = np.arange(yaapt_f0.shape[0]) 41 | 42 | 43 | fig = plt.figure() 44 | ax1 = fig.add_subplot(111) 45 | ax1.plot(x, nccf, 'r') 46 | plt.savefig(save_plot_nccf, dpi=300) 47 | 48 | plt.clf() 49 | 50 | fig = plt.figure() 51 | ax1 = fig.add_subplot(111) 52 | ax1.plot(x, pitch, 'b') 53 | plt.savefig(save_plot_pitch, dpi=300) 54 | 55 | plt.clf() 56 | 57 | fig = plt.figure() 58 | ax1 = fig.add_subplot(111) 59 | ax1.plot(x, pov, 'r') 60 | plt.savefig(save_plot_pov, dpi=300) 61 | 62 | plt.clf() 63 | 64 | fig = plt.figure() 65 | ax1 = fig.add_subplot(111) 66 | ax1.plot(x1, yaapt_f0, 'r') 67 | plt.savefig(save_plot_ypitch, dpi=300) 68 | -------------------------------------------------------------------------------- /baseline/local/featex/nnet3_compute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . ./cmd.sh 8 | . 
./path.sh 9 | 10 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 11 | 12 | nj=1 13 | use_gpu=yes 14 | iv_root=exp/nnet3_cleaned 15 | md_name=prefinal-l.raw 16 | cmvn_op='--norm-means=false --norm-vars=false' 17 | dsets=train_clean_100 18 | 19 | . parse_options.sh 20 | 21 | for dset in $dsets; do 22 | ivect=scp:$iv_root/ivectors_${dset}_hires/ivector_online.scp 23 | for model in exp/chain_cleaned/tdnn_1d_sp; do 24 | expo=$model/$dset 25 | mark=$expo/.done 26 | if [ ! -f $mark ]; then 27 | data=data/${dset}_hires 28 | for name in $data/feats.scp $model/$md_name; do 29 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 30 | done 31 | sdata=$data/split$nj 32 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 33 | feats="ark:apply-cmvn $cmvn_op --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" 34 | [ -d $expo ] && rm -r $expo 35 | mkdir -p $expo/log 36 | mkdir -p $expo/data 37 | "$train_cmd" JOB=1:$nj $expo/log/nnet3-compute.JOB.log \ 38 | nnet3-compute \ 39 | --extra-left-context=0 --extra-right-context=0 \ 40 | --extra-left-context-initial=-1 --extra-right-context-final=-1 \ 41 | --frames-per-chunk=50 --use-gpu=$use_gpu --online-ivector-period=10 \ 42 | --online-ivectors=$ivect $model/$md_name "$feats" ark:- \| \ 43 | copy-feats --compress=true ark:- \ 44 | ark,scp:$expo/data/feats.JOB.ark,$expo/data/feats.JOB.scp || exit 1 45 | cat $expo/data/feats.*.scp | sort > $expo/feats.scp 46 | rm $expo/data/feats.*.scp 47 | touch $mark 48 | fi 49 | done 50 | done 51 | 52 | echo Done 53 | -------------------------------------------------------------------------------- /baseline/local/featex/03_make_am_nsf_netcdf_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | stage=0 7 | 8 | . utils/parse_options.sh 9 | 10 | if [ $# != 8 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --stage 0 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | train_data=$1 19 | dev_data=$2 20 | test_data=$3 21 | 22 | ppg_file=$4 23 | melspec_file=$5 24 | xvec_out_dir=$6 25 | 26 | out_dir=$7 27 | test_dir=$8 28 | 29 | 30 | if [ $stage -le 0 ]; then 31 | mkdir -p $out_dir/scp $out_dir/xvector $out_dir/f0 $out_dir/ppg $out_dir/mel 32 | 33 | echo "Writing SCP files.." 34 | cut -f 1 -d' ' ${train_data}/utt2spk > ${out_dir}/scp/train.lst || exit 1; 35 | cut -f 1 -d' ' ${dev_data}/utt2spk > ${out_dir}/scp/dev.lst || exit 1; 36 | cut -f 1 -d' ' ${test_data}/utt2spk > ${out_dir}/scp/test.lst || exit 1; 37 | fi 38 | 39 | 40 | if [ $stage -le 1 ]; then 41 | python local/featex/create_ppg_data.py ${ppg_file} ${out_dir} || exit 1; 42 | python local/featex/create_melspec_data.py ${melspec_file} ${out_dir} || exit 1; 43 | fi 44 | 45 | if [ $stage -le 2 ]; then 46 | echo "Writing xvector and F0 for train." 47 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${train_data})/xvector.scp 48 | python local/featex/create_xvector_f0_data.py ${train_data} ${xvec_file} ${out_dir} || exit 1; 49 | echo "Writing xvector and F0 for dev." 50 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${dev_data})/xvector.scp 51 | python local/featex/create_xvector_f0_data.py ${dev_data} ${xvec_file} ${out_dir} || exit 1; 52 | echo "Writing xvector and F0 for test." 
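  # (Same processing as for train and dev above: create_xvector_f0_data.py
  # reads pitch.scp and the per-utterance yaapt_pitch/*.f0 files of the data
  # dir, writes the YAAPT F0 zero-padded to the Kaldi pitch length into
  # ${out_dir}/f0, and repeats each utterance x-vector once per frame into
  # ${out_dir}/xvector.)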
53 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${test_data})/xvector.scp 54 | python local/featex/create_xvector_f0_data.py ${test_data} ${xvec_file} ${out_dir} || exit 1; 55 | fi 56 | 57 | if [ $stage -le 3 ]; then 58 | echo "Splitting test data in separate folder..." 59 | python local/featex/split_test_data.py ${out_dir} ${test_dir} || exit 1; 60 | fi 61 | -------------------------------------------------------------------------------- /baseline/local/asr_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . path.sh 6 | . cmd.sh 7 | 8 | nj=$(nproc) 9 | dset=vctk_dev_trials_f_all 10 | model=exp/models/asr_eval 11 | printf -v results '%(%Y-%m-%d-%H-%M-%S)T' -1 12 | results=exp/results-$results 13 | 14 | . utils/parse_options.sh 15 | 16 | ivec_extr=$model/extractor 17 | graph_dir=$model/graph_tgsmall 18 | large_lang=$model/lang_test_tglarge 19 | small_lang=$model/lang_test_tgsmall 20 | data=data/${dset}_hires 21 | ivect=$ivec_extr/ivect_$dset 22 | 23 | spk2utt=data/$dset/spk2utt 24 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 25 | num_spk=$(wc -l < $spk2utt) 26 | [ $nj -gt $num_spk ] && nj=$num_spk 27 | 28 | if [ ! -f $data/.done_mfcc ]; then 29 | printf "${RED} compute MFCC: $dset${NC}\n" 30 | utils/copy_data_dir.sh data/$dset $data || exit 1 31 | steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" --mfcc-config conf/mfcc_hires.conf $data || exit 1 32 | steps/compute_cmvn_stats.sh $data || exit 1 33 | utils/fix_data_dir.sh $data || exit 1 34 | touch $data/.done_mfcc 35 | fi 36 | 37 | if [ ! -f $ivect/.done ]; then 38 | printf "${RED} compute i-vect: $dset${NC}\n" 39 | steps/online/nnet2/extract_ivectors_online.sh --nj $nj --cmd "$train_cmd" \ 40 | $data ${ivec_extr} $ivect || exit 1 41 | touch $ivect/.done 42 | fi 43 | 44 | expo=$model/decode_${dset}_tgsmall 45 | if [ ! -f $expo/.done ]; then 46 | printf "${RED} decoding: $dset${NC}\n" 47 | steps/nnet3/decode.sh \ 48 | --nj $nj --cmd "$decode_cmd" \ 49 | --acwt 1.0 --post-decode-acwt 10.0 \ 50 | --online-ivector-dir $ivect \ 51 | $graph_dir $data $expo || exit 1 52 | mkdir -p $results 53 | grep WER $expo/wer* | utils/best_wer.sh | tee -a $results/ASR-$dset 54 | touch $expo/.done 55 | fi 56 | 57 | expo=$model/decode_${dset}_tglarge 58 | if [ ! 
-f $expo/.done ]; then 59 | printf "${RED} rescoring: $dset${NC}\n" 60 | steps/lmrescore_const_arpa.sh \ 61 | --cmd "$decode_cmd" $small_lang $large_lang \ 62 | $data $model/decode_${dset}_tgsmall $expo || exit 1 63 | mkdir -p $results 64 | grep WER $expo/wer* | utils/best_wer.sh | tee -a $results/ASR-$dset 65 | touch $expo/.done 66 | fi 67 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/create_trial.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | 6 | def readMat(ar): 7 | fh = open(ar) 8 | x = [] 9 | for line in fh.readlines(): 10 | y = [str(value) for value in line.split()] 11 | x.append(y) 12 | fh.close() 13 | return x 14 | 15 | def readList(ar): 16 | fh = open(ar) 17 | x = [] 18 | for line in fh.readlines(): 19 | y = [str(value) for value in line.split()] 20 | x.append((y[0])) 21 | fh.close() 22 | return x 23 | 24 | if __name__=="__main__": 25 | 26 | parser = argparse.ArgumentParser(description='This computes the trial file given to list of segments and utt2spk') 27 | parser.add_argument('osp_segments_scp',help="Original speech segment list", type=str) 28 | parser.add_argument('asp_segments_scp',help="Anonymized speech segment list", type=str) 29 | parser.add_argument('name',help="name of the trial file",type=str) 30 | parser.add_argument('out_dir',help="output directory",type=str) 31 | parser.add_argument('utt2spk',help="utt2spk file", type=str) 32 | args = parser.parse_args() 33 | 34 | osp_segments_scp = readList(args.osp_segments_scp) 35 | asp_segments_scp = readList(args.asp_segments_scp) 36 | name = args.name 37 | utt2spk = readMat(args.utt2spk) 38 | out_dir = args.out_dir 39 | 40 | #Dictionary from utt to spk 41 | D_utt2spk = dict() 42 | for i in range(len(utt2spk)): 43 | D_utt2spk[utt2spk[i][0]] = utt2spk[i][1] 44 | 45 | k = 0 46 | trial = [] 47 | for i in range(len(osp_segments_scp)): 48 | for j in range(k,len(asp_segments_scp)): 49 | if osp_segments_scp[i] != asp_segments_scp[j]: 50 | trial.append([D_utt2spk[osp_segments_scp[i]], osp_segments_scp[i], D_utt2spk[asp_segments_scp[j]], asp_segments_scp[j]]) 51 | 52 | k += 1 53 | 54 | trial = np.array(trial) 55 | segment_trial = trial[:,[1,3]] 56 | spk_trial = trial[:,[0,2]] 57 | np.savetxt(out_dir+"/segments_"+name+"_trial.txt", segment_trial, delimiter=" ", newline = "\n", fmt="%s") 58 | np.savetxt(out_dir+"/spk_"+name+"_trial.txt", spk_trial, delimiter=" ", newline = "\n", fmt="%s") 59 | -------------------------------------------------------------------------------- /baseline/local/score.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 3 | # 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | [ -f ./path.sh ] && . ./path.sh 7 | 8 | # begin configuration section. 9 | cmd=run.pl 10 | stage=0 11 | decode_mbr=true 12 | word_ins_penalty=0.0,0.5,1.0 13 | min_lmwt=7 14 | max_lmwt=17 15 | iter=final 16 | #end configuration section. 17 | 18 | [ -f ./path.sh ] && . ./path.sh 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -ne 3 ]; then 22 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 23 | echo " Options:" 24 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 25 | echo " --stage (0|1|2) # start scoring script from part-way through." 
26 | echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." 27 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 28 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 29 | exit 1; 30 | fi 31 | 32 | data=$1 33 | lang_or_graph=$2 34 | dir=$3 35 | 36 | symtab=$lang_or_graph/words.txt 37 | 38 | for f in $symtab $dir/lat.1.gz $data/text; do 39 | [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; 40 | done 41 | 42 | mkdir -p $dir/scoring/log 43 | 44 | cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt 45 | 46 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 47 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ 48 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 49 | lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ 50 | lattice-best-path --word-symbol-table=$symtab \ 51 | ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; 52 | done 53 | 54 | # Note: the double level of quoting for the sed command 55 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 56 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ 57 | cat $dir/scoring/LMWT.$wip.tra \| \ 58 | utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ 59 | compute-wer --text --mode=present \ 60 | ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; 61 | done 62 | 63 | exit 0; 64 | -------------------------------------------------------------------------------- /baseline/local/download_and_untar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | remove_archive=false 7 | 8 | if [ "$1" == --remove-archive ]; then 9 | remove_archive=true 10 | shift 11 | fi 12 | 13 | if [ $# -ne 4 ]; then 14 | echo "Usage: $0 [--remove-archive] " 15 | echo "e.g.: $0 /export/a15/vpanayotov/data www.openslr.org/resources/11 dev-clean" 16 | echo "With --remove-archive it will remove the archive after successfully un-tarring it." 17 | echo " can be one of: dev-clean, test-clean, dev-other, test-other," 18 | echo " train-clean-100, train-clean-360, train-other-500." 19 | exit 1 20 | fi 21 | 22 | data=$1 23 | url=$2 24 | part=$3 25 | corpus=$4 26 | 27 | #if [ ! -d "$data" ]; then 28 | # echo "$0: no such directory $data" 29 | # exit 1; 30 | #fi 31 | 32 | mkdir -p $data || exit 1 33 | 34 | part_ok=false 35 | list="dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500" 36 | for x in $list; do 37 | if [ "$part" == $x ]; then part_ok=true; fi 38 | done 39 | if ! $part_ok; then 40 | echo "$0: expected to be one of $list, but got '$part'" 41 | exit 1; 42 | fi 43 | 44 | if [ -z "$url" ]; then 45 | echo "$0: empty URL base." 46 | exit 1; 47 | fi 48 | 49 | if [ -f $data/$corpus/$part/.complete ]; then 50 | echo "$0: data part $part was already successfully extracted, nothing to do." 51 | exit 0; 52 | fi 53 | 54 | pushd $data 55 | 56 | if [ ! -f $part.tar.gz ]; then 57 | if ! which wget >/dev/null; then 58 | echo "$0: wget is not installed." 59 | exit 1; 60 | fi 61 | full_url=$url/$part.tar.gz 62 | echo "$0: downloading data from $full_url. This may take some time, please be patient." 63 | 64 | if ! wget --no-check-certificate $full_url; then 65 | echo "$0: error executing wget $full_url" 66 | exit 1; 67 | fi 68 | fi 69 | 70 | if ! 
tar -xvzf $part.tar.gz; then 71 | echo "$0: error un-tarring archive $data/$part.tar.gz" 72 | exit 1 73 | fi 74 | 75 | popd >&/dev/null 76 | 77 | touch $data/$corpus/$part/.complete 78 | 79 | echo "$0: Successfully downloaded and un-tarred $data/$part.tar.gz" 80 | 81 | if $remove_archive; then 82 | echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." 83 | rm $data/$part.tar.gz 84 | fi 85 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/get_f0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | This script uses pYAAPT to extract F0, which is robust to low-quality waveforms 4 | http://bingweb.binghamton.edu/~hhu1/pitch/YAPT.pdf 5 | http://bjbschmitt.github.io/AMFM_decompy/pYAAPT.html 6 | 7 | Usage: 8 | 1. specify configuration in __main__ 9 | 2. $: python 00_get_f0.py input_wav output_f0 10 | 11 | Note: 12 | 1. the output will be binary, float32, little-endian, which 13 | is compatible with HTS-scripts, CURRENNT-scripts 14 | 15 | 2. you can print it to string using SPTK x2x: 16 | $: x2x +fa *.f0 > *.f0.txt 17 | 18 | 3. you can read it through Numpy 19 | >> f = open("PATH_TO_F0",'rb') 20 | >> datatype = np.dtype(("<f4",1)) 21 | >> f0 = np.fromfile(f,dtype=datatype) 22 | >> f.close() 23 | 24 | 4. you can also use pyTools by xin wang 25 | >> from ioTools import readwrite 26 | >> f0 = readwrite.read_raw_mat("PATH_TO_F0", 1) 27 | 28 | """ 29 | import os 30 | import sys 31 | import numpy 32 | 33 | import amfm_decompy.pYAAPT as pYAAPT 34 | import amfm_decompy.basic_tools as basic 35 | 36 | def extractF0(input_wav, output_f0, min_f0 = 60, max_f0 = 400, frame_length = 35, frame_shift = 10): 37 | if os.path.isfile(input_wav): 38 | signal = basic.SignalObj(input_wav) 39 | pitch = pYAAPT.yaapt(signal, **{'f0_min': min_f0, 'f0_max': max_f0, 40 | 'frame_length':frame_length, 41 | 'frame_space':frame_shift}) 42 | f0_value = pitch.samp_values 43 | datatype = numpy.dtype((' " 19 | echo "Options" 20 | echo " --rand-level=utt # [utt, spk] Level of randomness while computing the pseudo-xvectors" 21 | echo " --rand-seed= # Random seed while computing the pseudo-xvectors" 22 | echo " --cross-gender=true # [true, false] Whether to select same or 23 | other gender while computing the pseudo-xvectors" 24 | exit 1; 25 | fi 26 | 27 | src_data=$1 28 | pool_data=$2 29 | xvec_out_dir=$3 30 | plda_dir=$4 31 | 32 | src_dataname=$(basename $src_data) 33 | pool_dataname=$(basename $pool_data) 34 | src_xvec_dir=${xvec_out_dir}/xvectors_${src_dataname} 35 | pool_xvec_dir=${xvec_out_dir}/xvectors_${pool_dataname} 36 | affinity_scores_dir=${src_xvec_dir}/spk_pool_scores 37 | pseudo_xvecs_dir=${src_xvec_dir}/pseudo_xvecs 38 | 39 | mkdir -p ${affinity_scores_dir} ${pseudo_xvecs_dir} 40 | 41 | # Iterate over all the source speakers and generate 42 | # affinity distribution over anonymization pool 43 | src_spk2gender=${src_data}/spk2gender 44 | pool_spk2gender=${pool_data}/spk2gender 45 | 46 | if [ $stage -le 0 ]; then 47 | if [ "$distance" = "cosine" ]; then 48 | echo "Computing cosine similarity between source and each pool speaker." 49 | python local/anon/compute_spk_pool_cosine.py ${src_xvec_dir} ${pool_xvec_dir} \ 50 | ${affinity_scores_dir} 51 | elif [ "$distance" = "plda" ]; then 52 | echo "Computing PLDA affinity scores of each source speaker to each pool speaker."
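# A rough sketch of the scoring (inferred from the calls above and below, not an exact spec):
# with --distance cosine, each source speaker s is compared to every pool speaker p through the
# cosine of their x-vectors, cos(x_s, x_p) = (x_s . x_p) / (||x_s|| * ||x_p||), while the PLDA
# branch below scores the same pairs with PLDA log-likelihood ratios; either way the per-speaker
# affinity scores land in ${affinity_scores_dir} and are filtered by gender, sorted, and reduced
# to a pseudo-speaker x-vector by gen_pseudo_xvecs.py in stage 1.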
53 | cut -d\ -f 1 ${src_spk2gender} | while read s; do 54 | #echo "Speaker: $s" 55 | local/anon/compute_spk_pool_affinity.sh ${plda_dir} ${src_xvec_dir} ${pool_xvec_dir} \ 56 | "$s" "${affinity_scores_dir}/affinity_${s}" || exit 1; 57 | done 58 | fi 59 | fi 60 | 61 | if [ $stage -le 1 ]; then 62 | # Filter the scores based on gender and then sort them based on affinity. 63 | # Select the xvectors of 100 farthest speakers and average them to get pseudospeaker. 64 | python local/anon/gen_pseudo_xvecs.py ${src_data} ${pool_data} ${affinity_scores_dir} \ 65 | ${xvec_out_dir} ${pseudo_xvecs_dir} ${rand_level} ${cross_gender} ${proximity} ${rand_seed} || exit 1; 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /baseline/local/run_cleanup_segmentation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # 2016 Yiming Wang 5 | # 2016 Johns Hopkins University (author: Daniel Povey) 6 | # Apache 2.0 7 | 8 | # This script demonstrates how to re-segment training data selecting only the 9 | # "good" audio that matches the transcripts. 10 | # The basic idea is to decode with an existing in-domain acoustic model, and a 11 | # biased language model built from the reference, and then work out the 12 | # segmentation from a ctm like file. 13 | 14 | # For nnet3 and chain results after cleanup, see the scripts in 15 | # local/nnet3/run_tdnn.sh and local/chain/run_tdnn_6z.sh 16 | 17 | # GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets 18 | # [will add these later]. 19 | 20 | set -e 21 | set -o pipefail 22 | set -u 23 | 24 | stage=0 25 | cleanup_stage=0 26 | data=data/train_clean_360 27 | cleanup_affix=cleaned 28 | srcdir=exp/tri3b 29 | nj=10 #40 30 | decode_nj=10 31 | decode_num_threads=4 32 | 33 | . ./path.sh 34 | . ./cmd.sh 35 | . ./utils/parse_options.sh 36 | 37 | cleaned_data=${data}_${cleanup_affix} 38 | 39 | dir=${srcdir}_${cleanup_affix}_work 40 | cleaned_dir=${srcdir}_${cleanup_affix} 41 | 42 | if [ $stage -le 1 ]; then 43 | # This does the actual data cleanup. 44 | steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ 45 | $data data/lang $srcdir $dir $cleaned_data 46 | fi 47 | 48 | if [ $stage -le 2 ]; then 49 | steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ 50 | $cleaned_data data/lang $srcdir ${srcdir}_ali_${cleanup_affix} 51 | fi 52 | 53 | if [ $stage -le 3 ]; then 54 | steps/train_sat.sh --cmd "$train_cmd" \ 55 | 7000 150000 $cleaned_data data/lang ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} 56 | fi 57 | 58 | if [ $stage -le 4 ]; then 59 | # Test with the models trained on cleaned-up data. 
60 | utils/mkgraph.sh data/lang_test_tgsmall ${cleaned_dir} ${cleaned_dir}/graph_tgsmall 61 | 62 | for dset in test_clean test_other dev_clean dev_other; do 63 | ( 64 | steps/decode_fmllr.sh --nj $decode_nj --num-threads $decode_num_threads \ 65 | --cmd "$decode_cmd" \ 66 | ${cleaned_dir}/graph_tgsmall data/${dset} ${cleaned_dir}/decode_${dset}_tgsmall 67 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 68 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,tgmed} 69 | steps/lmrescore_const_arpa.sh \ 70 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 71 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,tglarge} 72 | steps/lmrescore_const_arpa.sh \ 73 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 74 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,fglarge} 75 | ) & 76 | done 77 | fi 78 | 79 | wait 80 | exit 0 81 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_similarity_matrix.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | import math 6 | from scipy.stats.mstats import gmean 7 | from math import log10, log2 8 | 9 | def readMat(ar): 10 | fh = open(ar) 11 | x = [] 12 | for line in fh.readlines(): 13 | y = [str(value) for value in line.split()] 14 | x.append(y) 15 | fh.close() 16 | return x 17 | 18 | def getListOfSpk(spk_trial): 19 | L = [] 20 | for i in range(len(spk_trial)): 21 | if not spk_trial[i,0] in L: 22 | L.append(spk_trial[i,0]) 23 | return L 24 | 25 | def getListOfLlrGivenAandB(scores,spk_trial,A,B): 26 | a = spk_trial[:,0] 27 | b = spk_trial[:,1] 28 | indexes_a = np.where(a == A)[0] 29 | indexes_b = np.where(b == B)[0] 30 | indexes = list(set(indexes_a)&set(indexes_b)) 31 | return scores[indexes] 32 | #return (10**scores[indexes]/(1 + 10**scores[indexes])) 33 | 34 | if __name__=="__main__": 35 | 36 | parser = argparse.ArgumentParser(description='Compute the similarity matrix given the PLDA output scores and the speaker id trial file') 37 | parser.add_argument('scores',help="PLDA output scores file", type=str) 38 | parser.add_argument('spk_trial',help="speaker trial file (speaker id corresponding to the trial file)", type=str) 39 | parser.add_argument('out_dir',help="output directory",type=str) 40 | parser.add_argument('name',help="name of the similarity matrix",type=str) 41 | args = parser.parse_args() 42 | 43 | scores = np.array(readMat(args.scores))[:,2] 44 | scores = np.array([float(s) for s in scores]) 45 | spk_trial = np.array(readMat(args.spk_trial)) 46 | out_dir = args.out_dir 47 | name = args.name 48 | 49 | #sum_llrs = sum(10**scores) 50 | spk_list = getListOfSpk(spk_trial) 51 | N_spk = len(spk_list) 52 | 53 | similarity_matrix = np.zeros((N_spk,N_spk)) 54 | k = 0 55 | for i in range(N_spk): 56 | for j in range(k,N_spk): 57 | LLR = getListOfLlrGivenAandB(scores,spk_trial,spk_list[i],spk_list[j]) 58 | #c = gmean(LLR) 59 | LLR = np.array(LLR) 60 | #if i == j: 61 | # c = np.sum(np.log2(1+ 1/LR))/len(LR) 62 | #else: 63 | # c = np.sum(np.log2(1+ LR))/len(LR) 64 | #c = sum(np.log2(1+LR)/len(LR)) 65 | c = 1/(1 + np.exp(-(np.sum(LLR)/len(LLR)))) 66 | similarity_matrix[i,j] = c #(sum(LLR)/len(LLR)) 67 | similarity_matrix[j,i] = c #(sum(LLR)/len(LLR)) 68 | k += 1 69 | 70 | #print("sum conf") 71 | #print(np.sum(similarity_matrix)) 72 | 73 | #similarity_matrix = similarity_matrix/np.sum(similarity_matrix) 74 | 75 | 
np.save(out_dir+"/similarity_matrix_"+name,similarity_matrix) 76 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/basic_tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Auxiliary classes and functions for used by the other AMFM_decompy modules. 4 | 5 | Version 1.0.8.1 6 | 09/Jul/2018 Bernardo J.B. Schmitt - bernardo.jb.schmitt@gmail.com 7 | """ 8 | 9 | import numpy as np 10 | from scipy.signal import lfilter 11 | 12 | 13 | """ 14 | Creates a signal object. 15 | """ 16 | 17 | class SignalObj(object): 18 | 19 | def __init__(self, *args): 20 | 21 | if len(args) == 1: 22 | try: 23 | from scipy.io import wavfile 24 | except: 25 | print("ERROR: Wav modules could not loaded!") 26 | raise KeyboardInterrupt 27 | self.fs, self.data = wavfile.read(args[0]) 28 | self.name = args[0] 29 | elif len(args) == 2: 30 | self.data = args[0] 31 | self.fs = args[1] 32 | 33 | if self.data.dtype.kind == 'i': 34 | self.nbits = self.data.itemsize*8 35 | self.data = pcm2float(self.data, dtype='f') 36 | 37 | self.size = len(self.data) 38 | self.fs = float(self.fs) 39 | 40 | if self.size == self.data.size/2: 41 | print("Warning: stereo wav file. Converting it to mono for the analysis.") 42 | self.data = (self.data[:,0]+self.data[:,1])/2 43 | 44 | 45 | """ 46 | Filters the signal data by a bandpass filter. 47 | """ 48 | def filtered_version(self, bp_filter): 49 | 50 | tempData = lfilter(bp_filter.b, bp_filter.a, self.data) 51 | 52 | self.filtered = tempData[0:self.size:bp_filter.dec_factor] 53 | self.new_fs = self.fs/bp_filter.dec_factor 54 | 55 | """ 56 | Method that uses the pitch values to estimate the number of modulated 57 | components in the signal. 58 | """ 59 | 60 | def set_nharm(self, pitch_track, n_harm_max): 61 | 62 | n_harm = (self.fs/2)/np.amax(pitch_track) - 0.5 63 | self.n_harm = int(np.floor(min(n_harm, n_harm_max))) 64 | 65 | """ 66 | Adds a zero-mean gaussian noise to the signal. 67 | """ 68 | 69 | def noiser(self, pitch_track, SNR): 70 | 71 | self.clean = np.empty((self.size)) 72 | self.clean[:] = self.data 73 | 74 | RMS = np.std(self.data[pitch_track > 0]) 75 | noise = np.random.normal(0, RMS/(10**(SNR/20)), self.size) 76 | self.data += noise 77 | 78 | """ 79 | Transform a pcm raw signal into a float one, with values limited between -1 and 80 | 1. 81 | """ 82 | 83 | def pcm2float(sig, dtype=np.float64): 84 | 85 | sig = np.asarray(sig) # make sure it's a NumPy array 86 | assert sig.dtype.kind == 'i', "'sig' must be an array of signed integers!" 87 | dtype = np.dtype(dtype) # allow string input (e.g. 'f') 88 | 89 | # Note that 'min' has a greater (by 1) absolute value than 'max'! 90 | # Therefore, we use 'min' here to avoid clipping. 
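# Concretely: for int16 input np.iinfo(np.int16).min is -32768, so the division below maps
# -32768 to exactly -1.0 and +32767 to 32767/32768 (about 0.99997), keeping every sample in
# [-1, 1) without clipping.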
91 | return sig.astype(dtype) / dtype.type(-np.iinfo(sig.dtype).min) 92 | 93 | -------------------------------------------------------------------------------- /baseline/local/data_prep_libritts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vassil Panayotov 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Modifications Copyright 2019 Nagoya University (author: Takenori Yoshimura) 6 | # Apache 2.0 7 | 8 | if [ "$#" -ne 2 ]; then 9 | echo "Usage: $0 " 10 | echo "e.g.: $0 /export/a15/vpanayotov/data/LibriTTS/dev-clean data/dev-clean" 11 | exit 1 12 | fi 13 | 14 | src=$1 15 | dst=$2 16 | 17 | spk_file=$src/../SPEAKERS.txt 18 | 19 | mkdir -p $dst || exit 1 20 | 21 | [ ! -d $src ] && echo "$0: no such directory $src" && exit 1 22 | [ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1 23 | 24 | 25 | wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp 26 | trans=$dst/text; [[ -f "$trans" ]] && rm $trans 27 | utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk 28 | spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender 29 | 30 | for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sed -e "s/$/_/" | sort); do 31 | reader_dir=$(echo $reader_dir | sed -e "s/_$//") 32 | reader=$(basename $reader_dir) 33 | if ! [ $reader -eq $reader ]; then # not integer. 34 | echo "$0: unexpected subdirectory name $reader" 35 | exit 1 36 | fi 37 | 38 | reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}') 39 | if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then 40 | echo "Unexpected gender: '$reader_gender'" 41 | exit 1 42 | fi 43 | 44 | for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do 45 | chapter=$(basename $chapter_dir) 46 | if ! [ "$chapter" -eq "$chapter" ]; then 47 | echo "$0: unexpected chapter-subdirectory name $chapter" 48 | exit 1 49 | fi 50 | 51 | #spk="${reader}_${chapter}" 52 | spk="${reader}" 53 | 54 | find -L $chapter_dir/ -iname "*.wav" | sort | while read -r wav_file; do 55 | id="$reader"-$(basename $wav_file .wav) 56 | echo "$id $wav_file" >>$wav_scp 57 | 58 | txt=$(cat $(echo $wav_file | sed -e "s/\.wav$/.normalized.txt/")) 59 | echo "$id $txt" >>$trans 60 | 61 | # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered 62 | # to be a different speaker. This is done for simplicity and because we want 63 | # e.g. the CMVN to be calculated per-chapter 64 | echo "$id $reader" >>$utt2spk 65 | done 66 | done 67 | # reader -> gender map (again using per-chapter granularity) 68 | echo "$reader $reader_gender" >>$spk2gender 69 | done 70 | 71 | spk2utt=$dst/spk2utt 72 | utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 73 | 74 | ntrans=$(wc -l <$trans) 75 | nutt2spk=$(wc -l <$utt2spk) 76 | ! [ "$ntrans" -eq "$nutt2spk" ] && \ 77 | echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1 78 | 79 | utils/fix_data_dir.sh $dst || exit 1 80 | utils/validate_data_dir.sh --no-feats $dst || exit 1 81 | 82 | echo "$0: successfully prepared data in $dst" 83 | 84 | exit 0 85 | -------------------------------------------------------------------------------- /baseline/local/chain/run_chain_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this script has common stages shared across librispeech chain recipes. 
4 | # It generates a new topology in a new lang directory, gets the alignments as 5 | # lattices, and builds a tree for the new topology 6 | set -e 7 | 8 | stage=11 9 | 10 | # input directory names. These options are actually compulsory, and they have 11 | # been named for convenience 12 | gmm_dir= 13 | ali_dir= 14 | lores_train_data_dir= 15 | 16 | num_leaves=6000 17 | 18 | # output directory names. They are also compulsory. 19 | lang= 20 | lat_dir= 21 | tree_dir= 22 | # End configuration section. 23 | echo "$0 $@" # Print the command line for logging 24 | 25 | . ./cmd.sh 26 | . ./path.sh 27 | . ./utils/parse_options.sh 28 | 29 | [ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1; 30 | [ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1; 31 | [ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1; 32 | 33 | for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do 34 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 35 | done 36 | 37 | if [ $stage -le 11 ]; then 38 | echo "$0: creating lang directory with one state per phone." 39 | # Create a version of the lang/ directory that has one state per phone in the 40 | # topo file. [note, it really has two states.. the first one is only repeated 41 | # once, the second one has zero or more repeats.] 42 | if [ -d $lang ]; then 43 | if [ $lang/L.fst -nt data/lang/L.fst ]; then 44 | echo "$0: $lang already exists, not overwriting it; continuing" 45 | else 46 | echo "$0: $lang already exists and seems to be older than data/lang..." 47 | echo " ... not sure what to do. Exiting." 48 | exit 1; 49 | fi 50 | else 51 | cp -r data/lang $lang 52 | silphonelist=$(cat $lang/phones/silence.csl) || exit 1; 53 | nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; 54 | # Use our special topology... note that later on may have to tune this 55 | # topology. 56 | steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo 57 | fi 58 | fi 59 | 60 | if [ $stage -le 12 ]; then 61 | # Get the alignments as lattices (gives the chain training more freedom). 62 | # use the same num-jobs as the alignments 63 | nj=$(cat ${ali_dir}/num_jobs) || exit 1; 64 | steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ 65 | $lang $gmm_dir $lat_dir 66 | rm $lat_dir/fsts.*.gz # save space 67 | fi 68 | 69 | if [ $stage -le 13 ]; then 70 | # Build a tree using our new topology. We know we have alignments for the 71 | # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use 72 | # those. 73 | if [ -f $tree_dir/final.mdl ]; then 74 | echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
75 | exit 1; 76 | fi 77 | steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ 78 | --context-opts "--context-width=2 --central-position=1" \ 79 | --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir 80 | fi 81 | 82 | exit 0; 83 | -------------------------------------------------------------------------------- /baseline/local/data_prep_adv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vassil Panayotov 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0 6 | 7 | if [ "$#" -ne 2 ]; then 8 | echo "Usage: $0 " 9 | echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" 10 | exit 1 11 | fi 12 | 13 | src=$1 14 | dst=$2 15 | 16 | # all utterances are FLAC compressed 17 | if ! which flac >&/dev/null; then 18 | echo "Please install 'flac' on ALL worker nodes!" 19 | exit 1 20 | fi 21 | 22 | spk_file=$src/../SPEAKERS.TXT 23 | 24 | mkdir -p $dst || exit 1; 25 | 26 | [ ! -d $src ] && echo "$0: no such directory $src" && exit 1; 27 | [ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1; 28 | 29 | 30 | wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp 31 | trans=$dst/text; [[ -f "$trans" ]] && rm $trans 32 | utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk 33 | spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender 34 | 35 | for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do 36 | reader=$(basename $reader_dir) 37 | if ! [ $reader -eq $reader ]; then # not integer. 38 | echo "$0: unexpected subdirectory name $reader" 39 | exit 1; 40 | fi 41 | 42 | reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}') 43 | if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then 44 | echo "Unexpected gender: '$reader_gender'" 45 | exit 1; 46 | fi 47 | 48 | # reader -> gender map (again using per-chapter granularity) 49 | echo "${reader} $reader_gender" >>$spk2gender 50 | 51 | for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do 52 | chapter=$(basename $chapter_dir) 53 | if ! [ "$chapter" -eq "$chapter" ]; then 54 | echo "$0: unexpected chapter-subdirectory name $chapter" 55 | exit 1; 56 | fi 57 | 58 | find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ 59 | awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 60 | 61 | chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt 62 | [ ! -f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 63 | cat $chapter_trans >>$trans 64 | 65 | # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered 66 | # to be a different speaker. This is done for simplicity and because we want 67 | # e.g. the CMVN to be calculated per-chapter 68 | # NOT DOING THE ABOVE FOR ADVERSARIAL TRAINING - KEEPING SPEAKER LEVEL DATA 69 | awk -v "reader=$reader" '{printf "%s %s\n", $1, reader}' \ 70 | <$chapter_trans >>$utt2spk || exit 1 71 | 72 | done 73 | done 74 | 75 | spk2utt=$dst/spk2utt 76 | utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 77 | 78 | ntrans=$(wc -l <$trans) 79 | nutt2spk=$(wc -l <$utt2spk) 80 | ! 
[ "$ntrans" -eq "$nutt2spk" ] && \ 81 | echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1; 82 | 83 | utils/fix_data_dir.sh $dst || exit 1; 84 | utils/validate_data_dir.sh --no-feats $dst || exit 1; 85 | 86 | echo "$0: successfully prepared data in $dst" 87 | 88 | exit 0 89 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_xvectors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is a general TSNE plotting script 3 | It needs spk2gender and spk_xvector.scp 4 | ''' 5 | 6 | import matplotlib as mpl 7 | mpl.use('Agg') 8 | import matplotlib.pyplot as plt 9 | import matplotlib.lines as mlines 10 | 11 | from os.path import join 12 | 13 | import numpy as np 14 | from sklearn.manifold import TSNE 15 | 16 | import kaldi_io 17 | 18 | data = 'train' 19 | spk_xvector_dir = f'exp/0007_voxceleb_v2_1a/exp/xvector_nnet_1a/am_nsf/xvectors_train_clean_360_{data}' 20 | spk_xvector_file = join(spk_xvector_dir, 'spk_xvector.scp') 21 | tsne_file = join(spk_xvector_dir, 'spk_xvector.png') 22 | 23 | spk2gender_file = f'data/am_nsf/train_clean_360_{data}/spk2gender' 24 | 25 | def get_cmap(n, name='hsv'): 26 | return plt.cm.get_cmap(name, n) 27 | 28 | # get gender info 29 | spk2gender = {} 30 | with open(spk2gender_file) as f: 31 | for line in f.read().splitlines(): 32 | sp = line.split() 33 | spkid = sp[0] 34 | gen = sp[1] 35 | spk2gender[spkid] = gen 36 | 37 | X = [] 38 | spks = [] 39 | for key, mat in kaldi_io.read_vec_flt_scp(spk_xvector_file): 40 | #print(key, mat.shape) 41 | spks.append(key) 42 | X.append(mat[np.newaxis]) 43 | 44 | X = np.concatenate(X) 45 | print("X = ", X.shape) 46 | mean_X = np.mean(X, axis=0) 47 | std_X = np.std(X, axis=0) 48 | X = (X - mean_X) / std_X 49 | 50 | tsne = TSNE(n_components=2, init='random', random_state=42, 51 | perplexity=5) 52 | Y = tsne.fit_transform(X) 53 | 54 | nspk = Y.shape[0] 55 | #nspk = 3 56 | fig = plt.figure() 57 | ax1 = fig.add_subplot(111) 58 | 59 | #cmap = get_cmap(3, name='tab10') # for male, female and others 60 | #colors = [cmap(i) for i in range(nspk)] 61 | #colors = ['b'] * nspk 62 | #smark = ['s'] * nspk 63 | for i, spkid in enumerate(spks): 64 | # Check gender 65 | scolor = 'b' 66 | smark = '*' 67 | if spkid in spk2gender: 68 | if spk2gender[spkid] == 'm': 69 | scolor = 'g' 70 | elif spk2gender[spkid] == 'f': 71 | scolor = 'r' 72 | ax1.scatter(Y[i, 0], Y[i, 1], c=scolor, s=5, marker=smark) 73 | 74 | plt.title(f'TSNE for {nspk} speakers in AM&NSF {data}. 
One vector per speaker.') 75 | 76 | 77 | # Legend 78 | #other_leg = mlines.Line2D([], [], color='blue', marker='s', linestyle='None', 79 | # markersize=10, label='Others') 80 | #v1male_leg = mlines.Line2D([], [], color='green', marker='*', 81 | # linestyle='None', markersize=5, label='Voxceleb1 Male') 82 | #v2male_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 83 | # markersize=5, label='Voxceleb2 Male') 84 | #v1female_leg = mlines.Line2D([], [], color='red', marker='*', 85 | # linestyle='None', markersize=5, label='Voxceleb1 Female') 86 | #v2female_leg = mlines.Line2D([], [], color='red', marker='^', linestyle='None', 87 | # markersize=5, label='Voxceleb2 Female') 88 | v2_leg = mlines.Line2D([], [], color='green', marker='*', linestyle='None', 89 | markersize=5, label='Male') 90 | v1_leg = mlines.Line2D([], [], color='red', marker='*', 91 | linestyle='None', markersize=5, label='Female') 92 | 93 | plt.legend(handles=[v1_leg, v2_leg]) 94 | 95 | plt.savefig(tsne_file, dpi=300) 96 | -------------------------------------------------------------------------------- /baseline/local/run_prepfeats_am_nsf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | set -e 7 | 8 | #===== begin config ======= 9 | nj=40 10 | stage=0 11 | 12 | # Chain model for PPG extraction 13 | ppg_model= # change this to your pretrained chain model 14 | ppg_dir= # change this to the dir where PPGs will be stored 15 | 16 | # Xvector extractor 17 | xvec_nnet_dir= # change this to pretrained xvector model 18 | 19 | #===== end config ========= 20 | 21 | . utils/parse_options.sh 22 | 23 | if [ $# != 2 ]; then 24 | echo "Usage: " 25 | echo " $0 [options] " 26 | echo "Options" 27 | echo " --nj=40 # Number of CPUs to use for feature extraction" 28 | exit 1; 29 | fi 30 | 31 | # Original data in ./data folder which will be splitted into train, dev and test based on speakers 32 | train_data="$1" # change this to your actual data 33 | feat_out_dir="$2" 34 | 35 | # Mel spectrogram config 36 | melspec_dir=data/${train_data}_mspec 37 | melspec_file=${melspec_dir}/feats.scp 38 | 39 | # Split data 40 | dev_spks=20 41 | test_spks=20 42 | split_dir=data/am_nsf_train 43 | 44 | # x-vector extraction 45 | train_split=${train_data}_train 46 | dev_split=${train_data}_dev 47 | test_split=${train_data}_test 48 | split_data="${train_split} ${dev_split} ${test_split}" 49 | xvec_out_dir=${xvec_nnet_dir}/am_nsf 50 | 51 | # Output directories for netcdf data that will be used by AM & NSF training 52 | train_out=${feat_out_dir}/am_nsf_train # change this to the dir where train, dev data and scp files will be stored 53 | test_out=${feat_out_dir}/am_nsf_test # change this to dir where test data will be stored 54 | 55 | 56 | # Extract PPG using chain model 57 | if [ $stage -le 0 ]; then 58 | echo "Stage 0: PPG extraction." 59 | local/featex/extract_ppg.sh --nj $nj --stage 0 data/${train_data} \ 60 | ${ppg_model} ${ppg_dir}/ppg_${train_data} 61 | fi 62 | 63 | # Extract 80 dimensional mel spectrograms 64 | if [ $stage -le 1 ]; then 65 | echo "Stage 1: Mel spectrogram extraction." 66 | local/featex/extract_melspec.sh --nj $nj data/${train_data} ${melspec_dir} 67 | fi 68 | 69 | # Split the data into train, dev and test 70 | if [ $stage -le 2 ]; then 71 | echo "Stage 2: Splitting the data into train, dev and test based on speakers." 
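# Sketch of the expected outcome (inferred from the options above): 00_make_am_nsf_data.sh holds
# out ${dev_spks} speakers for dev and ${test_spks} speakers for test, leaving the remaining
# speakers for training, and writes three ordinary Kaldi data dirs under ${split_dir}; the
# x-vector and pitch stages below then simply loop over ${split_data}.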
72 | local/featex/00_make_am_nsf_data.sh --dev-spks ${dev_spks} --test-spks ${test_spks} \ 73 | data/${train_data} ${split_dir} 74 | fi 75 | 76 | # Extract xvectors from each split of data 77 | if [ $stage -le 3 ]; then 78 | echo "Stage 3: x-vector extraction." 79 | for sdata in ${split_data}; do 80 | local/featex/01_extract_xvectors.sh --nj ${dev_spks} ${split_dir}/${sdata} ${xvec_nnet_dir} \ 81 | ${xvec_out_dir} 82 | done 83 | fi 84 | 85 | # Extract pitch from each split of data 86 | if [ $stage -le 4 ]; then 87 | echo "Stage 4: Pitch extraction." 88 | for sdata in ${split_data}; do 89 | local/featex/02_extract_pitch.sh --nj ${dev_spks} ${split_dir}/${sdata} 90 | done 91 | fi 92 | 93 | # Create NetCDF data from each split 94 | if [ $stage -le 5 ]; then 95 | echo "Stage 5: Making netcdf data for AM & NSF training." 96 | local/featex/03_make_am_nsf_netcdf_data.sh ${train_split} ${dev_split} ${test_split} \ 97 | ${ppg_dir}/ppg_${train_data}/phone_post.scp ${melspec_file} \ 98 | ${xvec_out_dir} ${train_out} ${test_out} 99 | fi 100 | -------------------------------------------------------------------------------- /baseline/local/create_uniform_segments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014 Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti). Apache 2.0. 3 | 4 | # creates a segments file in the provided data directory 5 | # into uniform segments with specified window and overlap 6 | 7 | from __future__ import division 8 | import imp, sys, argparse, os, math, subprocess 9 | 10 | min_segment_length = 10 # in seconds 11 | def segment(total_length, window_length, overlap = 0): 12 | increment = window_length - overlap 13 | num_windows = int(math.ceil(float(total_length)/increment)) 14 | segments = [(x * increment, min( total_length, (x * increment) + window_length)) for x in range(0, num_windows)] 15 | if segments[-1][1] - segments[-1][0] < min_segment_length: 16 | segments[-2] = (segments[-2][0], segments[-1][1]) 17 | segments.pop() 18 | return segments 19 | 20 | def get_wave_segments(wav_command, window_length, overlap): 21 | raw_output = subprocess.check_output(wav_command+" sox -t wav - -n stat 2>&1 | grep Length ", shell = True) 22 | parts = raw_output.split(":") 23 | if parts[0].strip() != "Length (seconds)": 24 | raise Exception("Failed while processing file ", wav_command) 25 | total_length = float(parts[1]) 26 | segments = segment(total_length, window_length, overlap) 27 | return segments 28 | 29 | def prepare_segments_file(kaldi_data_dir, window_length, overlap): 30 | if not os.path.exists(kaldi_data_dir+'/wav.scp'): 31 | raise Exception("Not a proper kaldi data directory") 32 | ids = [] 33 | files = [] 34 | for line in open(kaldi_data_dir+'/wav.scp').readlines(): 35 | parts = line.split() 36 | ids.append(parts[0]) 37 | files.append(" ".join(parts[1:])) 38 | segments_total = [] 39 | segments_per_recording = [] 40 | for i in range(0, len(ids)): 41 | segments = get_wave_segments(files[i], window_length, overlap) 42 | segments_current_recording = [] 43 | for segment in segments: 44 | segment_string = "{0}-{1:06}-{2:06} {0} {3} {4}".format(ids[i], int(segment[0] * 1000), int(segment[1]* 1000), segment[0], segment[1]) 45 | segments_total.append(segment_string) 46 | segments_current_recording.append(segment_string.split()[0]) 47 | segments_per_recording.append([ids[i], segments_current_recording]) 48 | return segments_total, segments_per_recording 49 | if __name__ == "__main__": 50 | 
usage = """ Python script to create segments file with uniform segment 51 | given the kaldi data directory.""" 52 | sys.stderr.write(str(" ".join(sys.argv))) 53 | main_parser = argparse.ArgumentParser(usage) 54 | parser = argparse.ArgumentParser() 55 | parser.add_argument('--window-length', type = float, default = 30.0, help = 'length of the window used to cut the segment') 56 | parser.add_argument('--overlap', type = float, default = 5.0, help = 'overlap of neighboring windows') 57 | parser.add_argument('data_dir', help='directory such as data/train') 58 | 59 | params = parser.parse_args() 60 | 61 | # write the segments file 62 | segments_file = open(params.data_dir+"/segments", "w") 63 | segments, segments_per_recording = prepare_segments_file(params.data_dir, params.window_length, params.overlap) 64 | segments_file.write("\n".join(segments)) 65 | segments_file.close() 66 | 67 | utt2spk_file = open(params.data_dir + "/utt2spk", "w") 68 | spk2utt_file = open(params.data_dir + "/spk2utt", "w") 69 | # write the utt2spk file 70 | # assumes the recording id is the speaker ir 71 | for i in range(len(segments_per_recording)): 72 | segments = segments_per_recording[i][1] 73 | recording = segments_per_recording[i][0] 74 | spk2utt_file.write("{0} {1}\n".format(recording, " ".join(segments))) 75 | for segment in segments: 76 | utt2spk_file.write("{0} {1}\n".format(segment, recording)) 77 | 78 | spk2utt_file.close() 79 | utt2spk_file.close() 80 | 81 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_xvectors_voxceleb.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | mpl.use('Agg') 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | 6 | 7 | import numpy as np 8 | from sklearn.manifold import TSNE 9 | 10 | import kaldi_io 11 | 12 | spk_xvector_file = 'exp/xvector_nnet_1a/xvectors_train/spk_xvector.scp' 13 | tsne_file = 'exp/xvector_nnet_1a/xvectors_train/voxceleb_spk_xvector_voxversion.png' 14 | 15 | vox1_meta_file = '/home/bsrivast/asr_data/VoxCeleb/voxceleb/vox1_meta_map.csv' 16 | vox2_meta_file = '/home/bsrivast/asr_data/VoxCeleb/voxceleb2/vox2_meta.csv' 17 | 18 | def get_cmap(n, name='hsv'): 19 | return plt.cm.get_cmap(name, n) 20 | 21 | # get gender info 22 | spk2gender = {} 23 | spk2vox = {} 24 | with open(vox1_meta_file) as f: 25 | for line in f.read().splitlines(): 26 | sp = line.split() 27 | spkid = sp[1] 28 | gen = sp[2] 29 | spk2gender[spkid] = gen 30 | spk2vox[spkid] = 1 31 | with open(vox2_meta_file) as f: 32 | for line in f.read().splitlines()[1:]: 33 | sp = line.split(',') 34 | spkid = sp[0].strip() 35 | gen = sp[2].strip() 36 | spk2gender[spkid] = gen 37 | spk2vox[spkid] = 2 38 | 39 | X = [] 40 | spks = [] 41 | for key, mat in kaldi_io.read_vec_flt_scp(spk_xvector_file): 42 | #print(key, mat.shape) 43 | spks.append(key) 44 | X.append(mat[np.newaxis]) 45 | 46 | X = np.concatenate(X) 47 | print("X = ", X.shape) 48 | mean_X = np.mean(X, axis=0) 49 | std_X = np.std(X, axis=0) 50 | X = (X - mean_X) / std_X 51 | 52 | tsne = TSNE(n_components=2, init='random', random_state=42, 53 | perplexity=100) 54 | Y = tsne.fit_transform(X) 55 | 56 | nspk = Y.shape[0] 57 | #nspk = 3 58 | fig = plt.figure() 59 | ax1 = fig.add_subplot(111) 60 | 61 | #cmap = get_cmap(3, name='tab10') # for male, female and others 62 | #colors = [cmap(i) for i in range(nspk)] 63 | #colors = ['b'] * nspk 64 | #smark = ['s'] * nspk 65 | for i, spkid in enumerate(spks): 66 | 
# Check gender 67 | scolor = 'b' 68 | #if spkid in spk2gender: 69 | # if spk2gender[spkid] == 'm': 70 | # scolor = 'g' 71 | # elif spk2gender[spkid] == 'f': 72 | # scolor = 'r' 73 | # Check voxceleb version 74 | smark = 's' 75 | if spkid in spk2vox: 76 | if spk2vox[spkid] == 1: 77 | smark = '*' 78 | scolor = 'r' 79 | elif spk2vox[spkid] == 2: 80 | smark = '^' 81 | scolor = 'g' 82 | 83 | ax1.scatter(Y[i, 0], Y[i, 1], c=scolor, s=1, marker=smark) 84 | 85 | #ax1.scatter(Y[:, 0], Y[:, 1], c=colors, s=1, marker=smark) 86 | plt.title(f'TSNE for {nspk} speakers in Voxceleb train. One vector per speaker.') 87 | 88 | 89 | # Legend 90 | #other_leg = mlines.Line2D([], [], color='blue', marker='s', linestyle='None', 91 | # markersize=10, label='Others') 92 | #v1male_leg = mlines.Line2D([], [], color='green', marker='*', 93 | # linestyle='None', markersize=5, label='Voxceleb1 Male') 94 | #v2male_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 95 | # markersize=5, label='Voxceleb2 Male') 96 | #v1female_leg = mlines.Line2D([], [], color='red', marker='*', 97 | # linestyle='None', markersize=5, label='Voxceleb1 Female') 98 | #v2female_leg = mlines.Line2D([], [], color='red', marker='^', linestyle='None', 99 | # markersize=5, label='Voxceleb2 Female') 100 | v2_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 101 | markersize=5, label='Voxceleb2') 102 | v1_leg = mlines.Line2D([], [], color='red', marker='*', 103 | linestyle='None', markersize=5, label='Voxceleb1') 104 | 105 | plt.legend(handles=[v1_leg, v2_leg]) 106 | 107 | plt.savefig(tsne_file, dpi=300) 108 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | nj=$(nproc) 6 | 7 | home=$PWD 8 | 9 | #conda_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 10 | conda_url=https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh 11 | venv_dir=$PWD/venv 12 | 13 | netcdf=https://github.com/Unidata/netcdf-c/archive/v4.3.3.1.tar.gz 14 | netcdf_dir=$PWD/netcdf-c-4.3.3.1 15 | 16 | boost=https://netix.dl.sourceforge.net/project/boost/boost/1.59.0/boost_1_59_0.tar.gz 17 | boost_dir=$PWD/boost_1_59_0 18 | 19 | flac=https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.3.tar.xz 20 | flac_dir=$PWD/flac-1.3.3 21 | 22 | nii_cmake=$PWD/nii_cmake/CMakeLists.txt 23 | nii_dir=$PWD/nii 24 | currennt_dir=$nii_dir/CURRENNT_codes 25 | 26 | mark=.done-venv 27 | if [ ! -f $mark ]; then 28 | echo 'Making python virtual environment' 29 | name=$(basename $conda_url) 30 | if [ ! -f $name ]; then 31 | wget $conda_url || exit 1 32 | fi 33 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 34 | [ -d $venv_dir ] && rm -r $venv_dir 35 | sh $name -b -p $venv_dir || exit 1 36 | . $venv_dir/bin/activate 37 | echo 'Installing python dependencies' 38 | pip install -r requirements.txt || exit 1 39 | touch $mark 40 | fi 41 | echo "if [ \$(which python) != $venv_dir/bin/python ]; then source $venv_dir/bin/activate; fi" > env.sh 42 | 43 | mark=.done-kaldi-tools 44 | if [ ! -f $mark ]; then 45 | echo 'Building Kaldi tools' 46 | cd kaldi/tools 47 | extras/check_dependencies.sh || exit 1 48 | make -j $nj || exit 1 49 | cd $home 50 | touch $mark 51 | fi 52 | 53 | mark=.done-kaldi-src 54 | if [ ! 
-f $mark ]; then 55 | echo 'Building Kaldi src' 56 | cd kaldi/src 57 | ./configure --shared || exit 1 58 | make clean || exit 1 59 | make depend -j $nj || exit 1 60 | make -j $nj || exit 1 61 | cd $home 62 | touch $mark 63 | fi 64 | 65 | mark=.done-netcdf 66 | if [ ! -f $mark ]; then 67 | if [ ! -f $(basename $netcdf) ]; then 68 | wget $netcdf || exit 1 69 | fi 70 | echo 'Unpacking NetCDF source files' 71 | dir=$netcdf_dir 72 | [ -d $dir ] && rm -r $dir 73 | tar -xf $(basename $netcdf) || exit 1 74 | echo 'Building NetCDF' 75 | build=$dir/build 76 | cd $dir 77 | ./configure --disable-netcdf-4 --prefix=$build || exit 1 78 | make -j $nj || exit 1 79 | make install || exit 1 80 | cd $home 81 | touch $mark 82 | fi 83 | netcdf_bin=$netcdf_dir/build/bin 84 | netcdf_lib=$netcdf_dir/build/lib 85 | echo "export PATH=$netcdf_bin:\$PATH" >> env.sh 86 | echo "export LD_LIBRARY_PATH=$netcdf_lib:\$LD_LIBRARY_PATH" >> env.sh 87 | 88 | mark=.done-boost 89 | if [ ! -f $mark ]; then 90 | if [ ! -f $(basename $boost) ]; then 91 | wget $boost || exit 1 92 | fi 93 | echo 'Unpacking boost source files' 94 | dir=$boost_dir 95 | [ -d $dir ] && rm -r $dir 96 | tar -xf $(basename $boost) || exit 1 97 | echo 'Building boost libraries' 98 | build=$dir/build 99 | cd $dir 100 | ./bootstrap.sh --with-libraries=program_options,filesystem,system,random,thread || exit 1 101 | ./b2 -j $nj --prefix=$build || exit 1 102 | cd $home 103 | touch $mark 104 | fi 105 | boost_root=$boost_dir 106 | echo "export LD_LIBRARY_PATH=$boost_root/stage/lib:\$LD_LIBRARY_PATH" >> env.sh 107 | 108 | mark=.done-flac 109 | if [ ! -f $mark ]; then 110 | if [ -z "$(which flac)" ]; then 111 | if [ ! -f $(basename $flac) ]; then 112 | wget $flac || exit 1 113 | fi 114 | echo 'Unpacking flac source files' 115 | [ -d $flac_dir ] && rm -r $flac_dir 116 | tar -xf $(basename $flac) || exit 1 117 | echo 'Building flac' 118 | cd $flac_dir 119 | ./configure --prefix=$PWD/install || exit 1 120 | make -j $nj || exit 1 121 | # make -j $nj check || exit 1 122 | make install || exit 1 123 | fi 124 | cd $home 125 | touch $mark 126 | fi 127 | [ -f $flac_dir/install/bin/flac ] && \ 128 | echo "export PATH=$flac_dir/install/bin:\$PATH" >> env.sh 129 | 130 | mark=.done-nii 131 | if [ ! -f $mark ]; then 132 | echo 'Building nii' 133 | cp $nii_cmake $currennt_dir || exit 1 134 | dir=$currennt_dir/build 135 | [ -d $dir ] && rm -r $dir 136 | mkdir -p $dir || exit 1 137 | cd $dir 138 | cmake .. \ 139 | -DCMAKE_BUILD_TYPE=Release \ 140 | -DBOOST_ROOT=$boost_root \ 141 | -DNETCDF_LIB=$netcdf_lib || exit 1 142 | make -j $(nproc) || exit 1 143 | cd $home 144 | touch $mark 145 | fi 146 | echo "export PATH=$currennt_dir/build:\$PATH" >> env.sh 147 | echo "export PYTHONPATH=$currennt_dir:$nii_dir/pyTools:$PWD/nii_scripts:\$PYTHONPATH" >> env.sh 148 | echo "export nii_scripts=$PWD/nii_scripts" >> env.sh 149 | echo "export nii_dir=$nii_dir" >> env.sh 150 | 151 | echo Done 152 | -------------------------------------------------------------------------------- /baseline/local/asv_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . ./cmd.sh 6 | . 
./path.sh 7 | 8 | nj=$(nproc) 9 | asv_eval_model=exp/models/asv_eval/xvect_01709_1 10 | plda_dir=$asv_eval_model/xvect_train_clean_360 11 | 12 | #enrolls=vctk_dev_enrolls 13 | #trials=vctk_dev_trials_f_common 14 | 15 | enrolls=libri_dev_enrolls 16 | trials=libri_dev_trials_f 17 | 18 | printf -v results '%(%Y-%m-%d-%H-%M-%S)T' -1 19 | results=exp/results-$results 20 | 21 | . ./utils/parse_options.sh 22 | 23 | for name in $asv_eval_model/final.raw $plda_dir/plda $plda_dir/mean.vec \ 24 | $plda_dir/transform.mat data/$enrolls/enrolls data/$trials/trials ; do 25 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 26 | done 27 | 28 | for dset in $enrolls $trials; do 29 | data=data/$dset 30 | spk2utt=$data/spk2utt 31 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 32 | num_spk=$(wc -l < $spk2utt) 33 | njobs=$([ $num_spk -le $nj ] && echo $num_spk || echo $nj) 34 | if [ ! -f $data/.done_mfcc ]; then 35 | printf "${RED} compute MFCC: $dset${NC}\n" 36 | steps/make_mfcc.sh --nj $njobs --cmd "$train_cmd" \ 37 | --write-utt2num-frames true $data || exit 1 38 | utils/fix_data_dir.sh $data || exit 1 39 | touch $data/.done_mfcc 40 | fi 41 | if [ ! -f $data/.done_vad ]; then 42 | printf "${RED} compute VAD: $dset${NC}\n" 43 | sid/compute_vad_decision.sh --nj $njobs --cmd "$train_cmd" $data || exit 1 44 | utils/fix_data_dir.sh $data || exit 1 45 | touch $data/.done_vad 46 | fi 47 | done 48 | 49 | for dset in $enrolls $trials; do 50 | data=data/$dset 51 | spk2utt=$data/spk2utt 52 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 53 | num_spk=$(wc -l < $spk2utt) 54 | njobs=$([ $num_spk -le $nj ] && echo $num_spk || echo $nj) 55 | expo=$asv_eval_model/xvect_$dset 56 | if [ ! -f $expo/.done ]; then 57 | printf "${RED} compute x-vect: $dset${NC}\n" 58 | sid/nnet3/xvector/extract_xvectors.sh --nj $njobs --cmd "$train_cmd" \ 59 | $asv_eval_model $data $expo || exit 1 60 | touch $expo/.done 61 | fi 62 | done 63 | 64 | expo=$results/ASV-$enrolls-$trials 65 | if [ ! -f $expo/.done ]; then 66 | printf "${RED} ASV scoring: $expo${NC}\n" 67 | mkdir -p $expo 68 | xvect_enrolls=$asv_eval_model/xvect_$enrolls/xvector.scp 69 | xvect_trials=$asv_eval_model/xvect_$trials/xvector.scp 70 | for name in $xvect_enrolls $xvect_trials; do 71 | [ ! 
-f $name ] && echo "File $name does not exist" && exit 1 72 | done 73 | $train_cmd $expo/log/ivector-plda-scoring.log \ 74 | sed -r 's/_|-/ /g' data/$enrolls/enrolls \| awk '{split($1, val, "_"); ++num[val[1]]}END{for (spk in num) print spk, num[spk]}' \| \ 75 | ivector-plda-scoring --normalize-length=true --num-utts=ark:- \ 76 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 77 | "ark:cut -d' ' -f1 data/$enrolls/enrolls | grep -Ff - $xvect_enrolls | ivector-mean ark:data/$enrolls/spk2utt scp:- ark:- | ivector-subtract-global-mean $plda_dir/mean.vec ark:- ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 78 | "ark:cut -d' ' -f2 data/$trials/trials | sort | uniq | grep -Ff - $xvect_trials | ivector-subtract-global-mean $plda_dir/mean.vec scp:- ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 79 | "cat data/$trials/trials | cut -d' ' --fields=1,2 |" $expo/scores || exit 1 80 | eer=`compute-eer <(local/prepare_for_eer.py data/$trials/trials $expo/scores) 2> /dev/null` 81 | mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $expo/scores data/$trials/trials 2> /dev/null` 82 | mindcf2=`sid/compute_min_dcf.py --p-target 0.001 $expo/scores data/$trials/trials 2> /dev/null` 83 | echo "EER: $eer%" | tee $expo/EER 84 | echo "minDCF(p-target=0.01): $mindcf1" | tee -a $expo/EER 85 | echo "minDCF(p-target=0.001): $mindcf2" | tee -a $expo/EER 86 | PYTHONPATH=$(realpath ../cllr) python ../cllr/compute_cllr.py \ 87 | -k data/$trials/trials -s $expo/scores -e | tee $expo/Cllr || exit 1 88 | 89 | # Compute linkability 90 | PYTHONPATH=$(realpath ../anonymization_metrics) python local/scoring/linkability/compute_linkability.py \ 91 | -k data/$trials/trials -s $expo/scores \ 92 | -d -o $expo/linkability | tee $expo/linkability_log || exit 1 93 | 94 | # Zebra 95 | label=$enrolls-$trials 96 | PYTHONPATH=$(realpath ../zebra) python ../zebra/zero_evidence.py \ 97 | -k data/$trials/trials -s $expo/scores -l $label | tee $expo/zebra || exit 1 98 | #-k data/$trials/trials -s $expo/scores -l $label -e png | tee $expo/zebra || exit 1 99 | 100 | touch $expo/.done 101 | fi 102 | -------------------------------------------------------------------------------- /baseline/local/anon/anonymize_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for first voice privacy challenge 2020 3 | # 4 | # This script anonymizes a kaldi data directory and produces a new 5 | # directory with given suffix in the name 6 | 7 | . path.sh 8 | . 
cmd.sh 9 | 10 | set -e 11 | 12 | #===== begin config ======= 13 | nj=20 14 | stage=0 15 | 16 | anoni_pool="libritts_train_other_500" # change this to the data you want to use for anonymization pool 17 | data_netcdf= # change this to dir where VC features data will be stored 18 | 19 | # Chain model for PPG extraction 20 | ppg_model= 21 | ppg_type= 22 | 23 | ppg_dir=exp/nnet3_cleaned # change this to the dir where PPGs will be stored 24 | 25 | # x-vector extraction 26 | xvec_nnet_dir= # change this to pretrained xvector model downloaded from Kaldi website 27 | anon_xvec_out_dir=${xvec_nnet_dir}/anon 28 | 29 | plda_dir=${xvec_nnet_dir}/xvectors_train 30 | 31 | pseudo_xvec_rand_level=spk # spk (all utterances will have same xvector) or utt (each utterance will have randomly selected xvector) 32 | cross_gender="false" # true, same gender xvectors will be selected; false, other gender xvectors 33 | distance="cosine" # cosine/plda 34 | proximity="farthest" # nearest/farthest 35 | 36 | anon_data_suffix=_anon_${pseudo_xvec_rand_level}_${cross_gender}_${distance}_${proximity} 37 | 38 | rand_seed=2020 39 | 40 | #=========== end config =========== 41 | 42 | . utils/parse_options.sh 43 | 44 | if [ $# != 1 ]; then 45 | echo "Usage: " 46 | echo " $0 [options] " 47 | echo "Options" 48 | echo " --nj=40 # Number of CPUs to use for feature extraction" 49 | exit 1; 50 | fi 51 | 52 | data_dir="$1" # Data to be anonymized, must be in Kaldi format 53 | 54 | spk2utt=data/$data_dir/spk2utt 55 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 56 | num_spk=$(wc -l < $spk2utt) 57 | [ $nj -gt $num_spk ] && nj=$num_spk 58 | 59 | # Extract xvectors from data which has to be anonymized 60 | if [ $stage -le 0 ]; then 61 | printf "${RED}\nStage a.0: Extracting xvectors for ${data_dir}.${NC}\n" 62 | local/featex/01_extract_xvectors.sh --nj $nj data/${data_dir} ${xvec_nnet_dir} \ 63 | ${anon_xvec_out_dir} || exit 1; 64 | fi 65 | 66 | # Generate pseudo-speakers for source data 67 | if [ $stage -le 1 ]; then 68 | printf "${RED}\nStage a.1: Generating pseudo-speakers for ${data_dir}.${NC}\n" 69 | local/anon/make_pseudospeaker.sh --rand-level ${pseudo_xvec_rand_level} \ 70 | --cross-gender ${cross_gender} --distance ${distance} \ 71 | --proximity ${proximity} --rand-seed ${rand_seed} \ 72 | data/${data_dir} data/${anoni_pool} ${anon_xvec_out_dir} \ 73 | ${plda_dir} || exit 1; 74 | fi 75 | 76 | # Extract pitch for source data 77 | if [ $stage -le 2 ]; then 78 | printf "${RED}\nStage a.2: Pitch extraction for ${data_dir}.${NC}\n" 79 | local/featex/02_extract_pitch.sh --nj ${nj} data/${data_dir} || exit 1; 80 | fi 81 | 82 | # Extract PPGs for source data 83 | if [ $stage -le 3 ]; then 84 | printf "${RED}\nStage a.3: PPG extraction for ${data_dir}.${NC}\n" 85 | local/featex/extract_ppg.sh --nj $nj --stage 0 \ 86 | ${data_dir} ${ppg_model} ${ppg_dir}/ppg_${data_dir} || exit 1; 87 | fi 88 | 89 | # Create netcdf data for voice conversion 90 | if [ $stage -le 4 ]; then 91 | printf "${RED}\nStage a.4: Make netcdf data for VC.${NC}\n" 92 | local/anon/make_netcdf.sh --stage 0 data/${data_dir} ${ppg_dir}/ppg_${data_dir}/phone_post.scp \ 93 | ${anon_xvec_out_dir}/xvectors_${data_dir}/pseudo_xvecs/pseudo_xvector.scp \ 94 | ${data_netcdf}/${data_dir} || exit 1; 95 | fi 96 | 97 | if [ $stage -le 5 ]; then 98 | printf "${RED}\nStage a.5: Extract melspec from acoustic model for ${data_dir}.${NC}\n" 99 | local/vc/am/01_gen.sh ${data_netcdf}/${data_dir} ${ppg_type} || exit 1; 100 | fi 101 | 102 | if [ $stage -le 6 ]; then 103 | 
printf "${RED}\nStage a.6: Generate waveform from NSF model for ${data_dir}.${NC}\n" 104 | local/vc/nsf/01_gen.sh ${data_netcdf}/${data_dir} || exit 1; 105 | fi 106 | 107 | if [ $stage -le 7 ]; then 108 | printf "${RED}\nStage a.7: Creating new data directories corresponding to anonymization.${NC}\n" 109 | wav_path=${data_netcdf}/${data_dir}/nsf_output_wav 110 | new_data_dir=data/${data_dir}${anon_data_suffix} 111 | if [ -d "$new_data_dir" ]; then 112 | rm -rf ${new_data_dir} 113 | fi 114 | utils/copy_data_dir.sh data/${data_dir} ${new_data_dir} 115 | [ -f ${new_data_dir}/feats.scp ] && rm ${new_data_dir}/feats.scp 116 | [ -f ${new_data_dir}/vad.scp ] && rm ${new_data_dir}/vad.scp 117 | # Copy new spk2gender in case cross_gender vc has been done 118 | cp ${anon_xvec_out_dir}/xvectors_${data_dir}/pseudo_xvecs/spk2gender ${new_data_dir}/ 119 | awk -v p="$wav_path" '{print $1, "sox", p"/"$1".wav", "-t wav -R -b 16 - |"}' data/${data_dir}/wav.scp > ${new_data_dir}/wav.scp 120 | fi 121 | -------------------------------------------------------------------------------- /baseline/local/nnet3/tuning/run_tdnn_1a.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 4 | # call multi-splice. 5 | 6 | # without cleanup: 7 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 8 | 9 | 10 | # At this script level we don't support not running on GPU, as it would be painfully slow. 11 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 12 | # --num-threads 16 and --minibatch-size 128. 13 | 14 | # First the options that are passed through to run_ivector_common.sh 15 | # (some of which are also used in this script directly). 16 | stage=0 17 | decode_nj=30 18 | train_set=train_960_cleaned 19 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 20 | # should have alignments for the specified training data. 21 | nnet3_affix=_cleaned 22 | 23 | # Options which are not passed through to run_ivector_common.sh 24 | affix= 25 | train_stage=-10 26 | common_egs_dir= 27 | reporting_email= 28 | remove_egs=true 29 | 30 | . ./cmd.sh 31 | . ./path.sh 32 | . ./utils/parse_options.sh 33 | 34 | 35 | if ! 
cuda-compiled; then 36 | cat </dev/null || true 108 | for test in test_clean test_other dev_clean dev_other; do 109 | ( 110 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 111 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 112 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 113 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 114 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 115 | steps/lmrescore_const_arpa.sh \ 116 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 117 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 118 | steps/lmrescore_const_arpa.sh \ 119 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 120 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 121 | ) || touch $dir/.error & 122 | done 123 | wait 124 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 125 | fi 126 | 127 | exit 0; 128 | -------------------------------------------------------------------------------- /baseline/local/anon/anonymise_dir_mcadams.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.0 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @author: Jose Patino, Massimiliano Todisco, Pramod Bachhav, Nicholas Evans 5 | Audio Security and Privacy Group, EURECOM 6 | """ 7 | import os 8 | import librosa 9 | import numpy as np 10 | import scipy 11 | import argparse 12 | 13 | def anonym(file, output_dir, winLengthinms=20, shiftLengthinms=10, lp_order=20, mcadams=0.8): 14 | filename = file[0] 15 | filepath = file[1] 16 | output_file = output_dir + filename + '.wav' 17 | if not os.path.exists(output_dir): os.makedirs(output_dir) 18 | sig, fs = librosa.load(filepath,sr=None) 19 | eps = np.finfo(np.float32).eps 20 | sig = sig+eps 21 | 22 | # simulation parameters 23 | winlen = np.floor(winLengthinms*0.001*fs).astype(int) 24 | shift = np.floor(shiftLengthinms*0.001*fs).astype(int) 25 | length_sig = len(sig) 26 | 27 | # fft processing parameters 28 | NFFT = 2**(np.ceil((np.log2(winlen)))).astype(int) 29 | # anaysis and synth window which satisfies the constraint 30 | wPR = np.hanning(winlen) 31 | K = np.sum(wPR)/shift 32 | win = np.sqrt(wPR/K) 33 | Nframes = 1+np.floor((length_sig-winlen)/shift).astype(int) # nr of complete frames 34 | 35 | # carry out the overlap - add FFT processing 36 | sig_rec = np.zeros([length_sig]) # allocate output+'ringing' vector 37 | 38 | for m in np.arange(1,Nframes): 39 | # indices of the mth frame 40 | index = np.arange(m*shift,np.minimum(m*shift+winlen,length_sig)) 41 | # windowed mth frame (other than rectangular window) 42 | frame = sig[index]*win 43 | # get lpc coefficients 44 | a_lpc = librosa.core.lpc(frame+eps,lp_order) 45 | # get poles 46 | poles = scipy.signal.tf2zpk(np.array([1]), a_lpc)[1] 47 | #index of imaginary poles 48 | ind_imag = np.where(np.isreal(poles)==False)[0] 49 | #index of first imaginary poles 50 | ind_imag_con = ind_imag[np.arange(0,np.size(ind_imag),2)] 51 | 52 | # here we define the new angles of the poles, shifted accordingly to the mcadams coefficient 53 | # values >1 expand the spectrum, while values <1 constract it for angles>1 54 | # values >1 constract the spectrum, while values <1 expand it for angles<1 55 | # the choice of this value is strongly linked to the number of lpc coefficients 56 | # a bigger lpc coefficients number constraints the effect of the coefficient to very small variations 57 | # a smaller lpc 
coefficients number allows for a bigger flexibility 58 | new_angles = np.angle(poles[ind_imag_con])**mcadams 59 | 60 | # make sure new angles stay between 0 and pi 61 | new_angles[np.where(new_angles>=np.pi)] = np.pi 62 | new_angles[np.where(new_angles<=0)] = 0 63 | 64 | # copy of the original poles to be adjusted with the new angles 65 | new_poles = poles 66 | for k in np.arange(np.size(ind_imag_con)): 67 | # compute new poles with the same magnitued and new angles 68 | new_poles[ind_imag_con[k]] = np.abs(poles[ind_imag_con[k]])*np.exp(1j*new_angles[k]) 69 | # applied also to the conjugate pole 70 | new_poles[ind_imag_con[k]+1] = np.abs(poles[ind_imag_con[k]+1])*np.exp(-1j*new_angles[k]) 71 | 72 | # recover new, modified lpc coefficients 73 | a_lpc_new = np.real(np.poly(new_poles)) 74 | # get residual excitation for reconstruction 75 | res = scipy.signal.lfilter(a_lpc,np.array(1),frame) 76 | # reconstruct frames with new lpc coefficient 77 | frame_rec = scipy.signal.lfilter(np.array([1]),a_lpc_new,res) 78 | frame_rec = frame_rec*win 79 | 80 | outindex = np.arange(m*shift,m*shift+len(frame_rec)) 81 | # overlap add 82 | sig_rec[outindex] = sig_rec[outindex] + frame_rec 83 | sig_rec = sig_rec/np.max(np.abs(sig_rec)) 84 | scipy.io.wavfile.write(output_file, fs, np.float32(sig_rec)) 85 | return [] 86 | 87 | if __name__ == "__main__": 88 | #Parse args 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--data_dir',type=str,default='../data/libri_test_enrolls_anon') 91 | parser.add_argument('--anon_suffix',type=str,default='_anon') 92 | parser.add_argument('--n_coeffs',type=int,default=20) 93 | parser.add_argument('--mc_coeff',type=float,default=0.8) 94 | parser.add_argument('--winLengthinms',type=int,default=20) 95 | parser.add_argument('--shiftLengthinms',type=int,default=10) 96 | config = parser.parse_args() 97 | 98 | #Load protocol file 99 | list_name= config.data_dir + '/wav.scp' 100 | list_files = np.genfromtxt(list_name,dtype='U') 101 | 102 | config.data_dir = config.data_dir+config.anon_suffix 103 | 104 | for idx,file in enumerate(list_files): 105 | print(str(idx+1),'/',len(list_files)) 106 | anonym(file, output_dir=config.data_dir+'/wav/'+file[0]+'/', winLengthinms=config.winLengthinms, shiftLengthinms=config.shiftLengthinms, lp_order=config.n_coeffs, mcadams=config.mc_coeff) 107 | 108 | -------------------------------------------------------------------------------- /baseline/local/fix_eval2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Usage: python local/fix_eval2.py 3 | ''' 4 | import sys 5 | import os 6 | import shutil 7 | from os.path import join, exists 8 | 9 | args = sys.argv 10 | proto_path = args[1] 11 | enroll_dir = args[2] 12 | trial_dir = args[3] 13 | 14 | enroll_files = ["enroll.txt"] 15 | trial_files = ["trials.txt"] 16 | 17 | # Prepare enroll data 18 | wav_scp = [] 19 | utt2spk = [] 20 | spk2gender = [] 21 | text = [] 22 | 23 | enroll_spks = set() 24 | enroll_uttids = set() 25 | 26 | for ef in enroll_files: 27 | ef_path = join(proto_path, ef) 28 | with open(ef_path) as f: 29 | for line in f.read().splitlines(): 30 | line = line.strip().split() 31 | 32 | # Register all speaker ids 33 | spkid = line[0] 34 | enroll_spks.add(spkid) 35 | 36 | # Register all utterance ids 37 | uttarr = line[1].split(',') 38 | for utt in uttarr: 39 | uttid = utt.split('/')[-1].split('.')[0] 40 | enroll_uttids.add(uttid) 41 | 42 | # Filter wav.scp, utt2spk, text and spk2gender 43 | # based on spkids and uttids 44 | with 
open(join(enroll_dir, 'wav.scp')) as f: 45 | for line in f.readlines(): 46 | uttid = line.split()[0] 47 | if uttid in enroll_uttids: 48 | wav_scp.append(line) 49 | with open(join(enroll_dir, 'text')) as f: 50 | for line in f.readlines(): 51 | uttid = line.split()[0] 52 | if uttid in enroll_uttids: 53 | text.append(line) 54 | with open(join(enroll_dir, 'utt2spk')) as f: 55 | for line in f.readlines(): 56 | uttid = line.split()[0] 57 | if uttid in enroll_uttids: 58 | utt2spk.append(line) 59 | with open(join(enroll_dir, 'spk2gender')) as f: 60 | for line in f.readlines(): 61 | spkid = line.split()[0] 62 | if spkid in enroll_spks: 63 | spk2gender.append(line) 64 | with open(join(enroll_dir, 'wav.scp'), 'w') as f: 65 | for line in wav_scp: 66 | f.write(line) 67 | with open(join(enroll_dir, 'text'), 'w') as f: 68 | for line in text: 69 | f.write(line) 70 | with open(join(enroll_dir, 'utt2spk'), 'w') as f: 71 | for line in utt2spk: 72 | f.write(line) 73 | with open(join(enroll_dir, 'spk2gender'), 'w') as f: 74 | for line in spk2gender: 75 | f.write(line) 76 | 77 | 78 | 79 | # Prepare trial data 80 | wav_scp = [] 81 | utt2spk = [] 82 | spk2gender = [] 83 | text = [] 84 | trials_male = [] 85 | trials_female = [] 86 | 87 | trial_spks = set() 88 | trial_uttids = set() 89 | 90 | for i, tf in enumerate(trial_files): 91 | tf_path = join(proto_path, tf) 92 | with open(tf_path) as f: 93 | for line in f.read().splitlines(): 94 | line = line.strip().split() 95 | spkid = line[0] 96 | utt = line[1] 97 | target_type = line[2] 98 | gender = line[3] 99 | 100 | uttid = utt.split('/')[-1].split('.')[0] 101 | utt_spkid = uttid.split('-')[0] 102 | 103 | trial_spks.add(spkid) 104 | trial_spks.add(utt_spkid) 105 | trial_uttids.add(uttid) 106 | 107 | if gender == 'M': 108 | trials_male.append(spkid + ' ' + uttid + ' ' + target_type) 109 | else: 110 | trials_female.append(spkid + ' ' + uttid + ' ' + target_type) 111 | 112 | 113 | # Filter wav.scp, utt2spk and spk2gender 114 | # based on spkids and uttids 115 | with open(join(trial_dir, 'wav.scp')) as f: 116 | for line in f.readlines(): 117 | uttid = line.split()[0] 118 | if uttid in trial_uttids: 119 | wav_scp.append(line) 120 | with open(join(trial_dir, 'text')) as f: 121 | for line in f.readlines(): 122 | uttid = line.split()[0] 123 | if uttid in trial_uttids: 124 | text.append(line) 125 | with open(join(trial_dir, 'utt2spk')) as f: 126 | for line in f.readlines(): 127 | uttid = line.split()[0] 128 | if uttid in trial_uttids: 129 | utt2spk.append(line) 130 | with open(join(trial_dir, 'spk2gender')) as f: 131 | for line in f.readlines(): 132 | spkid = line.split()[0] 133 | if spkid in trial_spks: 134 | spk2gender.append(line) 135 | with open(join(trial_dir, 'wav.scp'), 'w') as f: 136 | for line in wav_scp: 137 | f.write(line) 138 | with open(join(trial_dir, 'text'), 'w') as f: 139 | for line in text: 140 | f.write(line) 141 | with open(join(trial_dir, 'utt2spk'), 'w') as f: 142 | for line in utt2spk: 143 | f.write(line) 144 | with open(join(trial_dir, 'spk2gender'), 'w') as f: 145 | for line in spk2gender: 146 | f.write(line) 147 | 148 | 149 | all_trials = sorted(trials_male + trials_female) 150 | with open(join(trial_dir, 'trials'), 'w') as f: 151 | f.write('\n'.join(all_trials) + '\n') 152 | 153 | tt_male = sorted(trials_male) 154 | tt_female = sorted(trials_female) 155 | with open(join(trial_dir, 'trials_male'), 'w') as f: 156 | f.write('\n'.join(tt_male) + '\n') 157 | with open(join(trial_dir, 'trials_female'), 'w') as f: 158 | f.write('\n'.join(tt_female) + '\n') 
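Note: fix_eval2.py above repeats the same filter-and-rewrite step for wav.scp, text, utt2spk and spk2gender in both the enroll and trial directories. The snippet below is only an illustrative sketch of that shared pattern; the helper name filter_kaldi_table is hypothetical and is not part of the recipe.

from os.path import join

def filter_kaldi_table(data_dir, fname, keep_keys):
    # Keep only lines whose first field (utterance or speaker id) is in keep_keys,
    # then rewrite the table in place, as fix_eval2.py does for each Kaldi file.
    path = join(data_dir, fname)
    with open(path) as f:
        kept = [line for line in f if line.split()[0] in keep_keys]
    with open(path, 'w') as f:
        f.writelines(kept)

# e.g. filter_kaldi_table(trial_dir, 'wav.scp', trial_uttids)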
159 | 160 | -------------------------------------------------------------------------------- /baseline/local/chain/compare_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this script is used for comparing decoding results between systems. 4 | # e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp 5 | # For use with discriminatively trained systems you specify the epochs after a colon: 6 | # for instance, 7 | # local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} 8 | 9 | 10 | if [ $# == 0 ]; then 11 | echo "Usage: $0: [--looped] [--online] [ ... ]" 12 | echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" 13 | echo "or (with epoch numbers for discriminative training):" 14 | echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" 15 | exit 1 16 | fi 17 | 18 | echo "# $0 $*" 19 | 20 | include_looped=false 21 | if [ "$1" == "--looped" ]; then 22 | include_looped=true 23 | shift 24 | fi 25 | include_online=false 26 | if [ "$1" == "--online" ]; then 27 | include_online=true 28 | shift 29 | fi 30 | 31 | 32 | used_epochs=false 33 | 34 | # this function set_names is used to separate the epoch-related parts of the name 35 | # [for discriminative training] and the regular parts of the name. 36 | # If called with a colon-free directory name, like: 37 | # set_names exp/chain/tdnn_lstm1e_sp_bi_smbr 38 | # it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" 39 | # If called with something like: 40 | # set_names exp/chain/tdnn_d_sp_smbr:3 41 | # it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" 42 | 43 | 44 | set_names() { 45 | if [ $# != 1 ]; then 46 | echo "compare_wer_general.sh: internal error" 47 | exit 1 # exit the program 48 | fi 49 | dirname=$(echo $1 | cut -d: -f1) 50 | epoch=$(echo $1 | cut -s -d: -f2) 51 | if [ -z $epoch ]; then 52 | epoch_infix="" 53 | else 54 | used_epochs=true 55 | epoch_infix=_epoch${epoch} 56 | fi 57 | } 58 | 59 | 60 | 61 | echo -n "# System " 62 | for x in $*; do printf "% 10s" " $(basename $x)"; done 63 | echo 64 | 65 | strings=( 66 | "# WER on dev(fglarge) " 67 | "# WER on dev(tglarge) " 68 | "# WER on dev(tgmed) " 69 | "# WER on dev(tgsmall) " 70 | "# WER on dev_other(fglarge) " 71 | "# WER on dev_other(tglarge) " 72 | "# WER on dev_other(tgmed) " 73 | "# WER on dev_other(tgsmall) " 74 | "# WER on test(fglarge) " 75 | "# WER on test(tglarge) " 76 | "# WER on test(tgmed) " 77 | "# WER on test(tgsmall) " 78 | "# WER on test_other(fglarge) " 79 | "# WER on test_other(tglarge) " 80 | "# WER on test_other(tgmed) " 81 | "# WER on test_other(tgsmall) ") 82 | 83 | for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do 84 | echo -n "${strings[$n]}" 85 | for x in $*; do 86 | set_names $x # sets $dirname and $epoch_infix 87 | decode_names=(dev_clean_fglarge dev_clean_tglarge dev_clean_tgmed dev_clean_tgsmall dev_other_fglarge dev_other_tglarge dev_other_tgmed dev_other_tgsmall test_clean_fglarge test_clean_tglarge test_clean_tgmed test_clean_tgsmall test_other_fglarge test_other_tglarge test_other_tgmed test_other_tgsmall) 88 | 89 | wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 90 | printf "% 10s" $wer 91 | done 92 | echo 93 | if $include_looped; then 94 | echo -n "# [looped:] " 95 | for x in $*; do 96 | set_names $x # sets $dirname and $epoch_infix 97 | wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 98 | printf "% 10s" $wer 99 | done 100 | echo 101 | fi 102 | if 
$include_online; then 103 | echo -n "# [online:] " 104 | for x in $*; do 105 | set_names $x # sets $dirname and $epoch_infix 106 | wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 107 | printf "% 10s" $wer 108 | done 109 | echo 110 | fi 111 | done 112 | 113 | 114 | if $used_epochs; then 115 | exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 116 | fi 117 | 118 | 119 | echo -n "# Final train prob " 120 | for x in $*; do 121 | prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') 122 | printf "% 10s" $prob 123 | done 124 | echo 125 | 126 | echo -n "# Final valid prob " 127 | for x in $*; do 128 | prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') 129 | printf "% 10s" $prob 130 | done 131 | echo 132 | 133 | echo -n "# Final train prob (xent) " 134 | for x in $*; do 135 | prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') 136 | printf "% 10s" $prob 137 | done 138 | echo 139 | 140 | echo -n "# Final valid prob (xent) " 141 | for x in $*; do 142 | prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') 143 | printf "% 10s" $prob 144 | done 145 | echo 146 | 147 | echo -n "# Num-parameters " 148 | for x in $*; do 149 | num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}') 150 | printf "% 10d" $num_params 151 | done 152 | echo 153 | -------------------------------------------------------------------------------- /baseline/run_asr_eval_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | 7 | stage=6 8 | 9 | . ./cmd.sh 10 | . ./path.sh 11 | 12 | nj=10 13 | 14 | . 
parse_options.sh 15 | 16 | train="train_clean_360" 17 | 18 | if [ $stage -le 6 ]; then 19 | for part in dev_clean test_clean $train; do 20 | steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj data/$part exp/make_mfcc/$part $mfccdir 21 | steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir 22 | done 23 | fi 24 | 25 | if [ $stage -le 7 ]; then 26 | utils/subset_data_dir.sh --shortest data/$train 5000 data/train_5k 27 | fi 28 | 29 | if [ $stage -le 8 ]; then 30 | steps/train_mono.sh \ 31 | --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ 32 | data/train_5k data/lang_nosp exp/mono 33 | ( 34 | utils/mkgraph.sh \ 35 | data/lang_nosp_test_tgsmall \ 36 | exp/mono exp/mono/graph_nosp_tgsmall 37 | for test in test_clean dev_clean; do 38 | steps/decode.sh \ 39 | --nj $nj --cmd "$decode_cmd" \ 40 | exp/mono/graph_nosp_tgsmall \ 41 | data/$test exp/mono/decode_nosp_tgsmall_$test 42 | done 43 | )& 44 | fi 45 | 46 | if [ $stage -le 9 ]; then 47 | steps/align_si.sh \ 48 | --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ 49 | data/$train data/lang_nosp exp/mono exp/mono_ali 50 | steps/train_deltas.sh \ 51 | --boost-silence 1.25 --cmd "$train_cmd" \ 52 | 2000 20000 data/$train data/lang_nosp \ 53 | exp/mono_ali exp/tri1 54 | ( 55 | utils/mkgraph.sh \ 56 | data/lang_nosp_test_tgsmall \ 57 | exp/tri1 exp/tri1/graph_nosp_tgsmall 58 | for test in test_clean dev_clean; do 59 | steps/decode.sh --nj $nj --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ 60 | data/$test exp/tri1/decode_nosp_tgsmall_$test 61 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 62 | data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test 63 | steps/lmrescore_const_arpa.sh \ 64 | --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ 65 | data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test 66 | done 67 | )& 68 | fi 69 | 70 | if [ $stage -le 10 ]; then 71 | steps/align_si.sh --nj $nj --cmd "$train_cmd" \ 72 | data/$train data/lang_nosp exp/tri1 exp/tri1_ali 73 | steps/train_lda_mllt.sh --cmd "$train_cmd" \ 74 | --splice-opts "--left-context=3 --right-context=3" 2500 25000 \ 75 | data/$train data/lang_nosp exp/tri1_ali exp/tri2b 76 | ( 77 | utils/mkgraph.sh data/lang_nosp_test_tgsmall \ 78 | exp/tri2b exp/tri2b/graph_nosp_tgsmall 79 | for test in test_clean dev_clean; do 80 | steps/decode.sh --nj $nj --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \ 81 | data/$test exp/tri2b/decode_nosp_tgsmall_$test 82 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 83 | data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test 84 | steps/lmrescore_const_arpa.sh \ 85 | --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ 86 | data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test 87 | done 88 | )& 89 | fi 90 | 91 | if [ $stage -le 11 ]; then 92 | steps/align_si.sh --nj $nj --cmd "$train_cmd" --use-graphs true \ 93 | data/$train data/lang_nosp exp/tri2b exp/tri2b_ali 94 | steps/train_sat.sh --cmd "$train_cmd" 3000 45000 \ 95 | data/$train data/lang_nosp exp/tri2b_ali exp/tri3b 96 | ( 97 | utils/mkgraph.sh data/lang_nosp_test_tgsmall \ 98 | exp/tri3b exp/tri3b/graph_nosp_tgsmall 99 | for test in test_clean dev_clean; do 100 | steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" \ 101 | exp/tri3b/graph_nosp_tgsmall data/$test \ 102 | exp/tri3b/decode_nosp_tgsmall_$test 103 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 104 | data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test 105 | steps/lmrescore_const_arpa.sh \ 106 | --cmd "$decode_cmd" 
data/lang_nosp_test_{tgsmall,tglarge} \ 107 | data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test 108 | done 109 | )& 110 | fi 111 | 112 | if [ $stage -le 19 ]; then 113 | # this does some data-cleaning. The cleaned data should be useful when we add 114 | # the neural net and chain systems. (although actually it was pretty clean already.) 115 | local/run_cleanup_segmentation.sh --data "data/$train" 116 | fi 117 | 118 | if [ $stage -le 20 ]; then 119 | # train and test nnet3 tdnn models on the entire data with data-cleaning. 120 | # set "--stage 11" if you have already run local/nnet3/run_tdnn.sh 121 | 122 | 123 | # local/chain/run_tdnn_1d__360.sh 124 | local/chain/run_tdnn_1d__360.sh 125 | 126 | # --stage 15 --train_stage 563 127 | 128 | # local/chain/run_tdnn.sh \ 129 | # --stage 3 \ 130 | # --train_stage -10 131 | fi 132 | 133 | # The nnet3 TDNN recipe: 134 | # local/nnet3/run_tdnn.sh # set "--stage 11" if you have already run local/chain/run_tdnn.sh 135 | 136 | # # train models on cleaned-up data 137 | # # we've found that this isn't helpful-- see the comments in local/run_data_cleaning.sh 138 | # local/run_data_cleaning.sh 139 | 140 | # Wait for decodings in the background 141 | wait 142 | echo Done 143 | -------------------------------------------------------------------------------- /baseline/local/nnet3/run_tdnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 1b is as 1a but uses xconfigs. 4 | 5 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 6 | # call multi-splice. 7 | 8 | # without cleanup: 9 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 10 | 11 | 12 | # At this script level we don't support not running on GPU, as it would be painfully slow. 13 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 14 | # --num-threads 16 and --minibatch-size 128. 15 | 16 | # First the options that are passed through to run_ivector_common.sh 17 | # (some of which are also used in this script directly). 18 | stage=0 19 | decode_nj=30 20 | train_set=train_960_cleaned 21 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 22 | # should have alignments for the specified training data. 23 | nnet3_affix=_cleaned 24 | 25 | # Options which are not passed through to run_ivector_common.sh 26 | affix= 27 | train_stage=-10 28 | common_egs_dir= 29 | reporting_email= 30 | remove_egs=true 31 | 32 | . ./cmd.sh 33 | . ./path.sh 34 | . ./utils/parse_options.sh 35 | 36 | 37 | if ! cuda-compiled; then 38 | cat < $dir/configs/network.xconfig 71 | input dim=100 name=ivector 72 | input dim=40 name=input 73 | fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 74 | 75 | relu-batchnorm-layer name=tdnn0 dim=1280 76 | relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2) 77 | relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3) 78 | relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2) 79 | relu-batchnorm-layer name=tdnn4 dim=1280 80 | output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5 81 | EOF 82 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ 83 | --config-dir $dir/configs || exit 1; 84 | fi 85 | 86 | if [ $stage -le 12 ]; then 87 | if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then 88 | utils/create_split_dir.pl \ 89 | /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage 90 | fi 91 | 92 | steps/nnet3/train_dnn.py --stage=$train_stage \ 93 | --cmd="$decode_cmd" \ 94 | --feat.online-ivector-dir $train_ivector_dir \ 95 | --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ 96 | --trainer.num-epochs 4 \ 97 | --trainer.optimization.num-jobs-initial 3 \ 98 | --trainer.optimization.num-jobs-final 16 \ 99 | --trainer.optimization.initial-effective-lrate 0.0017 \ 100 | --trainer.optimization.final-effective-lrate 0.00017 \ 101 | --egs.dir "$common_egs_dir" \ 102 | --cleanup.remove-egs $remove_egs \ 103 | --cleanup.preserve-model-interval 100 \ 104 | --feat-dir=$train_data_dir \ 105 | --ali-dir $ali_dir \ 106 | --lang data/lang \ 107 | --reporting.email="$reporting_email" \ 108 | --dir=$dir || exit 1; 109 | 110 | fi 111 | 112 | if [ $stage -le 13 ]; then 113 | # this does offline decoding that should give about the same results as the 114 | # real online decoding (the one with --per-utt true) 115 | rm $dir/.error 2>/dev/null || true 116 | for test in test_clean test_other dev_clean dev_other; do 117 | ( 118 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 119 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 120 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 121 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 122 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 123 | steps/lmrescore_const_arpa.sh \ 124 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 125 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 126 | steps/lmrescore_const_arpa.sh \ 127 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 128 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 129 | ) || touch $dir/.error & 130 | done 131 | wait 132 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 133 | fi 134 | 135 | exit 0; 136 | -------------------------------------------------------------------------------- /baseline/local/nnet3/tuning/run_tdnn_1b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 1b is as 1a but uses xconfigs. 4 | 5 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 6 | # call multi-splice. 7 | 8 | # without cleanup: 9 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 10 | 11 | 12 | # At this script level we don't support not running on GPU, as it would be painfully slow. 13 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 14 | # --num-threads 16 and --minibatch-size 128. 15 | 16 | # First the options that are passed through to run_ivector_common.sh 17 | # (some of which are also used in this script directly). 18 | stage=0 19 | decode_nj=30 20 | train_set=train_960_cleaned 21 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 22 | # should have alignments for the specified training data. 23 | nnet3_affix=_cleaned 24 | 25 | # Options which are not passed through to run_ivector_common.sh 26 | affix= 27 | train_stage=-10 28 | common_egs_dir= 29 | reporting_email= 30 | remove_egs=true 31 | 32 | . ./cmd.sh 33 | . ./path.sh 34 | . ./utils/parse_options.sh 35 | 36 | 37 | if ! 
cuda-compiled; then 38 | cat < $dir/configs/network.xconfig 71 | input dim=100 name=ivector 72 | input dim=40 name=input 73 | fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 74 | 75 | relu-batchnorm-layer name=tdnn0 dim=1280 76 | relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2) 77 | relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3) 78 | relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2) 79 | relu-batchnorm-layer name=tdnn4 dim=1280 80 | output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5 81 | EOF 82 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ 83 | --config-dir $dir/configs || exit 1; 84 | fi 85 | 86 | if [ $stage -le 12 ]; then 87 | if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then 88 | utils/create_split_dir.pl \ 89 | /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage 90 | fi 91 | 92 | steps/nnet3/train_dnn.py --stage=$train_stage \ 93 | --cmd="$decode_cmd" \ 94 | --feat.online-ivector-dir $train_ivector_dir \ 95 | --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ 96 | --trainer.num-epochs 4 \ 97 | --trainer.optimization.num-jobs-initial 3 \ 98 | --trainer.optimization.num-jobs-final 16 \ 99 | --trainer.optimization.initial-effective-lrate 0.0017 \ 100 | --trainer.optimization.final-effective-lrate 0.00017 \ 101 | --egs.dir "$common_egs_dir" \ 102 | --cleanup.remove-egs $remove_egs \ 103 | --cleanup.preserve-model-interval 100 \ 104 | --feat-dir=$train_data_dir \ 105 | --ali-dir $ali_dir \ 106 | --lang data/lang \ 107 | --reporting.email="$reporting_email" \ 108 | --dir=$dir || exit 1; 109 | 110 | fi 111 | 112 | if [ $stage -le 13 ]; then 113 | # this does offline decoding that should give about the same results as the 114 | # real online decoding (the one with --per-utt true) 115 | rm $dir/.error 2>/dev/null || true 116 | for test in test_clean test_other dev_clean dev_other; do 117 | ( 118 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 119 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 120 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 121 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 122 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 123 | steps/lmrescore_const_arpa.sh \ 124 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 125 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 126 | steps/lmrescore_const_arpa.sh \ 127 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 128 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 129 | ) || touch $dir/.error & 130 | done 131 | wait 132 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 133 | fi 134 | 135 | exit 0; 136 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_similarity_matrices_metrics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #===== begin config ======= 9 | 10 | set_test=libri_test_trials_f 11 | results= 12 | 13 | asv_eval_model=exp/models/asv_eval/xvect_01709_1 14 | plda_dir=$asv_eval_model/xvect_train_clean_360 15 | 16 | #=========== end config =========== 17 | 18 | . 
utils/parse_options.sh 19 | 20 | anon_data_suffix=_anon 21 | osp_set_folder=$asv_eval_model/xvect_$set_test 22 | psp_set_folder=${osp_set_folder}$anon_data_suffix 23 | utt2spk=data/$set_test/utt2spk 24 | 25 | printf "asv_eval_model = $asv_eval_model\n" 26 | printf "set_test = $set_test\n" 27 | printf "plda_dir = $plda_dir\n" 28 | printf "results = $results\n" 29 | 30 | exp_files_dir=$results/similarity_matrices_DeID_Gvd/$set_test/exp_files 31 | 32 | if [ ! -d "$exp_files_dir" ]; then 33 | mkdir -p $exp_files_dir 34 | fi 35 | 36 | cat $osp_set_folder/xvector.scp | cut -d' ' -f1 > $exp_files_dir/segments_osp_set.scp 37 | cat $psp_set_folder/xvector.scp | cut -d' ' -f1 > $exp_files_dir/segments_psp_set.scp 38 | 39 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_osp_set.scp $exp_files_dir/segments_osp_set.scp osp_osp $exp_files_dir/ $utt2spk 40 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_osp_set.scp $exp_files_dir/segments_psp_set.scp osp_psp $exp_files_dir/ $utt2spk 41 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_psp_set.scp $exp_files_dir/segments_psp_set.scp psp_psp $exp_files_dir/ $utt2spk 42 | wait 43 | 44 | #Compute scores Osp-Osp 45 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 46 | ivector-plda-scoring --normalize-length=true \ 47 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 48 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 49 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 50 | "cat $exp_files_dir/segments_osp_osp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_osp_osp || exit 1; 51 | 52 | 53 | #Compute scores Osp-Psp 54 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 55 | ivector-plda-scoring --normalize-length=true \ 56 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 57 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 58 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 59 | "cat $exp_files_dir/segments_osp_psp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_osp_psp || exit 1; 60 | 61 | 62 | #Compute scores Psp-Psp 63 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 64 | ivector-plda-scoring --normalize-length=true \ 65 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 66 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 67 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 68 | "cat $exp_files_dir/segments_psp_psp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_psp_psp || exit 1; 69 | 70 | 71 | python3 local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_osp_osp $exp_files_dir/spk_osp_osp_trial.txt 72 | python3 
local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_osp_psp $exp_files_dir/spk_osp_psp_trial.txt 73 | python3 local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_psp_psp $exp_files_dir/spk_psp_psp_trial.txt 74 | wait 75 | 76 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_osp_osp.calibrated $exp_files_dir/spk_osp_osp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/$set_test osp_osp 77 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_osp_psp.calibrated $exp_files_dir/spk_osp_psp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/${set_test} osp_psp 78 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_psp_psp.calibrated $exp_files_dir/spk_psp_psp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/${set_test} psp_psp 79 | wait 80 | 81 | DeID=$(python3 local/similarity_matrices/compute_DeID.py $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_osp.npy $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_psp.npy) 82 | Gvd=$(python3 local/similarity_matrices/compute_Gvd.py $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_osp.npy $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_psp_psp.npy) 83 | 84 | echo "Set : $set_test" 85 | echo " De-Identification : $DeID" 86 | echo " Gain of voice distinctiveness : $Gvd" 87 | 88 | echo "De-Identification : $DeID" > $results/similarity_matrices_DeID_Gvd/$set_test/DeIDentification 89 | echo "Gain of voice distinctiveness : $Gvd" > $results/similarity_matrices_DeID_Gvd/$set_test/gain_of_voice_distinctiveness 90 | 91 | -------------------------------------------------------------------------------- /baseline/local/split_long_utterance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # Begin configuration section. 7 | seg_length=30 8 | min_seg_length=10 9 | overlap_length=5 10 | # End configuration section. 11 | 12 | echo "$0 $@" 13 | 14 | [ -f ./path.sh ] && . ./path.sh 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 2 ]; then 18 | echo "This script truncates the long audio into smaller overlapping segments" 19 | echo "" 20 | echo "Usage: $0 [options] " 21 | echo " e.g.: $0 data/train_si284_long data/train_si284_split" 22 | echo "" 23 | echo "Options:" 24 | echo " --min-seg-length # minimal segment length" 25 | echo " --seg-length # length of segments in seconds." 26 | echo " --overlap-length # length of overlap in seconds." 27 | exit 1; 28 | fi 29 | 30 | input_dir=$1 31 | output_dir=$2 32 | 33 | for f in spk2utt utt2spk wav.scp; do 34 | [ ! -f $input_dir/$f ] && echo "$0: no such file $input_dir/$f" && exit 1; 35 | done 36 | 37 | [ ! $seg_length -gt $overlap_length ] \ 38 | && echo "$0: --seg-length should be longer than --overlap-length." && exit 1; 39 | 40 | # Checks if sox is on the path. 41 | sox=`which sox` 42 | [ $? -ne 0 ] && echo "$0: sox command not found." && exit 1; 43 | sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe 44 | [ ! -x $sph2pipe ] && echo "$0: sph2pipe command not found." && exit 1; 45 | 46 | mkdir -p $output_dir 47 | cp -f $input_dir/spk2gender $output_dir/spk2gender 2>/dev/null 48 | cp -f $input_dir/wav.scp $output_dir/wav.scp 49 | 50 | # We assume the audio length in header is correct and get it from there. 
It is 51 | # a little bit annoying that old version of sox does not support the following: 52 | # $audio_cmd | sox --i -D 53 | # we have to put it in the following format for the old versions: 54 | # $sox --i -D "|$audio_cmd" 55 | # Another way is to count all the samples to get the duration, but it takes 56 | # longer time, so we do not use it here.. The command is: 57 | # $audio_cmd | sox -t wav - -n stat | grep -P "^Length" | awk '{print $1;}' 58 | # 59 | # Note: in the wsj example the process takes couple of minutes because of the 60 | # audio file concatenation; in a real case it should be much faster since 61 | # it just reads the header. 62 | cat $output_dir/wav.scp | perl -e ' 63 | $no_orig_seg = "false"; # Original segment file may or may not exist. 64 | ($u2s_in, $u2s_out, $seg_in, 65 | $seg_out, $orig2utt, $sox, $slen, $mslen, $olen) = @ARGV; 66 | open(UI, "<$u2s_in") || die "Error: fail to open $u2s_in\n"; 67 | open(UO, ">$u2s_out") || die "Error: fail to open $u2s_out\n"; 68 | open(SI, "<$seg_in") || ($no_orig_seg = "true"); 69 | open(SO, ">$seg_out") || die "Error: fail to open $seg_out\n"; 70 | open(UMAP, ">$orig2utt") || die "Error: fail to open $orig2utt\n"; 71 | # If the original segment file exists, we have to work out the segment 72 | # duration from the segment file. Otherwise we work that out from the wav.scp 73 | # file. 74 | if ($no_orig_seg eq "false") { 75 | while () { 76 | chomp; 77 | @col = split; 78 | @col == 4 || die "Error: bad line $_\n"; 79 | ($seg_id, $wav_id, $seg_start, $seg_end) = @col; 80 | $seg2wav{$seg_id} = $wav_id; 81 | $seg_start{$seg_id} = $seg_start; 82 | $seg_end{$seg_id} = $seg_end; 83 | } 84 | } else { 85 | while () { 86 | chomp; 87 | @col = split; 88 | @col >= 2 || "bad line $_\n"; 89 | if ((@col > 2) && ($col[-1] eq "|")) { 90 | $wav_id = shift @col; pop @col; 91 | $audio_cmd = join(" ", @col); 92 | $duration = `$sox --i -D '\''|$audio_cmd'\''`; 93 | } else { 94 | @col == 2 || die "Error: bad line $_\n in wav.scp"; 95 | $wav_id = $col[0]; 96 | $audio_file = $col[1]; 97 | $duration = `$sox --i -D $audio_file`; 98 | } 99 | chomp($duration); 100 | $seg2wav{$wav_id} = $wav_id; 101 | $seg_start{$wav_id} = 0; 102 | $seg_end{$wav_id} = $duration; 103 | } 104 | } 105 | while () { 106 | chomp; 107 | @col = split; 108 | @col == 2 || die "Error: bad line $_\n"; 109 | $utt2spk{$col[0]} = $col[1]; 110 | } 111 | foreach $seg (sort keys %seg2wav) { 112 | $index = 0; 113 | $step = $slen - $olen; 114 | print UMAP "$seg"; 115 | while ($seg_start{$seg} + $index * $step < $seg_end{$seg}) { 116 | $new_seg = $seg . "_" . sprintf("%05d", $index); 117 | $start = $seg_start{$seg} + $index * $step; 118 | $end = $start + $slen; 119 | defined($utt2spk{$seg}) || die "Error: speaker not found for $seg\n"; 120 | print UO "$new_seg $utt2spk{$seg}\n"; 121 | print UMAP " $new_seg"; 122 | $index += 1; 123 | if ($end - $olen + $mslen >= $seg_end{$seg}) { 124 | # last segment will have at least $mslen seconds. 125 | $end = $seg_end{$seg}; 126 | print SO "$new_seg $seg2wav{$seg} $start $end\n"; 127 | last; 128 | } else { 129 | print SO "$new_seg $seg2wav{$seg} $start $end\n"; 130 | } 131 | } 132 | print UMAP "\n"; 133 | }' $input_dir/utt2spk $output_dir/utt2spk \ 134 | $input_dir/segments $output_dir/segments $output_dir/orig2utt \ 135 | $sox $seg_length $min_seg_length $overlap_length 136 | 137 | # CAVEAT: We are not dealing with channels here. Each channel should have a 138 | # unique file name in wav.scp. 
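Note: the perl block above is the actual implementation; the Python sketch below only illustrates the segment-boundary rule it applies, assuming a single recording that starts at time 0 and the default options (seg_length=30, overlap_length=5, min_seg_length=10). Windows advance by seg_length - overlap_length seconds, and the current window is extended to the end of the recording whenever the following window would be shorter than min_seg_length. The function name split_segments is illustrative only.

def split_segments(duration, seg_length=30, min_seg_length=10, overlap_length=5):
    step = seg_length - overlap_length
    segs, index = [], 0
    while index * step < duration:
        start = index * step
        end = start + seg_length
        index += 1
        if end - overlap_length + min_seg_length >= duration:
            segs.append((start, duration))  # last segment absorbs the remainder
            break
        segs.append((start, end))
    return segs

# split_segments(72) -> [(0, 30), (25, 55), (50, 72)]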
139 | paste -d ' ' <(cut -d ' ' -f 1 $output_dir/wav.scp) \ 140 | <(cut -d ' ' -f 1 $output_dir/wav.scp) | awk '{print $1" "$2" A";}' \ 141 | > $output_dir/reco2file_and_channel 142 | 143 | utils/fix_data_dir.sh $output_dir 144 | 145 | exit 0; 146 | -------------------------------------------------------------------------------- /baseline/local/anon/gen_pseudo_xvecs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import basename, join 3 | import operator 4 | 5 | import numpy as np 6 | import random 7 | from kaldiio import WriteHelper, ReadHelper 8 | 9 | args = sys.argv 10 | print(args) 11 | 12 | src_data = args[1] 13 | pool_data = args[2] 14 | affinity_scores_dir = args[3] 15 | xvec_out_dir = args[4] 16 | pseudo_xvecs_dir = args[5] 17 | rand_level = args[6] 18 | cross_gender = args[7] == "true" 19 | proximity = args[8] 20 | 21 | rand_seed = args[9] 22 | 23 | REGION = 100 24 | WORLD = 200 25 | 26 | random.seed(rand_seed) 27 | 28 | if cross_gender: 29 | print("**Opposite gender speakers will be selected.**") 30 | else: 31 | print("**Same gender speakers will be selected.**") 32 | 33 | print("Randomization level: " + rand_level) 34 | print("Proximity: " + proximity) 35 | # Core logic of anonymization by randomization 36 | def select_random_xvec(top500, pool_xvectors): 37 | # number of random xvectors to select out of pool 38 | #random100mask = np.random.random_integers(0, 199, NR) 39 | random100mask = random.sample(range(WORLD), REGION) 40 | pseudo_spk_list = [x for i, x in enumerate(top500) if i in 41 | random100mask] 42 | pseudo_spk_matrix = np.zeros((REGION, 512), dtype='float64') 43 | for i, spk_aff in enumerate(pseudo_spk_list): 44 | pseudo_spk_matrix[i, :] = pool_xvectors[spk_aff[0]] 45 | # Take mean of 100 randomly selected xvectors 46 | pseudo_xvec = np.mean(pseudo_spk_matrix, axis=0) 47 | return pseudo_xvec 48 | 49 | 50 | gender_rev = {'m': 'f', 'f': 'm'} 51 | src_spk2gender_file = join(src_data, 'spk2gender') 52 | src_spk2utt_file = join(src_data, 'spk2utt') 53 | pool_spk2gender_file = join(pool_data, 'spk2gender') 54 | 55 | src_spk2gender = {} 56 | src_spk2utt = {} 57 | pool_spk2gender = {} 58 | # Read source spk2gender and spk2utt 59 | print("Reading source spk2gender.") 60 | with open(src_spk2gender_file) as f: 61 | for line in f.read().splitlines(): 62 | sp = line.split() 63 | src_spk2gender[sp[0]] = sp[1] 64 | print("Reading source spk2utt.") 65 | with open(src_spk2utt_file) as f: 66 | for line in f.read().splitlines(): 67 | sp = line.split() 68 | src_spk2utt[sp[0]] = sp[1:] 69 | # Read pool spk2gender 70 | print("Reading pool spk2gender.") 71 | with open(pool_spk2gender_file) as f: 72 | for line in f.read().splitlines(): 73 | sp = line.split() 74 | pool_spk2gender[sp[0]] = sp[1] 75 | 76 | # Read pool xvectors 77 | print("Reading pool xvectors.") 78 | pool_xvec_file = join(xvec_out_dir, 'xvectors_'+basename(pool_data), 79 | 'spk_xvector.scp') 80 | pool_xvectors = {} 81 | c = 0 82 | #with open(pool_xvec_file) as f: 83 | # for key, xvec in kaldi_io.read_vec_flt_scp(f): 84 | with ReadHelper('scp:'+pool_xvec_file) as reader: 85 | for key, xvec in reader: 86 | #print key, mat.shape 87 | pool_xvectors[key] = xvec 88 | c += 1 89 | print("Read ", c, "pool xvectors") 90 | 91 | pseudo_xvec_map = {} 92 | pseudo_gender_map = {} 93 | for spk, gender in src_spk2gender.items(): 94 | # Filter the affinity pool by gender 95 | affinity_pool = {} 96 | # If we are doing cross-gender VC, reverse the gender else gender remains same 
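Note: the per-speaker loop here, together with select_random_xvec above, is the actual selection logic; the following is only a condensed, illustrative rewrite of the same idea. For each source speaker: optionally flip the gender, keep pool speakers of the target gender, rank them by affinity score, keep the WORLD most distant (proximity=farthest) or closest (nearest), and average the x-vectors of a random REGION of them. The names pseudo_xvector, pool_gender and pool_xvecs are illustrative, not part of the recipe.

import random
import numpy as np

def pseudo_xvector(affinity, pool_gender, pool_xvecs, gender,
                   proximity="farthest", world=200, region=100):
    pool = [(s, a) for s, a in affinity.items() if pool_gender[s] == gender]
    pool.sort(key=lambda x: x[1], reverse=(proximity == "nearest"))
    candidates = [s for s, _ in pool[:world]]
    chosen = random.sample(candidates, min(region, len(candidates)))
    return np.mean([pool_xvecs[s] for s in chosen], axis=0)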
97 | if cross_gender: 98 | gender = gender_rev[gender] 99 | #print("Filtering pool for spk: "+spk) 100 | pseudo_gender_map[spk] = gender 101 | with open(join(affinity_scores_dir, 'affinity_'+spk)) as f: 102 | for line in f.read().splitlines(): 103 | sp = line.split() 104 | pool_spk = sp[1] 105 | af_score = float(sp[2]) 106 | if pool_spk2gender[pool_spk] == gender: 107 | affinity_pool[pool_spk] = af_score 108 | 109 | # Sort the filtered affinity pool by scores 110 | if proximity == "farthest": 111 | sorted_aff = sorted(affinity_pool.items(), key=operator.itemgetter(1)) 112 | elif proximity == "nearest": 113 | sorted_aff = sorted(affinity_pool.items(), key=operator.itemgetter(1), 114 | reverse=True) 115 | 116 | 117 | # Select WORLD least affinity speakers and then randomly select REGION out of 118 | # them 119 | top_spk = sorted_aff[:WORLD] 120 | if rand_level == 'spk': 121 | # For rand_level = spk, one xvector is assigned to all the utterances 122 | # of a speaker 123 | pseudo_xvec = select_random_xvec(top_spk, pool_xvectors) 124 | # Assign it to all utterances of the current speaker 125 | for uttid in src_spk2utt[spk]: 126 | pseudo_xvec_map[uttid] = pseudo_xvec 127 | elif rand_level == 'utt': 128 | # For rand_level = utt, random xvector is assigned to all the utterances 129 | # of a speaker 130 | for uttid in src_spk2utt[spk]: 131 | # Compute random vector for every utt 132 | pseudo_xvec = select_random_xvec(top_spk, pool_xvectors) 133 | # Assign it to all utterances of the current speaker 134 | pseudo_xvec_map[uttid] = pseudo_xvec 135 | else: 136 | print("rand_level not supported! Errors will happen!") 137 | 138 | 139 | # Write features as ark,scp 140 | print("Writing pseud-speaker xvectors to: "+pseudo_xvecs_dir) 141 | ark_scp_output = 'ark,scp:{}/{}.ark,{}/{}.scp'.format( 142 | pseudo_xvecs_dir, 'pseudo_xvector', 143 | pseudo_xvecs_dir, 'pseudo_xvector') 144 | with WriteHelper(ark_scp_output) as writer: 145 | for uttid, xvec in pseudo_xvec_map.items(): 146 | writer(uttid, xvec) 147 | 148 | print("Writing pseudo-speaker spk2gender.") 149 | with open(join(pseudo_xvecs_dir, 'spk2gender'), 'w') as f: 150 | spk2gen_arr = [spk+' '+gender for spk, gender in pseudo_gender_map.items()] 151 | sorted_spk2gen = sorted(spk2gen_arr) 152 | f.write('\n'.join(sorted_spk2gen) + '\n') 153 | 154 | 155 | -------------------------------------------------------------------------------- /baseline/run_xvector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 David Snyder 3 | # 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) 4 | # 2017 Johns Hopkins University (Author: Daniel Povey) 5 | # 6 | # Copied from egs/sre16/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh (commit e082c17d4a8f8a791428ae4d9f7ceb776aef3f0b). 7 | # 8 | # Apache 2.0. 9 | 10 | # Adapted from the follwing: 11 | # script trains a DNN similar to the recipe described in http://www.danielpovey.com/files/2018_icassp_xvectors.pdf 12 | 13 | . ./cmd.sh 14 | . ./path.sh 15 | 16 | set -e 17 | 18 | stage=1 19 | train_stage=-1 20 | lrate=001 21 | epochs=1 22 | shrink=10 23 | 24 | data=data/train_clean_360 25 | nnet_dir=exp/xvect 26 | egs_dir=$nnet_dir/egs 27 | 28 | . ./utils/parse_options.sh 29 | 30 | num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) 31 | 32 | # Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh. 33 | # The argument --num-repeats is related to the number of times a speaker 34 | # repeats per archive. 
If it seems like you're getting too many archives 35 | # (e.g., more than 200) try increasing the --frames-per-iter option. The 36 | # arguments --min-frames-per-chunk and --max-frames-per-chunk specify the 37 | # minimum and maximum length (in terms of number of frames) of the features 38 | # in the examples. 39 | # 40 | # To make sense of the egs script, it may be necessary to put an "exit 1" 41 | # command immediately after stage 3. Then, inspect 42 | # exp//egs/temp/ranges.* . The ranges files specify the examples that 43 | # will be created, and which archives they will be stored in. Each line of 44 | # ranges.* has the following form: 45 | # 46 | # For example: 47 | # 100304-f-sre2006-kacg-A 1 2 4079 881 23 48 | 49 | # If you're satisfied with the number of archives (e.g., 50-150 archives is 50 | # reasonable) and with the number of examples per speaker (e.g., 1000-5000 51 | # is reasonable) then you can let the script continue to the later stages. 52 | # Otherwise, try increasing or decreasing the --num-repeats option. You might 53 | # need to fiddle with --frames-per-iter. Increasing this value decreases the 54 | # the number of archives and increases the number of examples per archive. 55 | # Decreasing this value increases the number of archives, while decreasing the 56 | # number of examples per archive. 57 | if [ $stage -le 6 ]; then 58 | echo "$0: Getting neural network training egs"; 59 | sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ 60 | --nj 8 \ 61 | --stage 0 \ 62 | --frames-per-iter 100000000 \ 63 | --frames-per-iter-diagnostic 100000 \ 64 | --min-frames-per-chunk 200 \ 65 | --max-frames-per-chunk 400 \ 66 | --num-diagnostic-archives 3 \ 67 | --num-repeats 50 \ 68 | "$data" $egs_dir || exit 1 69 | fi 70 | 71 | if [ $stage -le 7 ]; then 72 | echo "$0: creating neural net configs using the xconfig parser"; 73 | num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}') 74 | feat_dim=$(cat $egs_dir/info/feat_dim) 75 | 76 | # This chunk-size corresponds to the maximum number of frames the 77 | # stats layer is able to pool over. In this script, it corresponds 78 | # to 100 seconds. If the input recording is greater than 100 seconds, 79 | # we will compute multiple xvectors from the same recording and average 80 | # to produce the final xvector. 81 | max_chunk_size=10000 82 | 83 | # The smallest number of frames we're comfortable computing an xvector from. 84 | # Note that the hard minimum is given by the left and right context of the 85 | # frame-level layers. 86 | min_chunk_size=25 87 | mkdir -p $nnet_dir/configs 88 | cat < $nnet_dir/configs/network.xconfig 89 | # please note that it is important to have input layer with the name=input 90 | 91 | # The frame-level layers 92 | input dim=${feat_dim} name=input 93 | relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 94 | relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512 95 | relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512 96 | relu-batchnorm-layer name=tdnn4 dim=512 97 | relu-batchnorm-layer name=tdnn5 dim=1500 98 | 99 | # The stats pooling layer. Layers after this are segment-level. 100 | # In the config below, the first and last argument (0, and ${max_chunk_size}) 101 | # means that we pool over an input segment starting at frame 0 102 | # and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1) 103 | # mean that no subsampling is performed. 
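Note: as a rough illustration of what the stats-layer defined just below computes (this numpy sketch is not part of the recipe and is not how nnet3 implements it): per-dimension mean and standard deviation pooled over at most max_chunk_size frames, turning a (T, D) frame-level matrix into a single 2*D segment-level vector.

import numpy as np

def stats_pool(frames, max_chunk_size=10000):
    chunk = frames[:max_chunk_size]  # pool over at most max_chunk_size frames, no subsampling
    return np.concatenate([chunk.mean(axis=0), chunk.std(axis=0)])

# e.g. stats_pool(np.random.randn(300, 1500)).shape == (3000,)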
104 | stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size}) 105 | 106 | # This is where we usually extract the embedding (aka xvector) from. 107 | relu-batchnorm-layer name=tdnn6 dim=512 input=stats 108 | 109 | # This is where another layer the embedding could be extracted 110 | # from, but usually the previous one works better. 111 | relu-batchnorm-layer name=tdnn7 dim=512 112 | output-layer name=output include-log-softmax=true dim=${num_targets} 113 | EOF 114 | 115 | steps/nnet3/xconfig_to_configs.py \ 116 | --xconfig-file $nnet_dir/configs/network.xconfig \ 117 | --config-dir $nnet_dir/configs 118 | cp $nnet_dir/configs/final.config $nnet_dir/nnet.config 119 | 120 | # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh 121 | echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config 122 | echo "$max_chunk_size" > $nnet_dir/max_chunk_size 123 | echo "$min_chunk_size" > $nnet_dir/min_chunk_size 124 | fi 125 | 126 | dropout_schedule='0,0@0.20,0.1@0.50,0' 127 | srand=123 128 | if [ $stage -le 8 ]; then 129 | steps/nnet3/train_raw_dnn.py --stage=$train_stage \ 130 | --cmd="$train_cmd" \ 131 | --trainer.optimization.proportional-shrink $shrink \ 132 | --trainer.optimization.momentum=0.5 \ 133 | --trainer.optimization.num-jobs-initial=2 \ 134 | --trainer.optimization.num-jobs-final=2 \ 135 | --trainer.optimization.initial-effective-lrate=0.$lrate \ 136 | --trainer.optimization.final-effective-lrate=0.0$lrate \ 137 | --trainer.optimization.minibatch-size=64 \ 138 | --trainer.srand=$srand \ 139 | --trainer.max-param-change=2 \ 140 | --trainer.num-epochs=$epochs \ 141 | --trainer.dropout-schedule="$dropout_schedule" \ 142 | --trainer.shuffle-buffer-size=1000 \ 143 | --egs.frames-per-eg=1 \ 144 | --egs.dir="$egs_dir" \ 145 | --cleanup.remove-egs false \ 146 | --cleanup.preserve-model-interval=5 \ 147 | --use-gpu=true \ 148 | --dir=$nnet_dir || exit 1 149 | fi 150 | 151 | exit 0 152 | -------------------------------------------------------------------------------- /baseline/run_asv_eval_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #ASV_eval training on LibriSpeech train_clean_360 corpus 9 | 10 | 11 | nj=20 12 | voxceleb1_trials=data/voxceleb1_test/trials 13 | 14 | lrate=01709 15 | epochs=1 16 | shrink=10 17 | egs_dir=exp/xvect_egs 18 | 19 | stage=0 20 | train_stage=-1 21 | 22 | . ./utils/parse_options.sh 23 | 24 | nnet_dir=exp/xvect_${lrate}_${epochs} 25 | 26 | if [ $stage -le 0 ]; then 27 | for name in voxceleb1_test train_clean_360; do 28 | steps/make_mfcc.sh \ 29 | --write-utt2num-frames true \ 30 | --mfcc-config conf/mfcc.conf \ 31 | --nj $nj --cmd "$train_cmd" \ 32 | data/$name || exit 1 33 | utils/fix_data_dir.sh data/$name || exit 1 34 | sid/compute_vad_decision.sh \ 35 | --nj $nj --cmd "$train_cmd" \ 36 | --vad-config conf/vad.conf \ 37 | data/$name || exit 1 38 | utils/fix_data_dir.sh data/$name || exit 1 39 | done 40 | fi 41 | 42 | # Now we prepare the features to generate examples for xvector training. 43 | if [ $stage -le 1 ]; then 44 | # This script applies CMVN and removes nonspeech frames. Note that this is somewhat 45 | # wasteful, as it roughly doubles the amount of training data on disk. After 46 | # creating training examples, this can be removed. 
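Note: the prepare_feats_for_egs.sh call below is the actual Kaldi step; the numpy lines here are only a conceptual sketch of what it does (normalise the features and drop frames the VAD marked as nonspeech), with illustrative names and a plain per-utterance mean instead of Kaldi's sliding-window CMVN.

import numpy as np

def cmvn_and_remove_silence(feats, vad_decisions):
    normed = feats - feats.mean(axis=0, keepdims=True)  # cepstral mean normalisation
    return normed[vad_decisions.astype(bool)]           # keep only speech frames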
47 | local/nnet3/xvector/prepare_feats_for_egs.sh \ 48 | --nj $nj --cmd "$train_cmd" \ 49 | data/train_clean_360 data/train_clean_360_no_sil \ 50 | exp/train_clean_360_no_sil || exit 1 51 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 52 | fi 53 | 54 | if [ $stage -le 2 ]; then 55 | # Now, we need to remove features that are too short after removing silence 56 | # frames. We want atleast 5s (500 frames) per utterance. 57 | min_len=400 58 | mv data/train_clean_360_no_sil/utt2num_frames data/train_clean_360_no_sil/utt2num_frames.bak 59 | awk -v min_len=${min_len} '$2 > min_len {print $1, $2}' data/train_clean_360_no_sil/utt2num_frames.bak > data/train_clean_360_no_sil/utt2num_frames 60 | utils/filter_scp.pl data/train_clean_360_no_sil/utt2num_frames data/train_clean_360_no_sil/utt2spk > data/train_clean_360_no_sil/utt2spk.new 61 | mv data/train_clean_360_no_sil/utt2spk.new data/train_clean_360_no_sil/utt2spk 62 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 63 | 64 | # We also want several utterances per speaker. Now we'll throw out speakers 65 | # with fewer than 8 utterances. 66 | min_num_utts=8 67 | awk '{print $1, NF-1}' data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/spk2num 68 | awk -v min_num_utts=${min_num_utts} '$2 >= min_num_utts {print $1, $2}' data/train_clean_360_no_sil/spk2num | utils/filter_scp.pl - data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/spk2utt.new 69 | mv data/train_clean_360_no_sil/spk2utt.new data/train_clean_360_no_sil/spk2utt 70 | utils/spk2utt_to_utt2spk.pl data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/utt2spk 71 | 72 | utils/filter_scp.pl data/train_clean_360_no_sil/utt2spk data/train_clean_360_no_sil/utt2num_frames > data/train_clean_360_no_sil/utt2num_frames.new 73 | mv data/train_clean_360_no_sil/utt2num_frames.new data/train_clean_360_no_sil/utt2num_frames 74 | 75 | # Now we're ready to create training examples. 76 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 77 | fi 78 | 79 | # Stages 6 through 8 are handled in run_xvector.sh 80 | if [ $stage -le 8 ]; then 81 | ./run_xvector.sh \ 82 | --stage $stage --train-stage $train_stage \ 83 | --data data/train_clean_360_no_sil --nnet-dir $nnet_dir \ 84 | --epochs $epochs --shrink $shrink --lrate $lrate --egs-dir $egs_dir || exit 1 85 | fi 86 | 87 | if [ $stage -le 9 ]; then 88 | # Extract x-vectors for centering, LDA, and PLDA training. 89 | sid/nnet3/xvector/extract_xvectors.sh \ 90 | --cmd "$train_cmd --mem 4G" --nj $nj \ 91 | $nnet_dir data/train_clean_360 \ 92 | $nnet_dir/xvect_train_clean_360 || exit 1 93 | # Extract x-vectors used in the evaluation. 94 | sid/nnet3/xvector/extract_xvectors.sh \ 95 | --cmd "$train_cmd --mem 4G" --nj $nj \ 96 | $nnet_dir data/voxceleb1_test \ 97 | $nnet_dir/xvect_voxceleb1_test || exit 1 98 | fi 99 | 100 | if [ $stage -le 10 ]; then 101 | # Compute the mean vector for centering the evaluation xvectors. 102 | $train_cmd $nnet_dir/xvect_train_clean_360/log/compute_mean.log \ 103 | ivector-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp \ 104 | $nnet_dir/xvect_train_clean_360/mean.vec || exit 1 105 | 106 | # This script uses LDA to decrease the dimensionality prior to PLDA. 
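Note: the commands below train the LDA transform and PLDA model; the same mean.vec / transform.mat / length-normalisation chain then reappears inside every ivector-plda-scoring pipeline in this recipe. The numpy sketch here is only a simplified illustration of that preprocessing: it uses a purely linear transform and unit-length normalisation, whereas Kaldi's transform-vec can apply an affine transform and ivector-normalize-length scales vectors to length sqrt(dim) rather than 1. Variable names are illustrative.

import numpy as np

def prepare_xvector(xvec, global_mean, lda_transform):
    centered = xvec - global_mean                  # ivector-subtract-global-mean with mean.vec
    projected = lda_transform @ centered           # transform-vec with transform.mat (reduces to lda_dim)
    return projected / np.linalg.norm(projected)   # simplified ivector-normalize-length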
107 | lda_dim=200 108 | $train_cmd $nnet_dir/xvect_train_clean_360/log/lda.log \ 109 | ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \ 110 | "ark:ivector-subtract-global-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp ark:- |" \ 111 | ark:data/train_clean_360/utt2spk $nnet_dir/xvect_train_clean_360/transform.mat || exit 1 112 | 113 | # Train the PLDA model. 114 | $train_cmd $nnet_dir/xvect_train_clean_360/log/plda.log \ 115 | ivector-compute-plda ark:data/train_clean_360/spk2utt \ 116 | "ark:ivector-subtract-global-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 117 | $nnet_dir/xvect_train_clean_360/plda || exit 1 118 | fi 119 | 120 | if [ $stage -le 11 ]; then 121 | $train_cmd $nnet_dir/scores/log/voxceleb1_test_scoring.log \ 122 | ivector-plda-scoring --normalize-length=true \ 123 | "ivector-copy-plda --smoothing=0.0 $nnet_dir/xvect_train_clean_360/plda - |" \ 124 | "ark:ivector-subtract-global-mean $nnet_dir/xvect_train_clean_360/mean.vec scp:$nnet_dir/xvect_voxceleb1_test/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 125 | "ark:ivector-subtract-global-mean $nnet_dir/xvect_train_clean_360/mean.vec scp:$nnet_dir/xvect_voxceleb1_test/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 126 | "cat '$voxceleb1_trials' | cut -d\ --fields=1,2 |" $nnet_dir/scores_voxceleb1_test || exit 1 127 | fi 128 | 129 | if [ $stage -le 12 ]; then 130 | eer=`compute-eer <(local/prepare_for_eer.py $voxceleb1_trials $nnet_dir/scores_voxceleb1_test) 2> /dev/null` 131 | mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores_voxceleb1_test $voxceleb1_trials 2> /dev/null` 132 | mindcf2=`sid/compute_min_dcf.py --p-target 0.001 $nnet_dir/scores_voxceleb1_test $voxceleb1_trials 2> /dev/null` 133 | echo "EER: $eer%" | tee $nnet_dir/scores/voxceleb1_test.txt 134 | echo "minDCF(p-target=0.01): $mindcf1" | tee -a $nnet_dir/scores/voxceleb1_test.txt 135 | echo "minDCF(p-target=0.001): $mindcf2" | tee -a $nnet_dir/scores/voxceleb1_test.txt 136 | 137 | fi 138 | 139 | echo Done 140 | -------------------------------------------------------------------------------- /baseline/RESULTS_mcadams: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.807% 3 | Cllr (min/act): 0.305/42.903 4 | ROCCH-EER: 8.686% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 35.37% 7 | Cllr (min/act): 0.820/116.889 8 | ROCCH-EER: 34.640% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.035/14.294 12 | ROCCH-EER: 1.128% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 17.86% 15 | Cllr (min/act): 0.526/105.727 16 | ROCCH-EER: 17.633% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 23.58% 19 | Cllr (min/act): 0.620/11.765 20 | ROCCH-EER: 23.081% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 10.56% 23 | Cllr (min/act): 0.359/11.959 24 | ROCCH-EER: 10.433% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.184/26.808 28 | ROCCH-EER: 7.179% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 26.09% 31 | Cllr (min/act): 0.686/115.572 32 | ROCCH-EER: 25.575% 33 | 
ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 | Cllr (min/act): 0.041/15.342 36 | ROCCH-EER: 1.065% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 17.82% 39 | Cllr (min/act): 0.498/106.444 40 | ROCCH-EER: 17.532% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 15.15% 43 | Cllr (min/act): 0.489/12.542 44 | ROCCH-EER: 15.002% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 8.463% 47 | Cllr (min/act): 0.263/15.393 48 | ROCCH-EER: 8.237% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.92% 51 | Cllr (min/act): 0.101/1.135 52 | ROCCH-EER: 2.874% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 35.43% 55 | Cllr (min/act): 0.907/90.524 56 | ROCCH-EER: 35.304% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.869 60 | ROCCH-EER: 2.195% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 34.01% 63 | Cllr (min/act): 0.879/85.860 64 | ROCCH-EER: 33.871% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.155 68 | ROCCH-EER: 1.375% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 28.14% 71 | Cllr (min/act): 0.740/98.410 72 | ROCCH-EER: 28.033% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.555 76 | ROCCH-EER: 1.318% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 23.93% 79 | Cllr (min/act): 0.669/90.705 80 | ROCCH-EER: 23.485% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 15.78% 83 | Cllr (min/act): 0.504/39.761 84 | ROCCH-EER: 15.687% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 11.63% 87 | Cllr (min/act): 0.368/43.488 88 | ROCCH-EER: 11.197% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 11.12% 91 | Cllr (min/act): 0.384/23.024 92 | ROCCH-EER: 11.075% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 10.54% 95 | Cllr (min/act): 0.317/24.945 96 | ROCCH-EER: 10.073% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.938% 99 | Cllr (min/act): 0.169/1.492 100 | ROCCH-EER: 4.864% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 29.99% 103 | Cllr (min/act): 0.795/93.164 104 | ROCCH-EER: 29.905% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.092/0.861 108 | ROCCH-EER: 2.748% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 30.92% 111 | Cllr (min/act): 0.807/93.959 112 | ROCCH-EER: 30.455% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.816 116 | ROCCH-EER: 1.968% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 28.3% 119 | Cllr (min/act): 0.720/101.697 120 | ROCCH-EER: 27.994% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.042 124 | ROCCH-EER: 0.965% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 24.29% 127 | Cllr (min/act): 0.713/99.336 128 | ROCCH-EER: 23.988% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 16.98% 131 | Cllr (min/act): 0.546/41.337 132 | ROCCH-EER: 16.894% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 14.45% 135 | Cllr (min/act): 0.464/42.745 136 | ROCCH-EER: 14.069% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 12.23% 139 | Cllr (min/act): 0.397/25.074 140 | ROCCH-EER: 11.931% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 
11.86% 143 | Cllr (min/act): 0.347/28.230 144 | ROCCH-EER: 11.217% 145 | ASR-libri_dev_asr 146 | %WER 5.24 [ 2084 / 39783, 204 ins, 214 del, 1666 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.0 147 | %WER 3.84 [ 1527 / 39783, 179 ins, 148 del, 1200 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_11_0.5 148 | ASR-libri_dev_asr_anon 149 | %WER 12.15 [ 4832 / 39783, 345 ins, 683 del, 3804 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_15_0.0 150 | %WER 8.74 [ 3476 / 39783, 353 ins, 378 del, 2745 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_15_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 229 ins, 185 del, 1530 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_12_0.0 153 | %WER 4.17 [ 1460 / 35042, 175 ins, 158 del, 1127 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 11.75 [ 4119 / 35042, 373 ins, 448 del, 3298 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 8.90 [ 3118 / 35042, 352 ins, 318 del, 2448 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.0 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12132 / 86627, 1148 ins, 1857 del, 9127 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.78 [ 9337 / 86627, 988 ins, 1352 del, 6997 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 30.05 [ 26035 / 86627, 1874 ins, 4609 del, 19552 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 25.56 [ 22138 / 86627, 1764 ins, 3677 del, 16697 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.5 163 | ASR-vctk_test_asr 164 | %WER 16.38 [ 14196 / 86642, 1331 ins, 2234 del, 10631 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.80 [ 11092 / 86642, 1301 ins, 1467 del, 8324 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 33.30 [ 28854 / 86642, 1933 ins, 5177 del, 21744 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_16_0.0 168 | %WER 28.15 [ 24388 / 86642, 2128 ins, 3676 del, 18584 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_17_0.0 169 | -------------------------------------------------------------------------------- /baseline/RESULTS_baseline: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.665% 3 | Cllr (min/act): 0.304/42.857 4 | ROCCH-EER: 8.600% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 50.14% 7 | Cllr (min/act): 0.996/144.112 8 | ROCCH-EER: 48.584% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.034/14.250 12 | ROCCH-EER: 1.070% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 57.76% 15 | Cllr (min/act): 0.999/168.988 16 | ROCCH-EER: 49.932% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 36.79% 19 | Cllr (min/act): 0.894/16.345 20 | ROCCH-EER: 36.281% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 34.16% 23 | Cllr (min/act): 0.867/24.715 24 | ROCCH-EER: 33.886% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.183/26.793 28 | ROCCH-EER: 7.165% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 47.26% 31 | Cllr (min/act): 0.995/151.822 32 | ROCCH-EER: 46.808% 33 | ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 
| Cllr (min/act): 0.041/15.303 36 | ROCCH-EER: 1.046% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 52.12% 39 | Cllr (min/act): 0.999/166.658 40 | ROCCH-EER: 49.713% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 32.12% 43 | Cllr (min/act): 0.839/16.270 44 | ROCCH-EER: 31.599% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 36.75% 47 | Cllr (min/act): 0.903/33.928 48 | ROCCH-EER: 36.117% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.864% 51 | Cllr (min/act): 0.100/1.134 52 | ROCCH-EER: 2.858% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 49.97% 55 | Cllr (min/act): 0.989/166.027 56 | ROCCH-EER: 48.043% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.868 60 | ROCCH-EER: 2.181% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 49.71% 63 | Cllr (min/act): 0.995/172.049 64 | ROCCH-EER: 47.895% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.158 68 | ROCCH-EER: 1.389% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 53.95% 71 | Cllr (min/act): 1.000/167.511 72 | ROCCH-EER: 49.992% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.559 76 | ROCCH-EER: 1.326% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 54.99% 79 | Cllr (min/act): 0.999/192.924 80 | ROCCH-EER: 49.914% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 26.11% 83 | Cllr (min/act): 0.760/8.414 84 | ROCCH-EER: 25.986% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 27.91% 87 | Cllr (min/act): 0.741/7.205 88 | ROCCH-EER: 27.035% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 30.92% 91 | Cllr (min/act): 0.839/23.797 92 | ROCCH-EER: 30.791% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 33.33% 95 | Cllr (min/act): 0.840/23.891 96 | ROCCH-EER: 32.847% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.887% 99 | Cllr (min/act): 0.169/1.495 100 | ROCCH-EER: 4.842% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 48.05% 103 | Cllr (min/act): 0.998/146.929 104 | ROCCH-EER: 47.837% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.091/0.866 108 | ROCCH-EER: 2.749% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 48.27% 111 | Cllr (min/act): 0.994/162.531 112 | ROCCH-EER: 47.635% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.817 116 | ROCCH-EER: 1.970% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 53.85% 119 | Cllr (min/act): 1.000/167.824 120 | ROCCH-EER: 49.998% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.041 124 | ROCCH-EER: 0.958% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 53.39% 127 | Cllr (min/act): 1.000/190.136 128 | ROCCH-EER: 49.955% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 31.74% 131 | Cllr (min/act): 0.847/11.527 132 | ROCCH-EER: 31.664% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 31.21% 135 | Cllr (min/act): 0.830/9.015 136 | ROCCH-EER: 30.791% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 30.94% 139 | Cllr (min/act): 0.834/23.842 140 | ROCCH-EER: 30.784% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 31.07% 143 | Cllr (min/act): 0.835/21.680 144 | 
ROCCH-EER: 30.794% 145 | ASR-libri_dev_asr 146 | %WER 5.25 [ 2089 / 39783, 172 ins, 250 del, 1667 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.5 147 | %WER 3.83 [ 1522 / 39783, 187 ins, 139 del, 1196 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_13_0.0 148 | ASR-libri_dev_asr_anon 149 | %WER 8.76 [ 3485 / 39783, 277 ins, 484 del, 2724 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_14_0.0 150 | %WER 6.39 [ 2543 / 39783, 270 ins, 284 del, 1989 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_14_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 214 ins, 197 del, 1533 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_13_0.0 153 | %WER 4.15 [ 1453 / 35042, 173 ins, 158 del, 1122 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 9.15 [ 3205 / 35042, 295 ins, 407 del, 2503 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 6.73 [ 2359 / 35042, 216 ins, 326 del, 1817 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.5 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12127 / 86627, 1148 ins, 1869 del, 9110 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.79 [ 9348 / 86627, 993 ins, 1354 del, 7001 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 18.92 [ 16390 / 86627, 1486 ins, 2568 del, 12336 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 15.38 [ 13327 / 86627, 1591 ins, 1738 del, 9998 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.0 163 | ASR-vctk_test_asr 164 | %WER 16.39 [ 14198 / 86642, 1323 ins, 2243 del, 10632 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.82 [ 11104 / 86642, 1300 ins, 1472 del, 8332 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 18.88 [ 16358 / 86642, 1482 ins, 2647 del, 12229 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_15_0.0 168 | %WER 15.23 [ 13193 / 86642, 1294 ins, 2044 del, 9855 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_14_0.5 169 | -------------------------------------------------------------------------------- /baseline/RESULTS_baseline_cosine: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.665% 3 | Cllr (min/act): 0.304/42.857 4 | ROCCH-EER: 8.600% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 49.29% 7 | Cllr (min/act): 0.996/146.391 8 | ROCCH-EER: 48.327% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.034/14.250 12 | ROCCH-EER: 1.070% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 58.7% 15 | Cllr (min/act): 0.999/169.570 16 | ROCCH-EER: 49.911% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 34.66% 19 | Cllr (min/act): 0.873/15.674 20 | ROCCH-EER: 34.168% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 29.66% 23 | Cllr (min/act): 0.799/19.098 24 | ROCCH-EER: 29.356% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.183/26.793 28 | ROCCH-EER: 7.165% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 49.09% 31 | Cllr (min/act): 0.996/151.245 32 | ROCCH-EER: 48.088% 33 | ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 | Cllr (min/act): 0.041/15.303 36 | ROCCH-EER: 
1.046% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 52.78% 39 | Cllr (min/act): 0.999/169.178 40 | ROCCH-EER: 49.968% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 29.38% 43 | Cllr (min/act): 0.806/13.904 44 | ROCCH-EER: 29.112% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 31.85% 47 | Cllr (min/act): 0.840/26.806 48 | ROCCH-EER: 31.324% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.864% 51 | Cllr (min/act): 0.100/1.134 52 | ROCCH-EER: 2.858% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 49.92% 55 | Cllr (min/act): 0.990/163.294 56 | ROCCH-EER: 48.289% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.868 60 | ROCCH-EER: 2.181% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 49.42% 63 | Cllr (min/act): 0.995/165.686 64 | ROCCH-EER: 48.335% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.158 68 | ROCCH-EER: 1.389% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 54.99% 71 | Cllr (min/act): 1.000/165.738 72 | ROCCH-EER: 49.989% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.559 76 | ROCCH-EER: 1.326% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 56.13% 79 | Cllr (min/act): 1.000/191.512 80 | ROCCH-EER: 49.949% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 28.47% 83 | Cllr (min/act): 0.809/9.809 84 | ROCCH-EER: 28.438% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 25.29% 87 | Cllr (min/act): 0.733/7.660 88 | ROCCH-EER: 24.873% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 27.74% 91 | Cllr (min/act): 0.787/19.331 92 | ROCCH-EER: 27.522% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 28.77% 95 | Cllr (min/act): 0.750/18.813 96 | ROCCH-EER: 27.871% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.887% 99 | Cllr (min/act): 0.169/1.495 100 | ROCCH-EER: 4.842% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 49.23% 103 | Cllr (min/act): 1.000/141.531 104 | ROCCH-EER: 48.932% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.091/0.866 108 | ROCCH-EER: 2.749% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 49.42% 111 | Cllr (min/act): 0.996/156.555 112 | ROCCH-EER: 48.192% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.817 116 | ROCCH-EER: 1.970% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 53.67% 119 | Cllr (min/act): 1.000/166.265 120 | ROCCH-EER: 49.998% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.041 124 | ROCCH-EER: 0.958% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 55.93% 127 | Cllr (min/act): 1.000/190.058 128 | ROCCH-EER: 49.975% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 34.88% 131 | Cllr (min/act): 0.896/13.381 132 | ROCCH-EER: 34.582% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 31.5% 135 | Cllr (min/act): 0.846/9.833 136 | ROCCH-EER: 30.885% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 24.28% 139 | Cllr (min/act): 0.716/15.101 140 | ROCCH-EER: 24.116% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 22.6% 143 | Cllr (min/act): 0.669/14.047 144 | ROCCH-EER: 22.075% 145 | ASR-libri_dev_asr 146 | %WER 
5.25 [ 2089 / 39783, 172 ins, 250 del, 1667 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.5 147 | %WER 3.83 [ 1522 / 39783, 187 ins, 139 del, 1196 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_13_0.0 148 | ASR-libri_dev_asr_anon 149 | %WER 9.02 [ 3587 / 39783, 280 ins, 512 del, 2795 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_14_0.0 150 | %WER 6.53 [ 2597 / 39783, 296 ins, 272 del, 2029 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_13_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 214 ins, 197 del, 1533 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_13_0.0 153 | %WER 4.15 [ 1453 / 35042, 173 ins, 158 del, 1122 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 9.01 [ 3158 / 35042, 267 ins, 380 del, 2511 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 6.69 [ 2344 / 35042, 216 ins, 308 del, 1820 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.5 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12127 / 86627, 1148 ins, 1869 del, 9110 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.79 [ 9348 / 86627, 993 ins, 1354 del, 7001 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 19.06 [ 16507 / 86627, 1451 ins, 2652 del, 12404 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 15.57 [ 13487 / 86627, 1583 ins, 1814 del, 10090 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.0 163 | ASR-vctk_test_asr 164 | %WER 16.39 [ 14198 / 86642, 1323 ins, 2243 del, 10632 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.82 [ 11104 / 86642, 1300 ins, 1472 del, 8332 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 19.35 [ 16767 / 86642, 1484 ins, 2731 del, 12552 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_15_0.0 168 | %WER 15.61 [ 13524 / 86642, 1495 ins, 1946 del, 10083 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_16_0.0 169 | -------------------------------------------------------------------------------- /baseline/local/featex/split_am_nsf_data.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | ''' 4 | Script to divide a given data directory for Acoustic modeling 5 | and Neural Source Filter waveform modeling. The dev set will contain some 6 | speakers from train set, and the test set will contain completely disjoint 7 | speakers. 8 | 9 | The root-dir should be in kaldi format, out-dir will be where newly created 10 | train, dev and test will be stored. 11 | 12 | The data division will be with respect to gender. First, ntest speakers 13 | (ntest/2 male, ntest/2 female) will be 14 | split from the dataset with all their utterances to create the test set, then 15 | ndev speakers (ndev/2 male, ndev/2 female) will be sampled from remaining data 16 | and a given percentage (dev-utt-per value can range from 0 to 1) of utterances 17 | will be sampled from each speaker to 18 | create the dev set. 19 | 20 | Remaining data will be used for training. 
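The script reads four positional arguments, in order: root-dir, out-dir, ntest
and ndev. A hypothetical invocation (example values only) would be:
python local/featex/split_am_nsf_data.py data/am_nsf_train exp/am_nsf_data 10 20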
21 | 22 | To run: 23 | python local/split_am_nsf_data.py 24 | ''' 25 | 26 | import sys 27 | import os 28 | from os.path import join, basename 29 | 30 | args = sys.argv 31 | 32 | root_dir = args[1] 33 | out_dir = args[2] 34 | ntest = int(args[3]) 35 | ndev = int(args[4]) 36 | 37 | print "Config: root_dir =", root_dir, " out_dir =", out_dir 38 | print "Config: ntest =", ntest, " ndev =", ndev 39 | 40 | test_dir = join(out_dir, basename(root_dir) + '_test') 41 | dev_dir = join(out_dir, basename(root_dir) + '_dev') 42 | train_dir = join(out_dir, basename(root_dir) + '_train') 43 | 44 | spk2utt = {} 45 | spk2gender = {} 46 | utt2wav = {} 47 | utt2text = {} 48 | utt2spk = {} 49 | 50 | with open(join(root_dir, 'spk2utt')) as f: 51 | for line in f.read().splitlines(): 52 | sp = line.split() 53 | spkid = sp[0] 54 | utts = sp[1:] 55 | spk2utt[spkid] = utts 56 | 57 | with open(join(root_dir, 'spk2gender')) as f: 58 | for line in f.read().splitlines(): 59 | sp = line.split() 60 | spkid = sp[0] 61 | gen = sp[1] 62 | spk2gender[spkid] = gen 63 | 64 | with open(join(root_dir, 'wav.scp')) as f: 65 | for line in f.read().splitlines(): 66 | sp = line.split() 67 | uttid = sp[0] 68 | wav_path = ' '.join(sp[1:]) 69 | utt2wav[uttid] = wav_path 70 | 71 | with open(join(root_dir, 'text')) as f: 72 | for line in f.read().splitlines(): 73 | sp = line.split() 74 | uttid = sp[0] 75 | text = ' '.join(sp[1:]) 76 | utt2text[uttid] = text 77 | 78 | with open(join(root_dir, 'utt2spk')) as f: 79 | for line in f.read().splitlines(): 80 | sp = line.split() 81 | uttid = sp[0] 82 | spk = sp[1] 83 | utt2spk[uttid] = spk 84 | 85 | 86 | # Find ntest/2 male and ntest/2 female speakers 87 | test_spks = [] 88 | spklim = int(ntest / 2) 89 | print "Per gender speaker limit for test =", spklim 90 | mspk, fspk = 0, 0 91 | for spk, gender in spk2gender.items(): 92 | if mspk < spklim and gender == 'm': 93 | test_spks.append(spk) 94 | mspk += 1 95 | elif fspk < spklim and gender == 'f': 96 | test_spks.append(spk) 97 | fspk += 1 98 | 99 | print "Selected ", len(test_spks), " test speakers." 100 | 101 | # Find dev spks and utts 102 | dev_spks = [] 103 | dev_utts = [] 104 | spklim = int(ndev / 2) 105 | print "Per gender speaker limit for dev = ", spklim 106 | 107 | mspk, fspk = 0, 0 108 | for spk, gender in spk2gender.items(): 109 | if spk not in test_spks: 110 | if mspk < spklim and gender == 'm': 111 | dev_spks.append(spk) 112 | spk_utts = spk2utt[spk] 113 | #utt_frac = int(devper * len(spk_utts)) 114 | dev_utts.extend(spk_utts) 115 | mspk += 1 116 | elif fspk < spklim and gender == 'f': 117 | dev_spks.append(spk) 118 | spk_utts = spk2utt[spk] 119 | #utt_frac = int(devper * len(spk_utts)) 120 | dev_utts.extend(spk_utts) 121 | fspk += 1 122 | 123 | print "Selected ", len(dev_spks), " dev speakers." 
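# Note: the selections made above are written out below as Kaldi-style data files
# with an '.unsorted' suffix; they are presumably sorted and validated (e.g. with
# utils/fix_data_dir.sh) by the calling script before further use.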
124 | 125 | os.makedirs(test_dir) 126 | with open(join(test_dir, 'spk2utt.unsorted'), 'w') as f: 127 | for spk in test_spks: 128 | f.write(spk + ' ' + ' '.join(spk2utt[spk]) + '\n') 129 | 130 | with open(join(test_dir, 'spk2gender.unsorted'), 'w') as f: 131 | for spk in test_spks: 132 | f.write(spk + ' ' + spk2gender[spk] + '\n') 133 | 134 | with open(join(test_dir, 'utt2spk.unsorted'), 'w') as f: 135 | for spk in test_spks: 136 | for utt in spk2utt[spk]: 137 | f.write(utt + ' ' + spk + '\n') 138 | 139 | with open(join(test_dir, 'text.unsorted'), 'w') as f: 140 | for spk in test_spks: 141 | for utt in spk2utt[spk]: 142 | f.write(utt + ' ' + utt2text[utt] + '\n') 143 | 144 | with open(join(test_dir, 'wav.scp.unsorted'), 'w') as f: 145 | for spk in test_spks: 146 | for utt in spk2utt[spk]: 147 | f.write(utt + ' ' + utt2wav[utt] + '\n') 148 | 149 | print "Finished creating test dir." 150 | 151 | os.makedirs(dev_dir) 152 | with open(join(dev_dir, 'spk2utt.unsorted'), 'w') as f: 153 | for spk in dev_spks: 154 | #spk_utts = [utt for utt in spk2utt[spk] if utt in dev_utts] 155 | spk_utts = spk2utt[spk] 156 | f.write(spk + ' ' + ' '.join(spk_utts) + '\n') 157 | 158 | with open(join(dev_dir, 'spk2gender.unsorted'), 'w') as f: 159 | for spk in dev_spks: 160 | f.write(spk + ' ' + spk2gender[spk] + '\n') 161 | 162 | with open(join(dev_dir, 'utt2spk.unsorted'), 'w') as f: 163 | for utt in dev_utts: 164 | f.write(utt + ' ' + utt2spk[utt] + '\n') 165 | 166 | with open(join(dev_dir, 'text.unsorted'), 'w') as f: 167 | for utt in dev_utts: 168 | f.write(utt + ' ' + utt2text[utt] + '\n') 169 | 170 | with open(join(dev_dir, 'wav.scp.unsorted'), 'w') as f: 171 | for utt in dev_utts: 172 | f.write(utt + ' ' + utt2wav[utt] + '\n') 173 | 174 | print "Finished creating dev dir." 175 | 176 | all_spks = list(spk2gender.keys()) 177 | all_utts = list(utt2spk.keys()) 178 | train_spks = [spk for spk in all_spks if spk not in test_spks and spk not in 179 | dev_spks] 180 | train_utts = [utt for utt in all_utts if utt2spk[utt] not in test_spks and 181 | utt not in dev_utts] 182 | print "Selected", len(train_spks), "train speakers and", len(train_utts), "train utterances." 183 | 184 | os.makedirs(train_dir) 185 | with open(join(train_dir, 'spk2utt.unsorted'), 'w') as f: 186 | for spk in train_spks: 187 | spk_utts = [utt for utt in spk2utt[spk] if utt in train_utts] 188 | f.write(spk + ' ' + ' '.join(spk_utts) + '\n') 189 | 190 | with open(join(train_dir, 'spk2gender.unsorted'), 'w') as f: 191 | for spk in train_spks: 192 | f.write(spk + ' ' + spk2gender[spk] + '\n') 193 | 194 | with open(join(train_dir, 'utt2spk.unsorted'), 'w') as f: 195 | for utt in train_utts: 196 | f.write(utt + ' ' + utt2spk[utt] + '\n') 197 | 198 | with open(join(train_dir, 'text.unsorted'), 'w') as f: 199 | for utt in train_utts: 200 | f.write(utt + ' ' + utt2text[utt] + '\n') 201 | 202 | with open(join(train_dir, 'wav.scp.unsorted'), 'w') as f: 203 | for utt in train_utts: 204 | f.write(utt + ' ' + utt2wav[utt] + '\n') 205 | 206 | print "Finished creating train dir." 207 | -------------------------------------------------------------------------------- /baseline/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for The First VoicePrivacy Challenge 2020 3 | # 4 | # This clean-up script should be used before re-running anonymization system (for example, with different parametrs, models, etc.) 
5 | # in order to delete all old directories (in data, exp, ..., which should be updated) from the previous run of anonymization and evaluation sripts. 6 | # 7 | # 8 | 9 | set -e 10 | 11 | # ls | awk '{for (i=1; i<=NF; ++i) print $i}' | sort >> ../cleanup.sh 12 | 13 | names=' 14 | libri_dev_asr 15 | libri_dev_asr_anon 16 | libri_dev_enrolls 17 | libri_dev_enrolls_anon 18 | libri_dev_trials_all 19 | libri_dev_trials_f 20 | libri_dev_trials_f_anon 21 | libri_dev_trials_m 22 | libri_dev_trials_m_anon 23 | libri_test_asr 24 | libri_test_asr_anon 25 | libri_test_enrolls 26 | libri_test_enrolls_anon 27 | libri_test_trials_all 28 | libri_test_trials_f 29 | libri_test_trials_f_anon 30 | libri_test_trials_m 31 | libri_test_trials_m_anon 32 | vctk_dev_asr 33 | vctk_dev_asr_anon 34 | vctk_dev_enrolls 35 | vctk_dev_enrolls_anon 36 | vctk_dev_trials_all 37 | vctk_dev_trials_f 38 | vctk_dev_trials_f_all 39 | vctk_dev_trials_f_all_anon 40 | vctk_dev_trials_f_anon 41 | vctk_dev_trials_f_common 42 | vctk_dev_trials_f_common_anon 43 | vctk_dev_trials_m 44 | vctk_dev_trials_m_all 45 | vctk_dev_trials_m_all_anon 46 | vctk_dev_trials_m_anon 47 | vctk_dev_trials_m_common 48 | vctk_dev_trials_m_common_anon 49 | vctk_test_asr 50 | vctk_test_asr_anon 51 | vctk_test_enrolls 52 | vctk_test_enrolls_anon 53 | vctk_test_trials_all 54 | vctk_test_trials_f 55 | vctk_test_trials_f_all 56 | vctk_test_trials_f_all_anon 57 | vctk_test_trials_f_anon 58 | vctk_test_trials_f_common 59 | vctk_test_trials_f_common_anon 60 | vctk_test_trials_m 61 | vctk_test_trials_m_all 62 | vctk_test_trials_m_all_anon 63 | vctk_test_trials_m_anon 64 | vctk_test_trials_m_common 65 | vctk_test_trials_m_common_anon 66 | libri_dev_trials_f_hires 67 | libri_test_asr_hires 68 | vctk_dev_asr_hires 69 | vctk_test_trials_f_all_hires 70 | libri_dev_asr_anon_hires 71 | libri_dev_trials_m_hires 72 | libri_test_enrolls_hires 73 | vctk_dev_enrolls_hires 74 | vctk_test_asr_anon_hires 75 | vctk_test_trials_m_all_hires 76 | libri_dev_asr_hires 77 | libri_test_trials_f_hires 78 | vctk_dev_trials_f_all_hires 79 | vctk_test_asr_hires 80 | libri_dev_enrolls_hires 81 | libri_test_asr_anon_hires 82 | libri_test_trials_m_hires 83 | vctk_dev_asr_anon_hires 84 | vctk_dev_trials_m_all_hires 85 | vctk_test_enrolls_hires' 86 | 87 | 88 | for name in $names; do 89 | dir=data/$name 90 | #[ ! -d $dir ] && echo $dir 91 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 92 | done 93 | 94 | names=' 95 | decode_libri_dev_asr_anon_tglarge 96 | decode_libri_dev_asr_anon_tgsmall 97 | decode_libri_dev_asr_tglarge 98 | decode_libri_dev_asr_tgsmall 99 | decode_libri_test_asr_anon_tglarge 100 | decode_libri_test_asr_anon_tgsmall 101 | decode_libri_test_asr_tglarge 102 | decode_libri_test_asr_tgsmall 103 | decode_vctk_dev_asr_anon_tglarge 104 | decode_vctk_dev_asr_anon_tgsmall 105 | decode_vctk_dev_asr_tglarge 106 | decode_vctk_dev_asr_tgsmall 107 | decode_vctk_test_asr_anon_tglarge 108 | decode_vctk_test_asr_anon_tgsmall 109 | decode_vctk_test_asr_tglarge 110 | decode_vctk_test_asr_tgsmall' 111 | 112 | for name in $names; do 113 | dir=exp/models/asr_eval/$name 114 | #[ ! -d $dir ] && echo $dir 115 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 116 | done 117 | 118 | names=' 119 | ivect_libri_dev_asr 120 | ivect_libri_dev_asr_anon 121 | ivect_libri_test_asr 122 | ivect_libri_test_asr_anon 123 | ivect_vctk_dev_asr 124 | ivect_vctk_dev_asr_anon 125 | ivect_vctk_test_asr 126 | ivect_vctk_test_asr_anon' 127 | 128 | for name in $names; do 129 | dir=exp/models/asr_eval/extractor/$name 130 | #[ ! 
-d $dir ] && echo $dir 131 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 132 | done 133 | 134 | names=' 135 | ivectors_libri_dev_enrolls_hires 136 | ivectors_libri_dev_trials_f_hires 137 | ivectors_libri_dev_trials_m_hires 138 | ivectors_libri_test_enrolls_hires 139 | ivectors_libri_test_trials_f_hires 140 | ivectors_libri_test_trials_m_hires 141 | ivectors_vctk_dev_enrolls_hires 142 | ivectors_vctk_dev_trials_f_all_hires 143 | ivectors_vctk_dev_trials_m_all_hires 144 | ivectors_vctk_test_enrolls_hires 145 | ivectors_vctk_test_trials_f_all_hires 146 | ivectors_vctk_test_trials_m_all_hires 147 | ppg_libri_dev_enrolls 148 | ppg_libri_dev_trials_f 149 | ppg_libri_dev_trials_m 150 | ppg_libri_test_enrolls 151 | ppg_libri_test_trials_f 152 | ppg_libri_test_trials_m 153 | ppg_vctk_dev_enrolls 154 | ppg_vctk_dev_trials_f_all 155 | ppg_vctk_dev_trials_m_all 156 | ppg_vctk_test_enrolls 157 | ppg_vctk_test_trials_f_all 158 | ppg_vctk_test_trials_m_all' 159 | 160 | for name in $names; do 161 | dir=exp/models/1_asr_am/exp/nnet3_cleaned/$name 162 | #[ ! -d $dir ] && echo $dir 163 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 164 | done 165 | 166 | names=' 167 | xvect_libri_dev_enrolls 168 | xvect_libri_dev_enrolls_anon 169 | xvect_libri_dev_trials_f 170 | xvect_libri_dev_trials_f_anon 171 | xvect_libri_dev_trials_m 172 | xvect_libri_dev_trials_m_anon 173 | xvect_libri_test_enrolls 174 | xvect_libri_test_enrolls_anon 175 | xvect_libri_test_trials_f 176 | xvect_libri_test_trials_f_anon 177 | xvect_libri_test_trials_m 178 | xvect_libri_test_trials_m_anon 179 | xvect_vctk_dev_enrolls 180 | xvect_vctk_dev_enrolls_anon 181 | xvect_vctk_dev_trials_f 182 | xvect_vctk_dev_trials_f_anon 183 | xvect_vctk_dev_trials_f_common 184 | xvect_vctk_dev_trials_f_common_anon 185 | xvect_vctk_dev_trials_m 186 | xvect_vctk_dev_trials_m_anon 187 | xvect_vctk_dev_trials_m_common 188 | xvect_vctk_dev_trials_m_common_anon 189 | xvect_vctk_test_enrolls 190 | xvect_vctk_test_enrolls_anon 191 | xvect_vctk_test_trials_f 192 | xvect_vctk_test_trials_f_anon 193 | xvect_vctk_test_trials_f_common 194 | xvect_vctk_test_trials_f_common_anon 195 | xvect_vctk_test_trials_m 196 | xvect_vctk_test_trials_m_anon 197 | xvect_vctk_test_trials_m_common 198 | xvect_vctk_test_trials_m_common_anon' 199 | 200 | for name in $names; do 201 | dir=exp/models/asv_eval/xvect_01709_1/$name 202 | #[ ! -d $dir ] && echo $dir 203 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 204 | done 205 | 206 | names=' 207 | xvectors_libri_dev_enrolls 208 | xvectors_libri_dev_trials_f 209 | xvectors_libri_dev_trials_m 210 | xvectors_libri_test_enrolls 211 | xvectors_libri_test_trials_f 212 | xvectors_libri_test_trials_m 213 | xvectors_vctk_dev_enrolls 214 | xvectors_vctk_dev_trials_f_all 215 | xvectors_vctk_dev_trials_m_all 216 | xvectors_vctk_test_enrolls 217 | xvectors_vctk_test_trials_f_all 218 | xvectors_vctk_test_trials_m_all' 219 | 220 | for name in $names; do 221 | dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a/anon/$name 222 | #[ ! -d $dir ] && echo $dir 223 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 224 | done 225 | 226 | names=' 227 | libri_dev_enrolls 228 | libri_dev_trials_f 229 | libri_dev_trials_m 230 | libri_test_enrolls 231 | libri_test_trials_f 232 | libri_test_trials_m 233 | vctk_dev_enrolls 234 | vctk_dev_trials_f_all 235 | vctk_dev_trials_m_all 236 | vctk_test_enrolls 237 | vctk_test_trials_f_all 238 | vctk_test_trials_m_all' 239 | 240 | for name in $names; do 241 | dir=exp/am_nsf_data/$name 242 | #[ ! 
-d $dir ] && echo $dir 243 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 244 | done 245 | 246 | dir="exp/tmp" 247 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 248 | 249 | echo Done 250 | --------------------------------------------------------------------------------