├── baseline
├── conf
│   ├── pitch.conf
│   ├── mspec.conf
│   ├── online_cmvn.conf
│   ├── vad.conf
│   ├── mfcc.conf
│   └── mfcc_hires.conf
├── sid
├── steps
├── utils
├── local
│   ├── featex
│   │   ├── f0_yaapt
│   │   │   ├── amfm_decompy
│   │   │   │   ├── __init__.py
│   │   │   │   ├── sample.wav
│   │   │   │   ├── LICENSE.txt
│   │   │   │   └── basic_tools.py
│   │   │   ├── 00_batch.sh
│   │   │   └── get_f0.py
│   │   ├── make_pitch_yaapt.sh
│   │   ├── 02_extract_pitch.sh
│   │   ├── create_ppg_data.py
│   │   ├── create_melspec_data.py
│   │   ├── compute_ivect.sh
│   │   ├── compute_hires.sh
│   │   ├── extract_melspec.sh
│   │   ├── extract_bn_nat.sh
│   │   ├── 01_extract_xvectors.sh
│   │   ├── 00_make_am_nsf_data.sh
│   │   ├── split_test_data.py
│   │   ├── compute_xvect.sh
│   │   ├── create_xvector_f0_data.py
│   │   ├── extract_ppg.sh
│   │   ├── extract_bn.sh
│   │   ├── check_pitch_feats.py
│   │   ├── nnet3_compute.sh
│   │   ├── 03_make_am_nsf_netcdf_data.sh
│   │   └── split_am_nsf_data.py
│   ├── chain
│   │   ├── run_cnn_tdnn.sh
│   │   ├── run_chain_common.sh
│   │   └── compare_wer.sh
│   ├── vc
│   │   ├── nsf
│   │   │   ├── 00_run.sh
│   │   │   ├── init.sh
│   │   │   └── 01_gen.sh
│   │   └── am
│   │   │   ├── 00_run.sh
│   │   │   ├── init.sh
│   │   │   └── 01_gen.sh
│   ├── download_models.sh
│   ├── prepare_for_eer.py
│   ├── similarity_matrices
│   │   ├── compute_DeID.py
│   │   ├── compute_Gvd.py
│   │   ├── scores_calibration.py
│   │   ├── create_trial.py
│   │   ├── compute_similarity_matrix.py
│   │   └── compute_similarity_matrices_metrics.sh
│   ├── make_eval2.sh
│   ├── anon
│   │   ├── make_netcdf.sh
│   │   ├── compute_spk_pool_affinity.sh
│   │   ├── compute_spk_pool_cosine.py
│   │   ├── make_pseudospeaker.sh
│   │   ├── anonymize_data_dir.sh
│   │   ├── anonymise_dir_mcadams.py
│   │   └── gen_pseudo_xvecs.py
│   ├── download_data.sh
│   ├── train_model_ss_am.sh
│   ├── train_model_nsf.sh
│   ├── plot
│   │   ├── plot_spk_dur.py
│   │   ├── plot_spk_xvectors.py
│   │   └── plot_spk_xvectors_voxceleb.py
│   ├── scoring
│   │   └── linkability
│   │   │   └── compute_linkability.py
│   ├── asr_eval.sh
│   ├── score.sh
│   ├── download_and_untar.sh
│   ├── run_cleanup_segmentation.sh
│   ├── data_prep_libritts.sh
│   ├── data_prep_adv.sh
│   ├── run_prepfeats_am_nsf.sh
│   ├── create_uniform_segments.py
│   ├── asv_eval.sh
│   ├── nnet3
│   │   ├── tuning
│   │   │   ├── run_tdnn_1a.sh
│   │   │   └── run_tdnn_1b.sh
│   │   └── run_tdnn.sh
│   ├── fix_eval2.py
│   └── split_long_utterance.sh
├── local_librispeech
├── fig
│   ├── data_dir.jpg
│   └── baseline_git.jpg
├── path.sh
├── cmd.sh
├── run_asr_eval_train.sh
├── run_xvector.sh
├── run_asv_eval_train.sh
├── RESULTS_mcadams
├── RESULTS_baseline
├── RESULTS_baseline_cosine
└── cleanup.sh
├── requirements.txt
├── .gitignore
├── .gitmodules
├── nii_cmake
└── CMakeLists.txt
└── install.sh

/baseline/conf/pitch.conf:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/baseline/sid:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/sre08/v1/sid
--------------------------------------------------------------------------------
/baseline/steps:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/wsj/s5/steps
--------------------------------------------------------------------------------
/baseline/utils:
--------------------------------------------------------------------------------
1 | ../kaldi/egs/wsj/s5/utils
--------------------------------------------------------------------------------
/baseline/local/featex/f0_yaapt/amfm_decompy/__init__.py:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baseline/local_librispeech: -------------------------------------------------------------------------------- 1 | ../kaldi/egs/librispeech/s5/local -------------------------------------------------------------------------------- /baseline/local/chain/run_cnn_tdnn.sh: -------------------------------------------------------------------------------- 1 | tuning/run_cnn_tdnn_1a.sh 2 | -------------------------------------------------------------------------------- /baseline/conf/mspec.conf: -------------------------------------------------------------------------------- 1 | --num-mel-bins=80 2 | --allow-downsample=true 3 | -------------------------------------------------------------------------------- /baseline/conf/online_cmvn.conf: -------------------------------------------------------------------------------- 1 | # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh 2 | -------------------------------------------------------------------------------- /baseline/fig/data_dir.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/fig/data_dir.jpg -------------------------------------------------------------------------------- /baseline/conf/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /baseline/fig/baseline_git.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/fig/baseline_git.jpg -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/sample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Voice-Privacy-Challenge/Voice-Privacy-Challenge-2020/HEAD/baseline/local/featex/f0_yaapt/amfm_decompy/sample.wav -------------------------------------------------------------------------------- /baseline/conf/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=16000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case). 5 | --num-mel-bins=30 6 | --num-ceps=30 7 | --snip-edges=false 8 | --allow-downsample=true 9 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/00_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | . 
local/vc/nsf/init.sh 5 | 6 | export AM_NSF_FEAT_OUT="$1" 7 | 8 | proj_dir=${nii_scripts}/waveform-modeling/project-NSF 9 | 10 | # preparing data 11 | python ${proj_dir}/../SCRIPTS/00_prepare_data.py config_libri_nsf || exit 1 12 | 13 | # model training 14 | python ${proj_dir}/../SCRIPTS/01_train_network.py config_libri_nsf || exit 1 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backports.functools-lru-cache==1.6.1 2 | cycler==0.10.0 3 | Cython==0.29.14 4 | kaldiio==2.15.1 5 | kiwisolver==1.1.0 6 | matplotlib==3.3.0 7 | numba==0.48 8 | numpy==1.16.5 9 | pandas==1.0.1 10 | pyparsing==2.4.6 11 | python-dateutil==2.8.1 12 | pytz==2019.3 13 | scipy==1.5.2 14 | seaborn==0.10.1 15 | six==1.13.0 16 | subprocess32==3.5.4 17 | librosa==0.7.1 18 | tabulate 19 | tikzplotlib 20 | -------------------------------------------------------------------------------- /baseline/local/vc/am/00_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | . local/vc/am/init.sh 5 | 6 | export AM_NSF_FEAT_OUT="$1" 7 | 8 | proj_dir=${nii_scripts}/acoustic-modeling/project-DAR-continuous 9 | 10 | 11 | # preparing the training data 12 | python ${proj_dir}/../SCRIPTS/01_prepare.py config_libri_am || exit 1 13 | 14 | # training the RNN model 15 | python ${proj_dir}/../SCRIPTS/02_train.py config_libri_am || exit 1 16 | 17 | -------------------------------------------------------------------------------- /baseline/local/featex/make_pitch_yaapt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wav_scp="$1" 4 | pitch_dir="$2" 5 | temp_wav="$3" 6 | 7 | echo $wav_scp, $pitch_dir, $temp_wav 8 | 9 | while read line; do 10 | echo $line 11 | utid=$(echo $line | cut -d' ' -f1) 12 | rspec=$(echo $line | cut -d' ' -f2-) 13 | wav-copy "$rspec" $temp_wav 14 | python local/featex/f0_yaapt/get_f0.py $temp_wav $pitch_dir/${utid}.f0 15 | done < ${wav_scp} 16 | 17 | 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /venv/ 2 | .nfs* 3 | *.pyc 4 | __pycache__/ 5 | *.swp 6 | *.tar.gz 7 | *.tar.xz 8 | netcdf*/ 9 | /.done-* 10 | /boost_*/ 11 | /flac-*/ 12 | /env.sh 13 | /baseline/exp/ 14 | /baseline/LibriSpeech/ 15 | /baseline/data/ 16 | /baseline/mfcc/ 17 | /baseline/corpora 18 | /baseline/corpora/ 19 | /baseline/run_temp.sh 20 | /baseline/run_nt_test.sh 21 | /Miniconda* 22 | temp*/ 23 | /baseline/libri* 24 | /baseline/vctk_dev_enrolls_anon* 25 | -------------------------------------------------------------------------------- /baseline/local/featex/02_extract_pitch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | nj=20 7 | 8 | . 
utils/parse_options.sh 9 | 10 | if [ $# != 1 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --nj=40 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | data_dir=$1 19 | pitch_dir=${data_dir}/pitch 20 | 21 | local/featex/make_pitch.sh --nj $nj --cmd "$train_cmd" ${data_dir} \ 22 | exp/make_pitch ${pitch_dir} 23 | -------------------------------------------------------------------------------- /baseline/local/featex/create_ppg_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import join, basename 3 | 4 | from ioTools import readwrite 5 | from kaldiio import ReadHelper 6 | 7 | args = sys.argv 8 | ppg_file = args[1] 9 | out_dir = args[2] 10 | 11 | ppg_out_dir = join(out_dir, "ppg") 12 | 13 | print("Writing PPG feats.....") 14 | # Write ppg features 15 | with ReadHelper('scp:'+ppg_file) as reader: 16 | for key, mat in reader: 17 | readwrite.write_raw_mat(mat, join(ppg_out_dir, key+'.ppg')) 18 | print("Finished writing PPG feats.") 19 | 20 | -------------------------------------------------------------------------------- /baseline/path.sh: -------------------------------------------------------------------------------- 1 | export KALDI_ROOT=$(realpath ../kaldi) 2 | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH 3 | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 4 | . $KALDI_ROOT/tools/config/common_path.sh 5 | export LC_ALL=C 6 | 7 | . ../env.sh 8 | 9 | # based on https://stackoverflow.com/a/5947802/12499892 10 | export GREEN='\033[0;32m' 11 | export RED='\033[0;31m' 12 | export NC='\033[0m' # No Color 13 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/00_batch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---- batch script to extract F0 3 | # Usage: 4 | # 1. config INPUT_WAV_DIR and OUTPUT_F0_DIR 5 | # 2. 
run sh 00_batch.sh 6 | # No dependency required 7 | 8 | # Directory of input waveform 9 | INPUT_WAV_DIR=$PWD/../../../test_sample/ 10 | # Directory to store output F0 11 | OUTPUT_F0_DIR=$PWD/../../../test_sample/ 12 | 13 | mkdir ${OUTPUT_F0_DIR} 14 | ls ${INPUT_WAV_DIR} | grep wav > file.lst 15 | cat file.lst | parallel python3 get_f0.py ${INPUT_WAV_DIR}/{/.}.wav ${OUTPUT_F0_DIR}/{/.}.f0 16 | rm file.lst 17 | -------------------------------------------------------------------------------- /baseline/local/featex/create_melspec_data.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | from os.path import join, basename 4 | 5 | from ioTools import readwrite 6 | from kaldiio import WriteHelper, ReadHelper 7 | 8 | args = sys.argv 9 | mspec_file = args[1] 10 | out_dir = args[2] 11 | 12 | mspec_out_dir = join(out_dir, "mel") 13 | 14 | print("Writing MEL feats.....") 15 | # Write mspec features 16 | with ReadHelper('scp:'+mspec_file) as reader: 17 | for key, mat in reader: 18 | #print key, mat.shape 19 | readwrite.write_raw_mat(mat, join(mspec_out_dir, key+'.mel')) 20 | print("Finished writing MEL feats.") 21 | -------------------------------------------------------------------------------- /baseline/local/featex/compute_ivect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . ./cmd.sh 8 | . ./path.sh 9 | 10 | nj=20 11 | model=exp/nnet3_cleaned 12 | dsets= 13 | 14 | . parse_options.sh 15 | 16 | for dset in $dsets; do 17 | expo=$model/ivectors_${dset}_hires 18 | mark=$expo/.done 19 | if [ ! -f $mark ]; then 20 | [ -d $expo ] && rm -r $expo 21 | steps/online/nnet2/extract_ivectors_online.sh \ 22 | --cmd "$train_cmd" --nj $nj data/${dset}_hires \ 23 | $model/extractor $expo || exit 1 24 | touch $mark 25 | fi 26 | done 27 | 28 | echo Done 29 | -------------------------------------------------------------------------------- /baseline/local/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | home=$PWD 6 | expo=exp 7 | check=$expo/models/asv_eval/xvect_01709_1/final.raw 8 | 9 | if [ ! -f $check ]; then 10 | mkdir -p $expo 11 | cd $expo 12 | if [ ! 
-f models.tar.gz ]; then 13 | echo " You will be prompted to enter password for getdata@voiceprivacychallenge.univ-avignon.fr" 14 | sftp getdata@voiceprivacychallenge.univ-avignon.fr < " 14 | echo "Options" 15 | echo " --nj=40 # Number of CPUs to use for feature extraction" 16 | echo " --mspec-config=config/mspec.conf # Melspectrogram config" 17 | exit 1; 18 | fi 19 | 20 | odata_dir=$1 21 | data_dir=$2 22 | mspec_dir=${data_dir}/mspec 23 | 24 | 25 | utils/copy_data_dir.sh ${odata_dir} ${data_dir} 26 | 27 | steps/make_fbank.sh --cmd "$train_cmd" --nj $nj \ 28 | --fbank-config ${mspec_config} ${data_dir} \ 29 | exp/make_fbank/${data_dir} $mspec_dir 30 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_DeID.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | 6 | 7 | def Ddiag(X): 8 | N = X.shape[0] #matrix dimension 9 | m = np.mean(X) #mean of all elements 10 | md = np.mean(np.diag(X)) #mean of diagonal elements 11 | mnd = (N/(N-1))*(m-(md/N)) #mean of off-diagonal elements 12 | return abs(md-mnd) 13 | 14 | 15 | 16 | if __name__=="__main__": 17 | 18 | parser = argparse.ArgumentParser(description='Compute De-Identification') 19 | parser.add_argument('Soo',help="npy file of the matrix Soo", type=str) 20 | parser.add_argument('Sop',help="npy file of the matrix Sop", type=str) 21 | args = parser.parse_args() 22 | 23 | Soo = np.load(args.Soo) 24 | Sop = np.load(args.Sop) 25 | 26 | print(1-(Ddiag(Sop)/Ddiag(Soo))) 27 | 28 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_bn_nat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Extract BNs using chain model 5 | 6 | . path.sh 7 | . cmd.sh 8 | 9 | nj=32 10 | stage=0 11 | 12 | . utils/parse_options.sh 13 | 14 | 15 | # remove layers after BN 16 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 17 | 18 | nj=1 19 | use_gpu=yes 20 | iv_root=exp/nnet3_cleaned 21 | md_name=prefinal-l.raw 22 | cmvn_op='--norm-means=false --norm-vars=false' 23 | dsets="librispeech_dev_clean train_clean_100" 24 | 25 | 26 | . 
parse_options.sh 27 | 28 | 29 | ./compute_hires.sh --nj $nj --dsets "$dsets" 30 | 31 | ./compute_ivect.sh --nj $nj --dsets "$dsets" --model $iv_root 32 | 33 | ./nnet3_compute.sh --nj 1 --use_gpu $use_gpu --iv_root $iv_root --md_name $md_name --dsets "$dsets" 34 | 35 | 36 | 37 | echo Done 38 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_Gvd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | import math 6 | 7 | 8 | def Ddiag(X): 9 | N = X.shape[0] #matrix dimension 10 | m = np.mean(X) #mean of all elements 11 | md = np.mean(np.diag(X)) #mean of diagonal elements 12 | mnd = (N/(N-1))*(m-(md/N)) #mean of off-diagonal elements 13 | return abs(md-mnd) 14 | 15 | if __name__=="__main__": 16 | 17 | parser = argparse.ArgumentParser(description='Compute Gain of Voice Uniqueness') 18 | parser.add_argument('Soo',help="npy file of the similarity matrix Soo", type=str) 19 | parser.add_argument('Spp',help="npy file of the similarity matrix Spp", type=str) 20 | args = parser.parse_args() 21 | 22 | Soo = np.load(args.Soo) 23 | Spp = np.load(args.Spp) 24 | 25 | print(10*np.log10(Ddiag(Spp)/Ddiag(Soo))) 26 | 27 | -------------------------------------------------------------------------------- /baseline/local/make_eval2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | 5 | proto_dir="$1" 6 | librispeech_corpus="$2" 7 | enroll_data="$3" 8 | trial_data="$4" 9 | 10 | local/data_prep_adv.sh ${librispeech_corpus}/dev-clean data/${enroll_data} 11 | local/data_prep_adv.sh ${librispeech_corpus}/dev-clean data/${trial_data} 12 | 13 | rm data/${enroll_data}/spk2utt 14 | rm data/${trial_data}/spk2utt 15 | 16 | python local/fix_eval2.py ${proto_dir} data/${enroll_data} data/${trial_data} || exit 1; 17 | 18 | utils/utt2spk_to_spk2utt.pl < data/${enroll_data}/utt2spk > data/${enroll_data}/spk2utt || exit 1 19 | utils/utt2spk_to_spk2utt.pl < data/${trial_data}/utt2spk > data/${trial_data}/spk2utt || exit 1 20 | 21 | utils/fix_data_dir.sh data/${enroll_data} 22 | utils/fix_data_dir.sh data/${trial_data} 23 | 24 | utils/validate_data_dir.sh --no-text --no-feats data/${enroll_data} 25 | utils/validate_data_dir.sh --no-text --no-feats data/${trial_data} 26 | -------------------------------------------------------------------------------- /baseline/cmd.sh: -------------------------------------------------------------------------------- 1 | # you can change cmd.sh depending on what type of queue you are using. 2 | # If you have no queueing system and want to run on a local machine, you 3 | # can change all instances 'queue.pl' to run.pl (but be careful and run 4 | # commands one by one: most recipes will exhaust the memory on your 5 | # machine). queue.pl works with GridEngine (qsub). slurm.pl works 6 | # with slurm. Different queues are configured differently, with different 7 | # queue names and different ways of specifying things like memory; 8 | # to account for these differences you can create and edit the file 9 | # conf/queue.conf to match your queue's configuration. Search for 10 | # conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, 11 | # or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. 
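# For example, on a SLURM cluster both commands could point at slurm.pl instead;
# the values here are hypothetical and should be adapted to your own system and
# queue configuration:
#   export train_cmd="slurm.pl --mem 4G"
#   export decode_cmd="slurm.pl --mem 4G"
# The defaults kept below run every job on the local machine with run.pl.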
12 | 13 | #export train_cmd="queue.pl --mem 4G" 14 | #export decode_cmd="queue.pl --mem 4G" 15 | 16 | export train_cmd=run.pl 17 | export decode_cmd=run.pl 18 | -------------------------------------------------------------------------------- /baseline/local/vc/am/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . path.sh 3 | 4 | # PATH to the pyTools 5 | export TEMP_CURRENNT_PROJECT_PYTOOLS_PATH=${nii_dir}/pyTools 6 | 7 | # PATH to currennt 8 | export TEMP_CURRENNT_PROJECT_CURRENNT_PATH=${nii_dir}/CURRENNT_codes/build/currennt 9 | 10 | # PATH to SOX (http://sox.sourceforge.net/sox.html) 11 | export TEMP_CURRENNT_PROJECT_SOX_PATH=/usr/bin/sox 12 | 13 | # PATH to SV56 (a software to normalize waveform amplitude. 14 | # https://www.itu.int/rec/T-REC-P.56 15 | # This software is not necessary, I used it because it is available in our lab. 16 | # You can use other tools to normalize the waveforms before put them into this project. 17 | # Then, you can set TEMP_CURRENNT_PROJECT_SV56_PATH=None) 18 | #export TEMP_CURRENNT_PROJECT_SV56_PATH=/home/smg/wang/WORK/WORK/TOOL/local/bin/sv56demo 19 | export TEMP_CURRENNT_PROJECT_SV56_PATH=None 20 | 21 | export localpath=`pwd`/local/vc/am 22 | 23 | # Add pyTools to PYTHONPATH 24 | export PYTHONPATH=${PYTHONPATH}:${TEMP_CURRENNT_PROJECT_PYTOOLS_PATH}:${localpath} 25 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | . path.sh 4 | 5 | # PATH to the pyTools 6 | export TEMP_CURRENNT_PROJECT_PYTOOLS_PATH=${nii_dir}/pyTools 7 | 8 | # PATH to currennt 9 | export TEMP_CURRENNT_PROJECT_CURRENNT_PATH=${nii_dir}/CURRENNT_codes/build/currennt 10 | 11 | # PATH to SOX (http://sox.sourceforge.net/sox.html) 12 | export TEMP_CURRENNT_PROJECT_SOX_PATH=/usr/bin/sox 13 | 14 | # PATH to SV56 (a software to normalize waveform amplitude. 15 | # https://www.itu.int/rec/T-REC-P.56 16 | # This software is not necessary, I used it because it is available in our lab. 17 | # You can use other tools to normalize the waveforms before put them into this project. 18 | # Then, you can set TEMP_CURRENNT_PROJECT_SV56_PATH=None) 19 | #export TEMP_CURRENNT_PROJECT_SV56_PATH=/home/smg/wang/WORK/WORK/TOOL/local/bin/sv56demo 20 | export TEMP_CURRENNT_PROJECT_SV56_PATH=None 21 | 22 | export localpath=`pwd`/local/vc/nsf 23 | 24 | # Add pyTools to PYTHONPATH 25 | export PYTHONPATH=${PYTHONPATH}:${TEMP_CURRENNT_PROJECT_PYTOOLS_PATH}:${localpath} 26 | -------------------------------------------------------------------------------- /baseline/local/anon/make_netcdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | stage=0 7 | 8 | . utils/parse_options.sh 9 | 10 | if [ $# != 4 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --stage 0 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | src_data=$1 19 | 20 | ppg_file=$2 21 | xvector_file=$3 22 | 23 | out_dir=$4 24 | 25 | 26 | if [ $stage -le 0 ]; then 27 | mkdir -p $out_dir/scp $out_dir/xvector $out_dir/f0 $out_dir/ppg 28 | 29 | echo "Writing SCP file.." 30 | cut -f 1 -d' ' ${src_data}/utt2spk > ${out_dir}/scp/data.lst || exit 1; 31 | fi 32 | 33 | # initialize pytools 34 | . 
local/vc/am/init.sh 35 | 36 | if [ $stage -le 1 ]; then 37 | python local/featex/create_ppg_data.py ${ppg_file} ${out_dir} || exit 1; 38 | fi 39 | 40 | if [ $stage -le 2 ]; then 41 | echo "Writing xvector and F0 for train." 42 | python local/featex/create_xvector_f0_data.py ${src_data} ${xvector_file} ${out_dir} || exit 1; 43 | fi 44 | 45 | -------------------------------------------------------------------------------- /baseline/local/featex/01_extract_xvectors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . path.sh 6 | . cmd.sh 7 | 8 | nj=$(nproc) 9 | 10 | . utils/parse_options.sh 11 | 12 | if [ $# != 3 ]; then 13 | echo "Usage: " 14 | echo " $0 [options] " 15 | echo "Options" 16 | echo " --nj=40 # Number of CPUs to use for feature extraction" 17 | exit 1; 18 | fi 19 | 20 | data_dir=$1 21 | nnet_dir=$2 22 | out_dir=$3 23 | 24 | mfccdir=`pwd`/mfcc 25 | vaddir=`pwd`/mfcc 26 | 27 | mkdir -p ${out_dir} 28 | dataname=$(basename $data_dir) 29 | 30 | steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf \ 31 | --nj $nj --cmd "$train_cmd" ${data_dir} exp/make_mfcc $mfccdir || exit 1 32 | 33 | utils/fix_data_dir.sh ${data_dir} || exit 1 34 | 35 | sid/compute_vad_decision.sh --nj $nj --cmd "$train_cmd" ${data_dir} exp/make_vad $vaddir || exit 1 36 | 37 | utils/fix_data_dir.sh ${data_dir} || exit 1 38 | 39 | sid/nnet3/xvector/extract_xvectors.sh --cmd "$train_cmd" --nj $nj \ 40 | $nnet_dir ${data_dir} $out_dir/xvectors_$dataname || exit 1 41 | -------------------------------------------------------------------------------- /baseline/local/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ $# != 1 ]; then 6 | echo "Usage: " 7 | echo " $0 [options] " 8 | exit 1; 9 | fi 10 | 11 | data_set=$1 12 | expo_dir=data/$data_set 13 | 14 | dir=$expo_dir 15 | if [ ! -f $dir/wav.scp ]; then 16 | [ -d $dir ] && rm -r $dir 17 | if [ ! -f $data_set.tar.gz ]; then 18 | echo " You will be prompted to enter password for getdata@voiceprivacychallenge.univ-avignon.fr" 19 | sftp getdata@voiceprivacychallenge.univ-avignon.fr < $dir/text1 29 | cut -d' ' -f2- $dir/text | sed -r 's/,|!|\?|\./ /g' | sed -r 's/ +/ /g' | awk '{print toupper($0)}' > $dir/text2 30 | paste -d' ' $dir/text1 $dir/text2 > $dir/text 31 | rm $dir/text1 $dir/text2 32 | utils/fix_data_dir.sh $dir || exit 1 33 | utils/validate_data_dir.sh --no-feats $dir || exit 1 34 | fi 35 | 36 | echo ' Done' 37 | -------------------------------------------------------------------------------- /baseline/local/train_model_ss_am.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Training speech synthesis acoustic model (see the trained model in /baseline/exp/models/3_ss_am/) LibriTTS-train-clean-100 4 | 5 | # TO CORRECT 6 | 7 | . ./cmd.sh 8 | . ./path.sh 9 | 10 | set -e 11 | 12 | libritts_corpus=$(realpath $corpora/LibriTTS) 13 | 14 | ppg_model=exp/models/1_asr_am/exp 15 | ppg_dir=${ppg_model}/nnet3_cleaned 16 | xvec_nnet_dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a 17 | 18 | am_nsf_train_data="libritts_train_clean_100" 19 | feats_out_dir=$(realpath exp/am_nsf_data) 20 | 21 | stage=0 22 | 23 | . 
utils/parse_options.sh 24 | 25 | if [ $stage -le 0 ]; then 26 | local/data_prep_libritts.sh ${libritts_corpus}/train-clean-100 data/${am_nsf_train_data} || exit 1; 27 | local/run_prepfeats_am_nsf.sh --ppg-model ${ppg_model} --ppg-dir ${ppg_dir} \ 28 | --xvec-nnet-dir ${xvec_nnet_dir} \ 29 | ${am_nsf_train_data} ${feats_out_dir} || exit 1; 30 | fi 31 | 32 | if [ $stage -le 1 ]; then 33 | local/vc/am/00_run.sh ${feats_out_dir} || exit 1; 34 | echo "Model is trained and stored at ${nii_scripts}/acoustic-modeling/project-DAR-continuous/MODELS/DAR_001/" 35 | fi 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /baseline/local/train_model_nsf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Training speech synthesis neural source filter (NSF) model (see the trained model in /baseline/exp/models/4_nsf_am/) on LibriTTS-train-clean-100 4 | # TO CORRECT 5 | 6 | . ./cmd.sh 7 | . ./path.sh 8 | 9 | set -e 10 | 11 | libritts_corpus=$(realpath $corpora/LibriTTS) 12 | 13 | ppg_model=exp/models/1_asr_am/exp 14 | ppg_dir=${ppg_model}/nnet3_cleaned 15 | xvec_nnet_dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a 16 | 17 | am_nsf_train_data="libritts_train_clean_100" 18 | feats_out_dir=$(realpath exp/am_nsf_data) 19 | 20 | stage=0 21 | 22 | . utils/parse_options.sh 23 | 24 | if [ $stage -le 0 ]; then 25 | local/data_prep_libritts.sh ${libritts_corpus}/train-clean-100 data/${am_nsf_train_data} || exit 1; 26 | local/run_prepfeats_am_nsf.sh --ppg-model ${ppg_model} --ppg-dir ${ppg_dir} \ 27 | --xvec-nnet-dir ${xvec_nnet_dir} \ 28 | ${am_nsf_train_data} ${feats_out_dir} || exit 1; 29 | fi 30 | 31 | if [ $stage -le 1 ]; then 32 | local/vc/nsf/00_run.sh ${feats_out_dir} || exit 1; 33 | echo "Model is trained and stored at ${nii_scripts}/waveform-modeling/project-NSF/MODELS/h-sinc-NSF/" 34 | fi 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /baseline/local/anon/compute_spk_pool_affinity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . 
cmd.sh 5 | 6 | set -e 7 | 8 | plda_dir=$1 9 | src_xvectors_dir=$2 10 | pool_xvectors_dir=$3 11 | src_spk=$4 12 | trial_scores=$5 13 | 14 | fake_trials_dir=${src_xvectors_dir}/fake_trials 15 | mkdir -p ${fake_trials_dir} 16 | fake_trials=${fake_trials_dir}/trial_${src_spk} 17 | 18 | # Creating the fake trials file 19 | cut -d' ' -f 1 ${pool_xvectors_dir}/spk_xvector.scp | awk -v a="${src_spk}" '{print a,$1}' - > ${fake_trials} 20 | 21 | $train_cmd exp/scores/log/libritts_pool_scoring.log \ 22 | ivector-plda-scoring --normalize-length=true \ 23 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 24 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:${src_xvectors_dir}/spk_xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 25 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:${pool_xvectors_dir}/spk_xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 26 | "cat '${fake_trials}' | cut -d\ --fields=1,2 |" ${trial_scores} || exit 1; 27 | 28 | 29 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Bernardo J. B. Schmitt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. -------------------------------------------------------------------------------- /baseline/local/featex/00_make_am_nsf_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | dev_spks=20 7 | test_spks=20 8 | 9 | . 
utils/parse_options.sh 10 | 11 | if [ $# != 2 ]; then 12 | echo "Usage: " 13 | echo " $0 [options] " 14 | echo "Options" 15 | echo " --dev-spks=40 # Number of speakers in dev dataset" 16 | echo " --test-spks=40 # Number of speakers in test dataset" 17 | exit 1; 18 | fi 19 | 20 | in_dir=$1 21 | out_dir=$2 22 | mkdir -p ${out_dir} 23 | 24 | python local/featex/split_am_nsf_data.py ${in_dir} ${out_dir} ${dev_spks} ${test_spks} 25 | 26 | # sort each file 27 | train_dir=$out_dir/$(basename $in_dir)_train 28 | dev_dir=$out_dir/$(basename $in_dir)_dev 29 | test_dir=$out_dir/$(basename $in_dir)_test 30 | 31 | echo "Sorting : ${train_dir}, ${dev_dir} and ${test_dir}" 32 | 33 | for f in `ls ${train_dir}`; do 34 | echo "Sorting $f" 35 | sort -u ${train_dir}/$f > ${train_dir}/${f%.*} 36 | rm ${train_dir}/$f 37 | done 38 | 39 | for f in `ls ${dev_dir}`; do 40 | echo "Sorting $f" 41 | sort -u ${dev_dir}/$f > ${dev_dir}/${f%.*} 42 | rm ${dev_dir}/$f 43 | done 44 | 45 | for f in `ls ${test_dir}`; do 46 | echo "Sorting $f" 47 | sort -u ${test_dir}/$f > ${test_dir}/${f%.*} 48 | rm ${test_dir}/$f 49 | done 50 | -------------------------------------------------------------------------------- /baseline/local/featex/split_test_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join, exists 3 | import sys 4 | 5 | args = sys.argv 6 | 7 | root_dir = args[1] 8 | test_file = join(root_dir, 'scp/test.lst') 9 | 10 | test_dir = args[2] 11 | 12 | xvector_dir = join(root_dir, 'xvector') 13 | f0_dir = join(root_dir, 'f0') 14 | mel_dir = join(root_dir, 'mel') 15 | ppg_dir = join(root_dir, 'ppg') 16 | 17 | out_xvector_dir = join(test_dir, 'xvector') 18 | out_f0_dir = join(test_dir, 'f0') 19 | out_mel_dir = join(test_dir, 'mel') 20 | out_ppg_dir = join(test_dir, 'ppg') 21 | 22 | if not exists(out_xvector_dir): 23 | os.makedirs(out_xvector_dir) 24 | if not exists(out_f0_dir): 25 | os.makedirs(out_f0_dir) 26 | if not exists(out_mel_dir): 27 | os.makedirs(out_mel_dir) 28 | if not exists(out_ppg_dir): 29 | os.makedirs(out_ppg_dir) 30 | 31 | with open(test_file) as f: 32 | for line in f.read().splitlines(): 33 | os.rename(join(xvector_dir, line+'.xvector'), join(out_xvector_dir, 34 | line+'.xvector')) 35 | os.rename(join(f0_dir, line+'.f0'), join(out_f0_dir, line+'.f0')) 36 | os.rename(join(mel_dir, line+'.mel'), join(out_mel_dir, line+'.mel')) 37 | os.rename(join(ppg_dir, line+'.ppg'), join(out_ppg_dir, line+'.ppg')) 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_dur.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | 3 | import numpy as np 4 | import matplotlib as mpl 5 | mpl.use('Agg') 6 | import matplotlib.pyplot as plt 7 | import operator 8 | 9 | 10 | # Each data dir must contain an utt2dur 11 | data_dirs = ['data/test_clean', 'data/dev_clean', 'data/test_other', 'data/dev_other', 'data/train_960'] 12 | plot_file = 'data/spks_stats.png' 13 | 14 | 15 | spk2dur = {} 16 | for ddir in data_dirs: 17 | with open(join(ddir, 'utt2dur')) as f: 18 | for line in f.read().splitlines(): 19 | sp = line.split() 20 | spk = sp[0].split('-')[0] 21 | cdur = float(sp[1]) 22 | spk2dur[spk] = spk2dur.get(spk, 0.0) + cdur 23 | 24 | print(f"Found {len(spk2dur)} of speakers") 25 | 26 | sorted_spk2dur = sorted(spk2dur.items(), key=operator.itemgetter(1)) 27 | 28 | #ditems = spk2dur.items() 29 | spks = [x[0] for x in 
sorted_spk2dur] 30 | durs = [x[1] for x in sorted_spk2dur] 31 | 32 | mean_dur = round(np.mean(durs), 2) 33 | 34 | x_pos = np.arange(len(spks)) 35 | 36 | plt.bar(x_pos, durs, align='center') 37 | plt.axhline(y=mean_dur, color='r', linestyle='-') 38 | plt.annotate(f'Mean duration = {mean_dur}', xy=(20, mean_dur+10)) 39 | #plt.xticks(x_pos, spks) 40 | plt.ylabel('Duration (sec.)') 41 | plt.grid(True) 42 | 43 | plt.title(f'Durations of {len(spks)} speakers found in LibriSpeech') 44 | 45 | 46 | plt.savefig(plot_file, dpi=300) 47 | -------------------------------------------------------------------------------- /baseline/local/anon/compute_spk_pool_cosine.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from kaldiio import WriteHelper, ReadHelper 3 | import os 4 | from os.path import join, isdir 5 | 6 | from scipy.spatial import distance 7 | 8 | args = sys.argv 9 | 10 | src_xvec_dir = args[1] 11 | pool_xvec_dir = args[2] 12 | scores_dir = args[3] 13 | 14 | if not isdir(scores_dir): 15 | os.makedirs(scores_dir) 16 | 17 | src_xvec_file = join(src_xvec_dir, 'spk_xvector.scp') 18 | pool_xvec_file = join(pool_xvec_dir, 'spk_xvector.scp') 19 | 20 | pool_xvectors = {} 21 | c = 0 22 | with ReadHelper('scp:'+pool_xvec_file) as reader: 23 | for key, xvec in reader: 24 | #print key, mat.shape 25 | pool_xvectors[key] = xvec 26 | c += 1 27 | print("Read ", c, "pool xvectors") 28 | 29 | with ReadHelper('scp:'+src_xvec_file) as reader: 30 | for sspk, sxvec in reader: 31 | print("Computing cosine measure for " + sspk) 32 | with open(join(scores_dir, 'affinity_'+sspk), 'w') as sf: 33 | for pspk, pxvec in pool_xvectors.items(): 34 | # compute cosine distance between src and pool spk 35 | # Multiplying by -1 to ensure compatibility with affinity 36 | # Now lower value will indicate less affinity as compared 37 | # to original cosine distance 38 | dist = -1.0 * distance.cosine(sxvec, pxvec) 39 | sf.write(sspk + ' ' + pspk + ' ' + str(dist) + '\n') 40 | 41 | -------------------------------------------------------------------------------- /baseline/local/vc/am/01_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ------- 3 | # input feature directories 4 | # here, we use features in ../TESTDATA/vctk_vctk_anonymize for demonstration 5 | # 6 | . path.sh 7 | . local/vc/am/init.sh 8 | 9 | proj_dir=${nii_scripts}/acoustic-modeling/project-DAR-continuous 10 | test_data_dir=$1 11 | 12 | output_dir=${test_data_dir}/am_out_mel 13 | output_tmp_dir=${test_data_dir}/am_out_tmp 14 | export TEMP_ACOUSTIC_MODEL_INPUT_DIRS=${test_data_dir}/ppg,${test_data_dir}/xvector,${test_data_dir}/f0 15 | 16 | # where is the directory of the trained model 17 | export TEMP_ACOUSTIC_MODEL_DIRECTORY=exp/models/3_ss_am 18 | 19 | # where is the trained model? 20 | # here, we use network.jsn for demonstration. 21 | # of course, it will generate random noise only 22 | export TEMP_ACOUSTIC_NETWORK_PATH=${TEMP_ACOUSTIC_MODEL_DIRECTORY}/trained_network.jsn 23 | 24 | # where to store the features generated by the trained network? 
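# (Note: ${output_dir} was set above to ${test_data_dir}/am_out_mel; the NSF
# waveform generator local/vc/nsf/01_gen.sh later reads its mel input from the
# same am_out_mel directory, which is how the AM and NSF generation steps are
# chained.)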
25 | export TEMP_ACOUSTIC_OUTPUT_DIRECTORY=${output_dir} 26 | 27 | # directory to save intermediate files (it will be deleted after) 28 | export TEMP_ACOUSTIC_TEMP_OUTPUT_DIRECTORY=${output_tmp_dir} 29 | 30 | temp_dir="exp/tmp" 31 | mkdir -p $temp_dir 32 | export TEMP_ADDITIONAL_COMMAND="--cache_path $temp_dir" 33 | 34 | # 35 | python ${proj_dir}/../SCRIPTS/03_syn.py config_libri_am || exit 1 36 | # after running this scripts, the generated features should be in ${TEMP_ACOUSTIC_OUTPUT_DIRECTORY} 37 | 38 | rm -r ${TEMP_ACOUSTIC_TEMP_OUTPUT_DIRECTORY} 39 | -------------------------------------------------------------------------------- /baseline/local/featex/compute_xvect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #Compute x-vectors using x-vector extractor (trained on VoxCeleb-1,2 data) 9 | 10 | nj_mfcc=20 11 | nj_xvec=20 12 | use_gpu=false 13 | 14 | model=exp/xvector_nnet_1as 15 | #dsets='train_clean_100' 16 | #dsets='train_other_500' 17 | #dsets='librispeech_dev_clean' 18 | #dsets='librispeech_dev_clean_uniq' 19 | #dsets='vctk_dev' 20 | #dsets='vctk_test' 21 | #dsets='vctk_dev_mic1' 22 | #dsets='vctk_dev_mic2' 23 | dsets='librispeech_train_clean_360_uniq' 24 | 25 | 26 | 27 | for dset in $dsets; do 28 | data=data/${dset}_mfcc 29 | mark=$data/.done 30 | if [ ! -f $mark ]; then 31 | [ -d $data ] && rm -r $data 32 | utils/copy_data_dir.sh data/$dset $data 33 | steps/make_mfcc.sh \ 34 | --nj $nj_mfcc \ 35 | --cmd "$train_cmd" \ 36 | --write-utt2num-frames true \ 37 | --mfcc-config conf/mfcc.conf \ 38 | $data 39 | utils/fix_data_dir.sh $data 40 | sid/compute_vad_decision.sh \ 41 | --nj $nj_mfcc \ 42 | --cmd "$train_cmd" \ 43 | $data 44 | utils/fix_data_dir.sh $data 45 | touch $mark 46 | fi 47 | expo=$model/xvectors_$dset 48 | mark=$expo/.done 49 | if [ ! 
-f $mark ]; then 50 | [ -d $expo ] && rm -r $expo 51 | sid/nnet3/xvector/extract_xvectors.sh \ 52 | --nj $nj_xvec \ 53 | --cmd "$train_cmd --mem 4G" \ 54 | --use_gpu $use_gpu \ 55 | $model $data $expo 56 | touch $mark 57 | fi 58 | done 59 | 60 | echo Done 61 | -------------------------------------------------------------------------------- /baseline/local/featex/create_xvector_f0_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import join, basename 3 | 4 | from ioTools import readwrite 5 | from kaldiio import WriteHelper, ReadHelper 6 | import numpy as np 7 | 8 | args = sys.argv 9 | data_dir = args[1] 10 | xvector_file = args[2] 11 | out_dir = args[3] 12 | 13 | dataname = basename(data_dir) 14 | yaap_pitch_dir = join(data_dir, 'yaapt_pitch') 15 | xvec_out_dir = join(out_dir, "xvector") 16 | pitch_out_dir = join(out_dir, "f0") 17 | 18 | # Write pitch features 19 | pitch_file = join(data_dir, 'pitch.scp') 20 | pitch2shape = {} 21 | with ReadHelper('scp:'+pitch_file) as reader: 22 | for key, mat in reader: 23 | pitch2shape[key] = mat.shape[0] 24 | kaldi_f0 = mat[:, 1].squeeze().copy() 25 | yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key+'.f0'), 1) 26 | #unvoiced = np.where(yaapt_f0 == 0)[0] 27 | #kaldi_f0[unvoiced] = 0 28 | #readwrite.write_raw_mat(kaldi_f0, join(pitch_out_dir, key+'.f0')) 29 | f0 = np.zeros(kaldi_f0.shape) 30 | f0[:yaapt_f0.shape[0]] = yaapt_f0 31 | readwrite.write_raw_mat(f0, join(pitch_out_dir, key+'.f0')) 32 | 33 | 34 | # Write xvector features 35 | with ReadHelper('scp:'+xvector_file) as reader: 36 | for key, mat in reader: 37 | #print key, mat.shape 38 | plen = pitch2shape[key] 39 | mat = mat[np.newaxis] 40 | xvec = np.repeat(mat, plen, axis=0) 41 | readwrite.write_raw_mat(xvec, join(xvec_out_dir, key+'.xvector')) 42 | 43 | 44 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_ppg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # Extract PPGs using chain model 5 | # This script extract word position dependent phonemes (346) posteriors and 256-bottleneck PPGs based on ppg-type option. 6 | # 7 | . path.sh 8 | . cmd.sh 9 | 10 | nj=32 11 | stage=0 12 | 13 | . 
utils/parse_options.sh 14 | 15 | if [ $# != 3 ]; then 16 | echo "Usage: " 17 | echo " $0 [options] " 18 | echo "Options" 19 | echo " --nj=40 # Number of CPUs to use for feature extraction" 20 | echo " --stage=0 # Extraction stage" 21 | exit 1; 22 | fi 23 | 24 | data=$1 25 | ppg_model=$2 26 | ppg_dir=$3 27 | 28 | original_data_dir=data/${data} 29 | data_dir=data/${data}_hires 30 | 31 | ivec_extractor=${ppg_model}/nnet3_cleaned/extractor 32 | ivec_data_dir=${ppg_model}/nnet3_cleaned/ivectors_${data}_hires 33 | 34 | model_dir=${ppg_model}/chain_cleaned/tdnn_1d_sp 35 | 36 | 37 | 38 | export LC_ALL=C 39 | if [ $stage -le 0 ]; then 40 | utils/copy_data_dir.sh ${original_data_dir} ${data_dir} 41 | steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ 42 | --cmd "$train_cmd" ${data_dir} 43 | 44 | steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ 45 | ${data_dir} ${ivec_extractor} ${ivec_data_dir} 46 | fi 47 | 48 | if [ $stage -le 1 ]; then 49 | # Keeping nj to 1 due to GPU memory issues 50 | local/featex/extract_bn.sh --cmd "$train_cmd" --nj 1 \ 51 | --iv-root ${ivec_data_dir} --model-dir ${model_dir} \ 52 | ${data} ${ppg_dir} || exit 1; 53 | fi 54 | -------------------------------------------------------------------------------- /baseline/local/vc/nsf/01_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copied from init.sh 3 | 4 | . path.sh 5 | . local/vc/nsf/init.sh 6 | 7 | # ----- Method 1 8 | # For generation, you can configure config.py and run 9 | #python ../SCRIPTS/02_genwaveform.py config 10 | 11 | # ----- Method 2 12 | # Equivalently, you can set the environment variables below 13 | # rather than manually changing config.py 14 | 15 | # Directories of the input features, which are separated by ',' 16 | #test_mel=/home/bsrivast/asr_data/LibriTTS/am_nsf_data/libritts/test/mel 17 | 18 | test_data_dir=$1 19 | 20 | proj_dir=${nii_scripts}/waveform-modeling/project-NSF 21 | 22 | test_mel=${test_data_dir}/am_out_mel 23 | test_xvector=${test_data_dir}/xvector 24 | test_f0=${test_data_dir}/f0 25 | export TEMP_WAVEFORM_MODEL_INPUT_DIRS=${test_mel},${test_xvector},${test_f0} 26 | 27 | # Path to the model directory 28 | export TEMP_WAVEFORM_MODEL_DIRECTORY=${proj_dir}/MODELS/h-sinc-NSF 29 | 30 | # Path to the directory that will save the generated waveforms 31 | export TEMP_WAVEFORM_OUTPUT_DIRECTORY="${test_data_dir}/nsf_output_wav" 32 | 33 | # Path to the trained_network.jsn (or epoch*.autosave) 34 | export TEMP_WAVEFORM_MODEL_NETWORK_PATH=exp/models/4_nsf/trained_network.jsn 35 | 36 | # Path to a temporary directory to save intermediate files (which will be deleted after generation) 37 | export TEMP_WAVEFORM_TEMP_OUTPUT_DIRECTORY="${test_data_dir}/output_tmp" 38 | 39 | temp_dir="exp/tmp" 40 | mkdir -p $temp_dir 41 | export TEMP_ADDITIONAL_COMMAND="--cache_path $temp_dir" 42 | 43 | # generating 44 | python ${proj_dir}/../SCRIPTS/02_genwaveform.py config_libri_nsf || exit 1 45 | 46 | rm -r ${TEMP_WAVEFORM_TEMP_OUTPUT_DIRECTORY} 47 | 48 | 49 | -------------------------------------------------------------------------------- /nii_cmake/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | PROJECT(currennt) 2 | 3 | CMAKE_MINIMUM_REQUIRED (VERSION 2.8) 4 | 5 | # 6 | # CUDA 7 | # 8 | 9 | FIND_PACKAGE (CUDA REQUIRED) 10 | MESSAGE ("-- CUDA_VERSION: ${CUDA_VERSION}") 11 | MESSAGE ("-- CUDA_INCLUDE_DIRS: ${CUDA_INCLUDE_DIRS}") 12 | MESSAGE ("-- CUDA_CUDA_LIBRARY: 
${CUDA_CUDA_LIBRARY}") 13 | MESSAGE ("-- CUDA_CUDART_LIBRARY: ${CUDA_CUDART_LIBRARY}") 14 | MESSAGE ("-- CUDA_cublas_LIBRARY: ${CUDA_cublas_LIBRARY}") 15 | MESSAGE ("-- CUDA_CUFFT_LIBRARIES: ${CUDA_CUFFT_LIBRARIES}") 16 | MESSAGE ("-- CUDA_curand_LIBRARY: ${CUDA_curand_LIBRARY}") 17 | SET (CUDA_ALL_LIBRARIES ${CUDA_CUDA_LIBRARY} ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY}) 18 | INCLUDE_DIRECTORIES ("${CUDA_INCLUDE_DIRS}") 19 | SET (CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") 20 | 21 | # 22 | # BOOST 23 | # 24 | 25 | FIND_PACKAGE (Boost 1.59 REQUIRED COMPONENTS program_options system filesystem random thread) 26 | MESSAGE ("-- Boost_INCLUDE_DIRS: ${Boost_INCLUDE_DIRS}") 27 | MESSAGE ("-- Boost_LIBRARIES: ${Boost_LIBRARIES}") 28 | INCLUDE_DIRECTORIES (${Boost_INCLUDE_DIRS}) 29 | 30 | # 31 | # NetCDF 32 | # 33 | 34 | LINK_DIRECTORIES (${NETCDF_LIB}) 35 | 36 | # 37 | # FLAGS 38 | # 39 | 40 | SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-security") 41 | 42 | # 43 | # PROJECTS 44 | # 45 | 46 | FILE (GLOB_RECURSE src_lib currennt_lib/*.cpp currennt_lib/*.hpp currennt_lib/*.h currennt_lib/*.cu currennt_lib/*.cuh) 47 | FILE (GLOB_RECURSE src_trainer currennt/*.cpp currennt/*.hpp currennt/*.h currennt/*.cu currennt/*.cuh) 48 | CUDA_ADD_EXECUTABLE (${PROJECT_NAME} ${src_lib} ${src_trainer}) 49 | TARGET_LINK_LIBRARIES (${PROJECT_NAME} ${Boost_LIBRARIES} ${CUDA_cublas_LIBRARY} ${CUDA_CUFFT_LIBRARIES} netcdf) 50 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/scores_calibration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | sys.path.append('../cllr/') 5 | import argparse 6 | import numpy as np 7 | 8 | from performance import optimal_llr 9 | 10 | def readMat(ar): 11 | fh = open(ar) 12 | x = [] 13 | for line in fh.readlines(): 14 | y = [str(value) for value in line.split()] 15 | x.append(y) 16 | fh.close() 17 | return x 18 | 19 | if __name__=="__main__": 20 | 21 | parser = argparse.ArgumentParser(description="Scores calibration") 22 | parser.add_argument('scores',help="", type=str) 23 | parser.add_argument('spk',help="spk trials",type=str) 24 | args = parser.parse_args() 25 | 26 | S = np.array(readMat(args.scores)) 27 | SPK = np.array(readMat(args.spk)) 28 | S = S.astype(np.str) 29 | SPK = SPK.astype(np.str) 30 | 31 | NON = [] 32 | TAR = [] 33 | SPK_NON = [] 34 | SPK_TAR = [] 35 | 36 | for i in range(len(SPK)): 37 | if SPK[i,0] == SPK[i,1]: 38 | TAR.append(S[i,:]) 39 | SPK_TAR.append(SPK[i,:]) 40 | else: 41 | NON.append(S[i,:]) 42 | SPK_NON.append(SPK[i,:]) 43 | 44 | NON = np.array(NON) 45 | TAR = np.array(TAR) 46 | SPK_NON = np.array(SPK_NON) 47 | SPK_TAR = np.array(SPK_TAR) 48 | 49 | non = NON[:,2].astype(np.float) 50 | tar = TAR[:,2].astype(np.float) 51 | 52 | tar, non = optimal_llr(tar, non, laplace=True) 53 | 54 | non = non.astype(np.str) 55 | tar = tar.astype(np.str) 56 | 57 | NON = NON.astype(non.dtype) 58 | TAR = TAR.astype(tar.dtype) 59 | 60 | NON[:,2] = non 61 | TAR[:,2] = tar 62 | 63 | S = np.concatenate((TAR,NON)) 64 | SPK = np.concatenate((SPK_TAR,SPK_NON)) 65 | np.savetxt(args.scores+".calibrated",S,fmt="%s") 66 | np.savetxt(args.spk+".calibrated",SPK,fmt="%s") 67 | 68 | -------------------------------------------------------------------------------- /baseline/local/scoring/linkability/compute_linkability.py: -------------------------------------------------------------------------------- 1 
| from performance import linkability, draw_scores 2 | import argparse 3 | import pandas 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Computing the global linkability measure for a list of linkage function score') 7 | parser.add_argument('-s', dest='score_file', type=str, nargs=1, required=True, help='path to score file') 8 | parser.add_argument('-k', dest='key_file', type=str, nargs=1, required=True, help='path to key file') 9 | parser.add_argument('--omega', dest='omega', type=float, nargs=1, required=False, default=1, help='prior ratio (default is 1)') 10 | parser.add_argument('-d', dest='draw_scores', action='store_true', help='flag: draw the score distribution in a figure') 11 | parser.add_argument('-o', dest='output_file', type=str, nargs=1, required=False, help='output path of the png and pdf file (default is linkability_)') 12 | 13 | 14 | 15 | args = parser.parse_args() 16 | # args = parser.parse_args('-s scores.txt -k key.txt'.split(' ')) 17 | # args = parser.parse_args('-s scores.txt -k key.txt -e'.split(' ')) 18 | 19 | scr = pandas.read_csv(args.score_file[0], sep=' ', header=None).pivot_table(index=0, columns=1, values=2) 20 | key = pandas.read_csv(args.key_file[0], sep=' ', header=None).replace('nontarget', False).replace('target', True).pivot_table(index=0, columns=1, values=2) 21 | 22 | matedScores = scr.values[key.values == True] 23 | nonMatedScores = scr.values[key.values == False] 24 | 25 | Dsys, D, bin_centers, bin_edges = linkability(matedScores, nonMatedScores, args.omega) 26 | 27 | if args.draw_scores: 28 | output_file= "linkability_"+args.score_file[0] 29 | if args.output_file is not None: 30 | output_file = args.output_file[0] 31 | draw_scores(matedScores, nonMatedScores, Dsys, D, bin_centers, bin_edges, output_file) 32 | 33 | 34 | 35 | print("linkability: %f" % (Dsys)) 36 | print("") 37 | -------------------------------------------------------------------------------- /baseline/local/featex/extract_bn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . cmd.sh 8 | . path.sh 9 | 10 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 11 | 12 | nj=1 13 | cmd=run.pl 14 | use_gpu=yes 15 | iv_root=exp/nnet3_cleaned 16 | model_dir=exp/chain_cleaned/tdnn_1d_sp 17 | md_name=prefinal-l.raw 18 | cmvn_op='--norm-means=false --norm-vars=false' 19 | 20 | . parse_options.sh 21 | 22 | dsets="$1" 23 | ppg_dir="$2" 24 | 25 | for dset in $dsets; do 26 | ivect=scp:$iv_root/ivector_online.scp 27 | expo=$ppg_dir 28 | mark=$expo/.done 29 | if [ ! -f $mark ]; then 30 | data=data/${dset}_hires 31 | for name in $data/feats.scp $model_dir/$md_name; do 32 | [ ! 
-f $name ] && echo "File $name does not exist" && exit 1 33 | done 34 | sdata=$data/split$nj 35 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 36 | feats="ark:apply-cmvn $cmvn_op --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" 37 | [ -d $expo ] && rm -r $expo 38 | mkdir -p $expo/log 39 | mkdir -p $expo/data 40 | $cmd JOB=1:$nj $expo/log/ppg256_${dset}.JOB.log \ 41 | nnet3-compute \ 42 | --extra-left-context=0 --extra-right-context=0 \ 43 | --extra-left-context-initial=-1 --extra-right-context-final=-1 \ 44 | --frames-per-chunk=50 --use-gpu=$use_gpu --online-ivector-period=10 \ 45 | --online-ivectors=$ivect $model_dir/$md_name "$feats" ark:- \| \ 46 | copy-feats --compress=true ark:- \ 47 | ark,scp:$expo/data/feats.JOB.ark,$expo/data/feats.JOB.scp || exit 1 48 | cat $expo/data/feats.*.scp | sort > $expo/phone_post.scp 49 | rm $expo/data/feats.*.scp 50 | touch $mark 51 | fi 52 | done 53 | 54 | echo Done 55 | -------------------------------------------------------------------------------- /baseline/local/featex/check_pitch_feats.py: -------------------------------------------------------------------------------- 1 | from kaldiio import WriteHelper, ReadHelper 2 | from ioTools import readwrite 3 | 4 | import numpy as np 5 | from os.path import join 6 | 7 | import matplotlib as mpl 8 | mpl.use('Agg') 9 | import matplotlib.pyplot as plt 10 | 11 | data_dir = 'data/eval1_enroll' 12 | yaap_pitch_dir = join(data_dir, 'yaapt_pitch') 13 | 14 | pitch_feats_file = join(data_dir, 'pitch.scp') 15 | pro_pitch_feats_file = join(data_dir, 'processed_pitch.scp') 16 | save_plot_pov = join(data_dir, 'pov.png') 17 | save_plot_nccf = join(data_dir, 'nccf.png') 18 | save_plot_pitch = join(data_dir, 'pitch.png') 19 | save_plot_ypitch = join(data_dir, 'yaapt_pitch.png') 20 | 21 | #with open(pitch_feats_file) as f: 22 | with ReadHelper('scp:'+pitch_feats_file) as reader: 23 | for key, mat in reader: 24 | print key, mat.shape 25 | nccf = mat[:, 0] 26 | pitch = mat[:, 1] 27 | break 28 | 29 | with ReadHelper('scp:'+pro_pitch_feats_file) as reader: 30 | for key, mat in reader: 31 | print key, mat.shape 32 | pov = mat[:, 0] 33 | yaapt_f0 = readwrite.read_raw_mat(join(yaap_pitch_dir, key+'.f0'), 1) 34 | print "yaapt pitch: ", yaapt_f0.shape 35 | #pov = pov / np.sum(pov) 36 | #pitch = mat[:, 1] 37 | break 38 | 39 | x = np.arange(nccf.shape[0]) 40 | x1 = np.arange(yaapt_f0.shape[0]) 41 | 42 | 43 | fig = plt.figure() 44 | ax1 = fig.add_subplot(111) 45 | ax1.plot(x, nccf, 'r') 46 | plt.savefig(save_plot_nccf, dpi=300) 47 | 48 | plt.clf() 49 | 50 | fig = plt.figure() 51 | ax1 = fig.add_subplot(111) 52 | ax1.plot(x, pitch, 'b') 53 | plt.savefig(save_plot_pitch, dpi=300) 54 | 55 | plt.clf() 56 | 57 | fig = plt.figure() 58 | ax1 = fig.add_subplot(111) 59 | ax1.plot(x, pov, 'r') 60 | plt.savefig(save_plot_pov, dpi=300) 61 | 62 | plt.clf() 63 | 64 | fig = plt.figure() 65 | ax1 = fig.add_subplot(111) 66 | ax1.plot(x1, yaapt_f0, 'r') 67 | plt.savefig(save_plot_ypitch, dpi=300) 68 | -------------------------------------------------------------------------------- /baseline/local/featex/nnet3_compute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | #export CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 6 | 7 | . ./cmd.sh 8 | . 
./path.sh 9 | 10 | # nnet3-am-copy --raw=true --prepare-for-test=true --nnet-config='echo output-node name=output input=prefinal-l |' --edits='remove-orphans' final.mdl prefinal-l.raw 11 | 12 | nj=1 13 | use_gpu=yes 14 | iv_root=exp/nnet3_cleaned 15 | md_name=prefinal-l.raw 16 | cmvn_op='--norm-means=false --norm-vars=false' 17 | dsets=train_clean_100 18 | 19 | . parse_options.sh 20 | 21 | for dset in $dsets; do 22 | ivect=scp:$iv_root/ivectors_${dset}_hires/ivector_online.scp 23 | for model in exp/chain_cleaned/tdnn_1d_sp; do 24 | expo=$model/$dset 25 | mark=$expo/.done 26 | if [ ! -f $mark ]; then 27 | data=data/${dset}_hires 28 | for name in $data/feats.scp $model/$md_name; do 29 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 30 | done 31 | sdata=$data/split$nj 32 | [[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1; 33 | feats="ark:apply-cmvn $cmvn_op --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |" 34 | [ -d $expo ] && rm -r $expo 35 | mkdir -p $expo/log 36 | mkdir -p $expo/data 37 | "$train_cmd" JOB=1:$nj $expo/log/nnet3-compute.JOB.log \ 38 | nnet3-compute \ 39 | --extra-left-context=0 --extra-right-context=0 \ 40 | --extra-left-context-initial=-1 --extra-right-context-final=-1 \ 41 | --frames-per-chunk=50 --use-gpu=$use_gpu --online-ivector-period=10 \ 42 | --online-ivectors=$ivect $model/$md_name "$feats" ark:- \| \ 43 | copy-feats --compress=true ark:- \ 44 | ark,scp:$expo/data/feats.JOB.ark,$expo/data/feats.JOB.scp || exit 1 45 | cat $expo/data/feats.*.scp | sort > $expo/feats.scp 46 | rm $expo/data/feats.*.scp 47 | touch $mark 48 | fi 49 | done 50 | done 51 | 52 | echo Done 53 | -------------------------------------------------------------------------------- /baseline/local/featex/03_make_am_nsf_netcdf_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | stage=0 7 | 8 | . utils/parse_options.sh 9 | 10 | if [ $# != 8 ]; then 11 | echo "Usage: " 12 | echo " $0 [options] " 13 | echo "Options" 14 | echo " --stage 0 # Number of CPUs to use for feature extraction" 15 | exit 1; 16 | fi 17 | 18 | train_data=$1 19 | dev_data=$2 20 | test_data=$3 21 | 22 | ppg_file=$4 23 | melspec_file=$5 24 | xvec_out_dir=$6 25 | 26 | out_dir=$7 27 | test_dir=$8 28 | 29 | 30 | if [ $stage -le 0 ]; then 31 | mkdir -p $out_dir/scp $out_dir/xvector $out_dir/f0 $out_dir/ppg $out_dir/mel 32 | 33 | echo "Writing SCP files.." 34 | cut -f 1 -d' ' ${train_data}/utt2spk > ${out_dir}/scp/train.lst || exit 1; 35 | cut -f 1 -d' ' ${dev_data}/utt2spk > ${out_dir}/scp/dev.lst || exit 1; 36 | cut -f 1 -d' ' ${test_data}/utt2spk > ${out_dir}/scp/test.lst || exit 1; 37 | fi 38 | 39 | 40 | if [ $stage -le 1 ]; then 41 | python local/featex/create_ppg_data.py ${ppg_file} ${out_dir} || exit 1; 42 | python local/featex/create_melspec_data.py ${melspec_file} ${out_dir} || exit 1; 43 | fi 44 | 45 | if [ $stage -le 2 ]; then 46 | echo "Writing xvector and F0 for train." 47 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${train_data})/xvector.scp 48 | python local/featex/create_xvector_f0_data.py ${train_data} ${xvec_file} ${out_dir} || exit 1; 49 | echo "Writing xvector and F0 for dev." 50 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${dev_data})/xvector.scp 51 | python local/featex/create_xvector_f0_data.py ${dev_data} ${xvec_file} ${out_dir} || exit 1; 52 | echo "Writing xvector and F0 for test." 
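  # (Same processing as for train and dev above: create_xvector_f0_data.py
  # reads pitch.scp and the per-utterance yaapt_pitch/*.f0 files of the data
  # dir, writes the YAAPT F0 zero-padded to the Kaldi pitch length into
  # ${out_dir}/f0, and repeats each utterance x-vector once per frame into
  # ${out_dir}/xvector.)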
53 | xvec_file=${xvec_out_dir}/xvectors_$(basename ${test_data})/xvector.scp 54 | python local/featex/create_xvector_f0_data.py ${test_data} ${xvec_file} ${out_dir} || exit 1; 55 | fi 56 | 57 | if [ $stage -le 3 ]; then 58 | echo "Splitting test data in separate folder..." 59 | python local/featex/split_test_data.py ${out_dir} ${test_dir} || exit 1; 60 | fi 61 | -------------------------------------------------------------------------------- /baseline/local/asr_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . path.sh 6 | . cmd.sh 7 | 8 | nj=$(nproc) 9 | dset=vctk_dev_trials_f_all 10 | model=exp/models/asr_eval 11 | printf -v results '%(%Y-%m-%d-%H-%M-%S)T' -1 12 | results=exp/results-$results 13 | 14 | . utils/parse_options.sh 15 | 16 | ivec_extr=$model/extractor 17 | graph_dir=$model/graph_tgsmall 18 | large_lang=$model/lang_test_tglarge 19 | small_lang=$model/lang_test_tgsmall 20 | data=data/${dset}_hires 21 | ivect=$ivec_extr/ivect_$dset 22 | 23 | spk2utt=data/$dset/spk2utt 24 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 25 | num_spk=$(wc -l < $spk2utt) 26 | [ $nj -gt $num_spk ] && nj=$num_spk 27 | 28 | if [ ! -f $data/.done_mfcc ]; then 29 | printf "${RED} compute MFCC: $dset${NC}\n" 30 | utils/copy_data_dir.sh data/$dset $data || exit 1 31 | steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" --mfcc-config conf/mfcc_hires.conf $data || exit 1 32 | steps/compute_cmvn_stats.sh $data || exit 1 33 | utils/fix_data_dir.sh $data || exit 1 34 | touch $data/.done_mfcc 35 | fi 36 | 37 | if [ ! -f $ivect/.done ]; then 38 | printf "${RED} compute i-vect: $dset${NC}\n" 39 | steps/online/nnet2/extract_ivectors_online.sh --nj $nj --cmd "$train_cmd" \ 40 | $data ${ivec_extr} $ivect || exit 1 41 | touch $ivect/.done 42 | fi 43 | 44 | expo=$model/decode_${dset}_tgsmall 45 | if [ ! -f $expo/.done ]; then 46 | printf "${RED} decoding: $dset${NC}\n" 47 | steps/nnet3/decode.sh \ 48 | --nj $nj --cmd "$decode_cmd" \ 49 | --acwt 1.0 --post-decode-acwt 10.0 \ 50 | --online-ivector-dir $ivect \ 51 | $graph_dir $data $expo || exit 1 52 | mkdir -p $results 53 | grep WER $expo/wer* | utils/best_wer.sh | tee -a $results/ASR-$dset 54 | touch $expo/.done 55 | fi 56 | 57 | expo=$model/decode_${dset}_tglarge 58 | if [ ! 
-f $expo/.done ]; then 59 | printf "${RED} rescoring: $dset${NC}\n" 60 | steps/lmrescore_const_arpa.sh \ 61 | --cmd "$decode_cmd" $small_lang $large_lang \ 62 | $data $model/decode_${dset}_tgsmall $expo || exit 1 63 | mkdir -p $results 64 | grep WER $expo/wer* | utils/best_wer.sh | tee -a $results/ASR-$dset 65 | touch $expo/.done 66 | fi 67 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/create_trial.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | 6 | def readMat(ar): 7 | fh = open(ar) 8 | x = [] 9 | for line in fh.readlines(): 10 | y = [str(value) for value in line.split()] 11 | x.append(y) 12 | fh.close() 13 | return x 14 | 15 | def readList(ar): 16 | fh = open(ar) 17 | x = [] 18 | for line in fh.readlines(): 19 | y = [str(value) for value in line.split()] 20 | x.append((y[0])) 21 | fh.close() 22 | return x 23 | 24 | if __name__=="__main__": 25 | 26 | parser = argparse.ArgumentParser(description='This computes the trial file given to list of segments and utt2spk') 27 | parser.add_argument('osp_segments_scp',help="Original speech segment list", type=str) 28 | parser.add_argument('asp_segments_scp',help="Anonymized speech segment list", type=str) 29 | parser.add_argument('name',help="name of the trial file",type=str) 30 | parser.add_argument('out_dir',help="output directory",type=str) 31 | parser.add_argument('utt2spk',help="utt2spk file", type=str) 32 | args = parser.parse_args() 33 | 34 | osp_segments_scp = readList(args.osp_segments_scp) 35 | asp_segments_scp = readList(args.asp_segments_scp) 36 | name = args.name 37 | utt2spk = readMat(args.utt2spk) 38 | out_dir = args.out_dir 39 | 40 | #Dictionary from utt to spk 41 | D_utt2spk = dict() 42 | for i in range(len(utt2spk)): 43 | D_utt2spk[utt2spk[i][0]] = utt2spk[i][1] 44 | 45 | k = 0 46 | trial = [] 47 | for i in range(len(osp_segments_scp)): 48 | for j in range(k,len(asp_segments_scp)): 49 | if osp_segments_scp[i] != asp_segments_scp[j]: 50 | trial.append([D_utt2spk[osp_segments_scp[i]], osp_segments_scp[i], D_utt2spk[asp_segments_scp[j]], asp_segments_scp[j]]) 51 | 52 | k += 1 53 | 54 | trial = np.array(trial) 55 | segment_trial = trial[:,[1,3]] 56 | spk_trial = trial[:,[0,2]] 57 | np.savetxt(out_dir+"/segments_"+name+"_trial.txt", segment_trial, delimiter=" ", newline = "\n", fmt="%s") 58 | np.savetxt(out_dir+"/spk_"+name+"_trial.txt", spk_trial, delimiter=" ", newline = "\n", fmt="%s") 59 | -------------------------------------------------------------------------------- /baseline/local/score.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2012 Johns Hopkins University (Author: Daniel Povey) 3 | # 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | [ -f ./path.sh ] && . ./path.sh 7 | 8 | # begin configuration section. 9 | cmd=run.pl 10 | stage=0 11 | decode_mbr=true 12 | word_ins_penalty=0.0,0.5,1.0 13 | min_lmwt=7 14 | max_lmwt=17 15 | iter=final 16 | #end configuration section. 17 | 18 | [ -f ./path.sh ] && . ./path.sh 19 | . parse_options.sh || exit 1; 20 | 21 | if [ $# -ne 3 ]; then 22 | echo "Usage: local/score.sh [--cmd (run.pl|queue.pl...)] " 23 | echo " Options:" 24 | echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." 25 | echo " --stage (0|1|2) # start scoring script from part-way through." 
26 | echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)." 27 | echo " --min_lmwt # minumum LM-weight for lattice rescoring " 28 | echo " --max_lmwt # maximum LM-weight for lattice rescoring " 29 | exit 1; 30 | fi 31 | 32 | data=$1 33 | lang_or_graph=$2 34 | dir=$3 35 | 36 | symtab=$lang_or_graph/words.txt 37 | 38 | for f in $symtab $dir/lat.1.gz $data/text; do 39 | [ ! -f $f ] && echo "score.sh: no such file $f" && exit 1; 40 | done 41 | 42 | mkdir -p $dir/scoring/log 43 | 44 | cat $data/text | sed 's:::g' | sed 's:::g' > $dir/scoring/test_filt.txt 45 | 46 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 47 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \ 48 | lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \ 49 | lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \ 50 | lattice-best-path --word-symbol-table=$symtab \ 51 | ark:- ark,t:$dir/scoring/LMWT.$wip.tra || exit 1; 52 | done 53 | 54 | # Note: the double level of quoting for the sed command 55 | for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do 56 | $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.$wip.log \ 57 | cat $dir/scoring/LMWT.$wip.tra \| \ 58 | utils/int2sym.pl -f 2- $symtab \| sed 's:\::g' \| \ 59 | compute-wer --text --mode=present \ 60 | ark:$dir/scoring/test_filt.txt ark,p:- ">&" $dir/wer_LMWT_$wip || exit 1; 61 | done 62 | 63 | exit 0; 64 | -------------------------------------------------------------------------------- /baseline/local/download_and_untar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Johns Hopkins University (author: Daniel Povey) 4 | # Apache 2.0 5 | 6 | remove_archive=false 7 | 8 | if [ "$1" == --remove-archive ]; then 9 | remove_archive=true 10 | shift 11 | fi 12 | 13 | if [ $# -ne 4 ]; then 14 | echo "Usage: $0 [--remove-archive] " 15 | echo "e.g.: $0 /export/a15/vpanayotov/data www.openslr.org/resources/11 dev-clean" 16 | echo "With --remove-archive it will remove the archive after successfully un-tarring it." 17 | echo " can be one of: dev-clean, test-clean, dev-other, test-other," 18 | echo " train-clean-100, train-clean-360, train-other-500." 19 | exit 1 20 | fi 21 | 22 | data=$1 23 | url=$2 24 | part=$3 25 | corpus=$4 26 | 27 | #if [ ! -d "$data" ]; then 28 | # echo "$0: no such directory $data" 29 | # exit 1; 30 | #fi 31 | 32 | mkdir -p $data || exit 1 33 | 34 | part_ok=false 35 | list="dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500" 36 | for x in $list; do 37 | if [ "$part" == $x ]; then part_ok=true; fi 38 | done 39 | if ! $part_ok; then 40 | echo "$0: expected to be one of $list, but got '$part'" 41 | exit 1; 42 | fi 43 | 44 | if [ -z "$url" ]; then 45 | echo "$0: empty URL base." 46 | exit 1; 47 | fi 48 | 49 | if [ -f $data/$corpus/$part/.complete ]; then 50 | echo "$0: data part $part was already successfully extracted, nothing to do." 51 | exit 0; 52 | fi 53 | 54 | pushd $data 55 | 56 | if [ ! -f $part.tar.gz ]; then 57 | if ! which wget >/dev/null; then 58 | echo "$0: wget is not installed." 59 | exit 1; 60 | fi 61 | full_url=$url/$part.tar.gz 62 | echo "$0: downloading data from $full_url. This may take some time, please be patient." 63 | 64 | if ! wget --no-check-certificate $full_url; then 65 | echo "$0: error executing wget $full_url" 66 | exit 1; 67 | fi 68 | fi 69 | 70 | if ! 
tar -xvzf $part.tar.gz; then 71 | echo "$0: error un-tarring archive $data/$part.tar.gz" 72 | exit 1 73 | fi 74 | 75 | popd >&/dev/null 76 | 77 | touch $data/$corpus/$part/.complete 78 | 79 | echo "$0: Successfully downloaded and un-tarred $data/$part.tar.gz" 80 | 81 | if $remove_archive; then 82 | echo "$0: removing $data/$part.tar.gz file since --remove-archive option was supplied." 83 | rm $data/$part.tar.gz 84 | fi 85 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/get_f0.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """ 3 | This script uses pYAAPT to extract F0, which is robust to low-quality waveforms 4 | http://bingweb.binghamton.edu/~hhu1/pitch/YAPT.pdf 5 | http://bjbschmitt.github.io/AMFM_decompy/pYAAPT.html 6 | 7 | Usage: 8 | 1. specify configuration in __main__ 9 | 2. $: python 00_get_f0.py input_wav output_f0 10 | 11 | Note: 12 | 1. the output will be binary, float32, little-endian, which 13 | is compatible with HTS-scripts, CURRENNT-scripts 14 | 15 | 2. you can print it to string using SPTK x2x: 16 | $: x2x +fa *.f0 > *.f0.txt 17 | 18 | 3. you can read it through Numpy 19 | >> f = open("PATH_TO_F0",'rb') 20 | >> datatype = np.dtype(("<f4",1)) 21 | >> f0 = np.fromfile(f,dtype=datatype) 22 | >> f.close() 23 | 24 | 4. you can also use pyTools by xin wang 25 | >> from ioTools import readwrite 26 | >> f0 = readwrite.read_raw_mat("PATH_TO_F0", 1) 27 | 28 | """ 29 | import os 30 | import sys 31 | import numpy 32 | 33 | import amfm_decompy.pYAAPT as pYAAPT 34 | import amfm_decompy.basic_tools as basic 35 | 36 | def extractF0(input_wav, output_f0, min_f0 = 60, max_f0 = 400, frame_length = 35, frame_shift = 10): 37 | if os.path.isfile(input_wav): 38 | signal = basic.SignalObj(input_wav) 39 | pitch = pYAAPT.yaapt(signal, **{'f0_min': min_f0, 'f0_max': max_f0, 40 | 'frame_length':frame_length, 41 | 'frame_space':frame_shift}) 42 | f0_value = pitch.samp_values 43 | datatype = numpy.dtype((' " 19 | echo "Options" 20 | echo " --rand-level=utt # [utt, spk] Level of randomness while computing the pseudo-xvectors" 21 | echo " --rand-seed= # Random seed while computing the pseudo-xvectors" 22 | echo " --cross-gender=true # [true, false] Whether to select same or 23 | other gender while computing the pseudo-xvectors" 24 | exit 1; 25 | fi 26 | 27 | src_data=$1 28 | pool_data=$2 29 | xvec_out_dir=$3 30 | plda_dir=$4 31 | 32 | src_dataname=$(basename $src_data) 33 | pool_dataname=$(basename $pool_data) 34 | src_xvec_dir=${xvec_out_dir}/xvectors_${src_dataname} 35 | pool_xvec_dir=${xvec_out_dir}/xvectors_${pool_dataname} 36 | affinity_scores_dir=${src_xvec_dir}/spk_pool_scores 37 | pseudo_xvecs_dir=${src_xvec_dir}/pseudo_xvecs 38 | 39 | mkdir -p ${affinity_scores_dir} ${pseudo_xvecs_dir} 40 | 41 | # Iterate over all the source speakers and generate 42 | # affinity distribution over anonymization pool 43 | src_spk2gender=${src_data}/spk2gender 44 | pool_spk2gender=${pool_data}/spk2gender 45 | 46 | if [ $stage -le 0 ]; then 47 | if [ "$distance" = "cosine" ]; then 48 | echo "Computing cosine similarity between source and each pool speaker." 49 | python local/anon/compute_spk_pool_cosine.py ${src_xvec_dir} ${pool_xvec_dir} \ 50 | ${affinity_scores_dir} 51 | elif [ "$distance" = "plda" ]; then 52 | echo "Computing PLDA affinity scores of each source speaker to each pool speaker."
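# A rough sketch of the scoring (inferred from the calls above and below, not an exact spec):
# with --distance cosine, each source speaker s is compared to every pool speaker p through the
# cosine of their x-vectors, cos(x_s, x_p) = (x_s . x_p) / (||x_s|| * ||x_p||), while the PLDA
# branch below scores the same pairs with PLDA log-likelihood ratios; either way the per-speaker
# affinity scores land in ${affinity_scores_dir} and are filtered by gender, sorted, and reduced
# to a pseudo-speaker x-vector by gen_pseudo_xvecs.py in stage 1.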
53 | cut -d\ -f 1 ${src_spk2gender} | while read s; do 54 | #echo "Speaker: $s" 55 | local/anon/compute_spk_pool_affinity.sh ${plda_dir} ${src_xvec_dir} ${pool_xvec_dir} \ 56 | "$s" "${affinity_scores_dir}/affinity_${s}" || exit 1; 57 | done 58 | fi 59 | fi 60 | 61 | if [ $stage -le 1 ]; then 62 | # Filter the scores based on gender and then sort them based on affinity. 63 | # Select the xvectors of 100 farthest speakers and average them to get pseudospeaker. 64 | python local/anon/gen_pseudo_xvecs.py ${src_data} ${pool_data} ${affinity_scores_dir} \ 65 | ${xvec_out_dir} ${pseudo_xvecs_dir} ${rand_level} ${cross_gender} ${proximity} ${rand_seed} || exit 1; 66 | fi 67 | 68 | -------------------------------------------------------------------------------- /baseline/local/run_cleanup_segmentation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2016 Vimal Manohar 4 | # 2016 Yiming Wang 5 | # 2016 Johns Hopkins University (author: Daniel Povey) 6 | # Apache 2.0 7 | 8 | # This script demonstrates how to re-segment training data selecting only the 9 | # "good" audio that matches the transcripts. 10 | # The basic idea is to decode with an existing in-domain acoustic model, and a 11 | # biased language model built from the reference, and then work out the 12 | # segmentation from a ctm like file. 13 | 14 | # For nnet3 and chain results after cleanup, see the scripts in 15 | # local/nnet3/run_tdnn.sh and local/chain/run_tdnn_6z.sh 16 | 17 | # GMM Results for speaker-independent (SI) and speaker adaptive training (SAT) systems on dev and test sets 18 | # [will add these later]. 19 | 20 | set -e 21 | set -o pipefail 22 | set -u 23 | 24 | stage=0 25 | cleanup_stage=0 26 | data=data/train_clean_360 27 | cleanup_affix=cleaned 28 | srcdir=exp/tri3b 29 | nj=10 #40 30 | decode_nj=10 31 | decode_num_threads=4 32 | 33 | . ./path.sh 34 | . ./cmd.sh 35 | . ./utils/parse_options.sh 36 | 37 | cleaned_data=${data}_${cleanup_affix} 38 | 39 | dir=${srcdir}_${cleanup_affix}_work 40 | cleaned_dir=${srcdir}_${cleanup_affix} 41 | 42 | if [ $stage -le 1 ]; then 43 | # This does the actual data cleanup. 44 | steps/cleanup/clean_and_segment_data.sh --stage $cleanup_stage --nj $nj --cmd "$train_cmd" \ 45 | $data data/lang $srcdir $dir $cleaned_data 46 | fi 47 | 48 | if [ $stage -le 2 ]; then 49 | steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \ 50 | $cleaned_data data/lang $srcdir ${srcdir}_ali_${cleanup_affix} 51 | fi 52 | 53 | if [ $stage -le 3 ]; then 54 | steps/train_sat.sh --cmd "$train_cmd" \ 55 | 7000 150000 $cleaned_data data/lang ${srcdir}_ali_${cleanup_affix} ${cleaned_dir} 56 | fi 57 | 58 | if [ $stage -le 4 ]; then 59 | # Test with the models trained on cleaned-up data. 
60 | utils/mkgraph.sh data/lang_test_tgsmall ${cleaned_dir} ${cleaned_dir}/graph_tgsmall 61 | 62 | for dset in test_clean test_other dev_clean dev_other; do 63 | ( 64 | steps/decode_fmllr.sh --nj $decode_nj --num-threads $decode_num_threads \ 65 | --cmd "$decode_cmd" \ 66 | ${cleaned_dir}/graph_tgsmall data/${dset} ${cleaned_dir}/decode_${dset}_tgsmall 67 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 68 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,tgmed} 69 | steps/lmrescore_const_arpa.sh \ 70 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 71 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,tglarge} 72 | steps/lmrescore_const_arpa.sh \ 73 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 74 | data/${dset} ${cleaned_dir}/decode_${dset}_{tgsmall,fglarge} 75 | ) & 76 | done 77 | fi 78 | 79 | wait 80 | exit 0 81 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_similarity_matrix.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import numpy as np 5 | import math 6 | from scipy.stats.mstats import gmean 7 | from math import log10, log2 8 | 9 | def readMat(ar): 10 | fh = open(ar) 11 | x = [] 12 | for line in fh.readlines(): 13 | y = [str(value) for value in line.split()] 14 | x.append(y) 15 | fh.close() 16 | return x 17 | 18 | def getListOfSpk(spk_trial): 19 | L = [] 20 | for i in range(len(spk_trial)): 21 | if not spk_trial[i,0] in L: 22 | L.append(spk_trial[i,0]) 23 | return L 24 | 25 | def getListOfLlrGivenAandB(scores,spk_trial,A,B): 26 | a = spk_trial[:,0] 27 | b = spk_trial[:,1] 28 | indexes_a = np.where(a == A)[0] 29 | indexes_b = np.where(b == B)[0] 30 | indexes = list(set(indexes_a)&set(indexes_b)) 31 | return scores[indexes] 32 | #return (10**scores[indexes]/(1 + 10**scores[indexes])) 33 | 34 | if __name__=="__main__": 35 | 36 | parser = argparse.ArgumentParser(description='Compute the similarity matrix given the PLDA output scores and the speaker id trial file') 37 | parser.add_argument('scores',help="PLDA output scores file", type=str) 38 | parser.add_argument('spk_trial',help="speaker trial file (speaker id corresponding to the trial file)", type=str) 39 | parser.add_argument('out_dir',help="output directory",type=str) 40 | parser.add_argument('name',help="name of the similarity matrix",type=str) 41 | args = parser.parse_args() 42 | 43 | scores = np.array(readMat(args.scores))[:,2] 44 | scores = np.array([float(s) for s in scores]) 45 | spk_trial = np.array(readMat(args.spk_trial)) 46 | out_dir = args.out_dir 47 | name = args.name 48 | 49 | #sum_llrs = sum(10**scores) 50 | spk_list = getListOfSpk(spk_trial) 51 | N_spk = len(spk_list) 52 | 53 | similarity_matrix = np.zeros((N_spk,N_spk)) 54 | k = 0 55 | for i in range(N_spk): 56 | for j in range(k,N_spk): 57 | LLR = getListOfLlrGivenAandB(scores,spk_trial,spk_list[i],spk_list[j]) 58 | #c = gmean(LLR) 59 | LLR = np.array(LLR) 60 | #if i == j: 61 | # c = np.sum(np.log2(1+ 1/LR))/len(LR) 62 | #else: 63 | # c = np.sum(np.log2(1+ LR))/len(LR) 64 | #c = sum(np.log2(1+LR)/len(LR)) 65 | c = 1/(1 + np.exp(-(np.sum(LLR)/len(LLR)))) 66 | similarity_matrix[i,j] = c #(sum(LLR)/len(LLR)) 67 | similarity_matrix[j,i] = c #(sum(LLR)/len(LLR)) 68 | k += 1 69 | 70 | #print("sum conf") 71 | #print(np.sum(similarity_matrix)) 72 | 73 | #similarity_matrix = similarity_matrix/np.sum(similarity_matrix) 74 | 75 | 
np.save(out_dir+"/similarity_matrix_"+name,similarity_matrix) 76 | -------------------------------------------------------------------------------- /baseline/local/featex/f0_yaapt/amfm_decompy/basic_tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Auxiliary classes and functions for used by the other AMFM_decompy modules. 4 | 5 | Version 1.0.8.1 6 | 09/Jul/2018 Bernardo J.B. Schmitt - bernardo.jb.schmitt@gmail.com 7 | """ 8 | 9 | import numpy as np 10 | from scipy.signal import lfilter 11 | 12 | 13 | """ 14 | Creates a signal object. 15 | """ 16 | 17 | class SignalObj(object): 18 | 19 | def __init__(self, *args): 20 | 21 | if len(args) == 1: 22 | try: 23 | from scipy.io import wavfile 24 | except: 25 | print("ERROR: Wav modules could not loaded!") 26 | raise KeyboardInterrupt 27 | self.fs, self.data = wavfile.read(args[0]) 28 | self.name = args[0] 29 | elif len(args) == 2: 30 | self.data = args[0] 31 | self.fs = args[1] 32 | 33 | if self.data.dtype.kind == 'i': 34 | self.nbits = self.data.itemsize*8 35 | self.data = pcm2float(self.data, dtype='f') 36 | 37 | self.size = len(self.data) 38 | self.fs = float(self.fs) 39 | 40 | if self.size == self.data.size/2: 41 | print("Warning: stereo wav file. Converting it to mono for the analysis.") 42 | self.data = (self.data[:,0]+self.data[:,1])/2 43 | 44 | 45 | """ 46 | Filters the signal data by a bandpass filter. 47 | """ 48 | def filtered_version(self, bp_filter): 49 | 50 | tempData = lfilter(bp_filter.b, bp_filter.a, self.data) 51 | 52 | self.filtered = tempData[0:self.size:bp_filter.dec_factor] 53 | self.new_fs = self.fs/bp_filter.dec_factor 54 | 55 | """ 56 | Method that uses the pitch values to estimate the number of modulated 57 | components in the signal. 58 | """ 59 | 60 | def set_nharm(self, pitch_track, n_harm_max): 61 | 62 | n_harm = (self.fs/2)/np.amax(pitch_track) - 0.5 63 | self.n_harm = int(np.floor(min(n_harm, n_harm_max))) 64 | 65 | """ 66 | Adds a zero-mean gaussian noise to the signal. 67 | """ 68 | 69 | def noiser(self, pitch_track, SNR): 70 | 71 | self.clean = np.empty((self.size)) 72 | self.clean[:] = self.data 73 | 74 | RMS = np.std(self.data[pitch_track > 0]) 75 | noise = np.random.normal(0, RMS/(10**(SNR/20)), self.size) 76 | self.data += noise 77 | 78 | """ 79 | Transform a pcm raw signal into a float one, with values limited between -1 and 80 | 1. 81 | """ 82 | 83 | def pcm2float(sig, dtype=np.float64): 84 | 85 | sig = np.asarray(sig) # make sure it's a NumPy array 86 | assert sig.dtype.kind == 'i', "'sig' must be an array of signed integers!" 87 | dtype = np.dtype(dtype) # allow string input (e.g. 'f') 88 | 89 | # Note that 'min' has a greater (by 1) absolute value than 'max'! 90 | # Therefore, we use 'min' here to avoid clipping. 
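# Concretely: for int16 input np.iinfo(np.int16).min is -32768, so the division below maps
# -32768 to exactly -1.0 and +32767 to 32767/32768 (about 0.99997), keeping every sample in
# [-1, 1) without clipping.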
91 | return sig.astype(dtype) / dtype.type(-np.iinfo(sig.dtype).min) 92 | 93 | -------------------------------------------------------------------------------- /baseline/local/data_prep_libritts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vassil Panayotov 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Modifications Copyright 2019 Nagoya University (author: Takenori Yoshimura) 6 | # Apache 2.0 7 | 8 | if [ "$#" -ne 2 ]; then 9 | echo "Usage: $0 " 10 | echo "e.g.: $0 /export/a15/vpanayotov/data/LibriTTS/dev-clean data/dev-clean" 11 | exit 1 12 | fi 13 | 14 | src=$1 15 | dst=$2 16 | 17 | spk_file=$src/../SPEAKERS.txt 18 | 19 | mkdir -p $dst || exit 1 20 | 21 | [ ! -d $src ] && echo "$0: no such directory $src" && exit 1 22 | [ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1 23 | 24 | 25 | wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp 26 | trans=$dst/text; [[ -f "$trans" ]] && rm $trans 27 | utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk 28 | spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender 29 | 30 | for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sed -e "s/$/_/" | sort); do 31 | reader_dir=$(echo $reader_dir | sed -e "s/_$//") 32 | reader=$(basename $reader_dir) 33 | if ! [ $reader -eq $reader ]; then # not integer. 34 | echo "$0: unexpected subdirectory name $reader" 35 | exit 1 36 | fi 37 | 38 | reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}') 39 | if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then 40 | echo "Unexpected gender: '$reader_gender'" 41 | exit 1 42 | fi 43 | 44 | for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do 45 | chapter=$(basename $chapter_dir) 46 | if ! [ "$chapter" -eq "$chapter" ]; then 47 | echo "$0: unexpected chapter-subdirectory name $chapter" 48 | exit 1 49 | fi 50 | 51 | #spk="${reader}_${chapter}" 52 | spk="${reader}" 53 | 54 | find -L $chapter_dir/ -iname "*.wav" | sort | while read -r wav_file; do 55 | id="$reader"-$(basename $wav_file .wav) 56 | echo "$id $wav_file" >>$wav_scp 57 | 58 | txt=$(cat $(echo $wav_file | sed -e "s/\.wav$/.normalized.txt/")) 59 | echo "$id $txt" >>$trans 60 | 61 | # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered 62 | # to be a different speaker. This is done for simplicity and because we want 63 | # e.g. the CMVN to be calculated per-chapter 64 | echo "$id $reader" >>$utt2spk 65 | done 66 | done 67 | # reader -> gender map (again using per-chapter granularity) 68 | echo "$reader $reader_gender" >>$spk2gender 69 | done 70 | 71 | spk2utt=$dst/spk2utt 72 | utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 73 | 74 | ntrans=$(wc -l <$trans) 75 | nutt2spk=$(wc -l <$utt2spk) 76 | ! [ "$ntrans" -eq "$nutt2spk" ] && \ 77 | echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1 78 | 79 | utils/fix_data_dir.sh $dst || exit 1 80 | utils/validate_data_dir.sh --no-feats $dst || exit 1 81 | 82 | echo "$0: successfully prepared data in $dst" 83 | 84 | exit 0 85 | -------------------------------------------------------------------------------- /baseline/local/chain/run_chain_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this script has common stages shared across librispeech chain recipes. 
4 | # It generates a new topology in a new lang directory, gets the alignments as 5 | # lattices, and builds a tree for the new topology 6 | set -e 7 | 8 | stage=11 9 | 10 | # input directory names. These options are actually compulsory, and they have 11 | # been named for convenience 12 | gmm_dir= 13 | ali_dir= 14 | lores_train_data_dir= 15 | 16 | num_leaves=6000 17 | 18 | # output directory names. They are also compulsory. 19 | lang= 20 | lat_dir= 21 | tree_dir= 22 | # End configuration section. 23 | echo "$0 $@" # Print the command line for logging 24 | 25 | . ./cmd.sh 26 | . ./path.sh 27 | . ./utils/parse_options.sh 28 | 29 | [ -z $lang ] && echo "Set --lang, this specifies the new lang directory which will have the new topology" && exit 1; 30 | [ -z $lat_dir ] && echo "Set --lat-dir, this specifies the experiment directory to store lattice" && exit 1; 31 | [ -z $tree_dir ] && echo "Set --tree-dir, this specifies the directory to store new tree " && exit 1; 32 | 33 | for f in $gmm_dir/final.mdl $ali_dir/ali.1.gz $lores_train_data_dir/feats.scp; do 34 | [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1 35 | done 36 | 37 | if [ $stage -le 11 ]; then 38 | echo "$0: creating lang directory with one state per phone." 39 | # Create a version of the lang/ directory that has one state per phone in the 40 | # topo file. [note, it really has two states.. the first one is only repeated 41 | # once, the second one has zero or more repeats.] 42 | if [ -d $lang ]; then 43 | if [ $lang/L.fst -nt data/lang/L.fst ]; then 44 | echo "$0: $lang already exists, not overwriting it; continuing" 45 | else 46 | echo "$0: $lang already exists and seems to be older than data/lang..." 47 | echo " ... not sure what to do. Exiting." 48 | exit 1; 49 | fi 50 | else 51 | cp -r data/lang $lang 52 | silphonelist=$(cat $lang/phones/silence.csl) || exit 1; 53 | nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1; 54 | # Use our special topology... note that later on may have to tune this 55 | # topology. 56 | steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo 57 | fi 58 | fi 59 | 60 | if [ $stage -le 12 ]; then 61 | # Get the alignments as lattices (gives the chain training more freedom). 62 | # use the same num-jobs as the alignments 63 | nj=$(cat ${ali_dir}/num_jobs) || exit 1; 64 | steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" ${lores_train_data_dir} \ 65 | $lang $gmm_dir $lat_dir 66 | rm $lat_dir/fsts.*.gz # save space 67 | fi 68 | 69 | if [ $stage -le 13 ]; then 70 | # Build a tree using our new topology. We know we have alignments for the 71 | # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use 72 | # those. 73 | if [ -f $tree_dir/final.mdl ]; then 74 | echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
75 | exit 1; 76 | fi 77 | steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ 78 | --context-opts "--context-width=2 --central-position=1" \ 79 | --cmd "$train_cmd" $num_leaves ${lores_train_data_dir} $lang $ali_dir $tree_dir 80 | fi 81 | 82 | exit 0; 83 | -------------------------------------------------------------------------------- /baseline/local/data_prep_adv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Vassil Panayotov 4 | # 2014 Johns Hopkins University (author: Daniel Povey) 5 | # Apache 2.0 6 | 7 | if [ "$#" -ne 2 ]; then 8 | echo "Usage: $0 " 9 | echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean" 10 | exit 1 11 | fi 12 | 13 | src=$1 14 | dst=$2 15 | 16 | # all utterances are FLAC compressed 17 | if ! which flac >&/dev/null; then 18 | echo "Please install 'flac' on ALL worker nodes!" 19 | exit 1 20 | fi 21 | 22 | spk_file=$src/../SPEAKERS.TXT 23 | 24 | mkdir -p $dst || exit 1; 25 | 26 | [ ! -d $src ] && echo "$0: no such directory $src" && exit 1; 27 | [ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1; 28 | 29 | 30 | wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp 31 | trans=$dst/text; [[ -f "$trans" ]] && rm $trans 32 | utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk 33 | spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender 34 | 35 | for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do 36 | reader=$(basename $reader_dir) 37 | if ! [ $reader -eq $reader ]; then # not integer. 38 | echo "$0: unexpected subdirectory name $reader" 39 | exit 1; 40 | fi 41 | 42 | reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}') 43 | if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then 44 | echo "Unexpected gender: '$reader_gender'" 45 | exit 1; 46 | fi 47 | 48 | # reader -> gender map (again using per-chapter granularity) 49 | echo "${reader} $reader_gender" >>$spk2gender 50 | 51 | for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do 52 | chapter=$(basename $chapter_dir) 53 | if ! [ "$chapter" -eq "$chapter" ]; then 54 | echo "$0: unexpected chapter-subdirectory name $chapter" 55 | exit 1; 56 | fi 57 | 58 | find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \ 59 | awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1 60 | 61 | chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt 62 | [ ! -f $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1 63 | cat $chapter_trans >>$trans 64 | 65 | # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered 66 | # to be a different speaker. This is done for simplicity and because we want 67 | # e.g. the CMVN to be calculated per-chapter 68 | # NOT DOING THE ABOVE FOR ADVERSARIAL TRAINING - KEEPING SPEAKER LEVEL DATA 69 | awk -v "reader=$reader" '{printf "%s %s\n", $1, reader}' \ 70 | <$chapter_trans >>$utt2spk || exit 1 71 | 72 | done 73 | done 74 | 75 | spk2utt=$dst/spk2utt 76 | utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1 77 | 78 | ntrans=$(wc -l <$trans) 79 | nutt2spk=$(wc -l <$utt2spk) 80 | ! 
[ "$ntrans" -eq "$nutt2spk" ] && \ 81 | echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1; 82 | 83 | utils/fix_data_dir.sh $dst || exit 1; 84 | utils/validate_data_dir.sh --no-feats $dst || exit 1; 85 | 86 | echo "$0: successfully prepared data in $dst" 87 | 88 | exit 0 89 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_xvectors.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is a general TSNE plotting script 3 | It needs spk2gender and spk_xvector.scp 4 | ''' 5 | 6 | import matplotlib as mpl 7 | mpl.use('Agg') 8 | import matplotlib.pyplot as plt 9 | import matplotlib.lines as mlines 10 | 11 | from os.path import join 12 | 13 | import numpy as np 14 | from sklearn.manifold import TSNE 15 | 16 | import kaldi_io 17 | 18 | data = 'train' 19 | spk_xvector_dir = f'exp/0007_voxceleb_v2_1a/exp/xvector_nnet_1a/am_nsf/xvectors_train_clean_360_{data}' 20 | spk_xvector_file = join(spk_xvector_dir, 'spk_xvector.scp') 21 | tsne_file = join(spk_xvector_dir, 'spk_xvector.png') 22 | 23 | spk2gender_file = f'data/am_nsf/train_clean_360_{data}/spk2gender' 24 | 25 | def get_cmap(n, name='hsv'): 26 | return plt.cm.get_cmap(name, n) 27 | 28 | # get gender info 29 | spk2gender = {} 30 | with open(spk2gender_file) as f: 31 | for line in f.read().splitlines(): 32 | sp = line.split() 33 | spkid = sp[0] 34 | gen = sp[1] 35 | spk2gender[spkid] = gen 36 | 37 | X = [] 38 | spks = [] 39 | for key, mat in kaldi_io.read_vec_flt_scp(spk_xvector_file): 40 | #print(key, mat.shape) 41 | spks.append(key) 42 | X.append(mat[np.newaxis]) 43 | 44 | X = np.concatenate(X) 45 | print("X = ", X.shape) 46 | mean_X = np.mean(X, axis=0) 47 | std_X = np.std(X, axis=0) 48 | X = (X - mean_X) / std_X 49 | 50 | tsne = TSNE(n_components=2, init='random', random_state=42, 51 | perplexity=5) 52 | Y = tsne.fit_transform(X) 53 | 54 | nspk = Y.shape[0] 55 | #nspk = 3 56 | fig = plt.figure() 57 | ax1 = fig.add_subplot(111) 58 | 59 | #cmap = get_cmap(3, name='tab10') # for male, female and others 60 | #colors = [cmap(i) for i in range(nspk)] 61 | #colors = ['b'] * nspk 62 | #smark = ['s'] * nspk 63 | for i, spkid in enumerate(spks): 64 | # Check gender 65 | scolor = 'b' 66 | smark = '*' 67 | if spkid in spk2gender: 68 | if spk2gender[spkid] == 'm': 69 | scolor = 'g' 70 | elif spk2gender[spkid] == 'f': 71 | scolor = 'r' 72 | ax1.scatter(Y[i, 0], Y[i, 1], c=scolor, s=5, marker=smark) 73 | 74 | plt.title(f'TSNE for {nspk} speakers in AM&NSF {data}. 
One vector per speaker.') 75 | 76 | 77 | # Legend 78 | #other_leg = mlines.Line2D([], [], color='blue', marker='s', linestyle='None', 79 | # markersize=10, label='Others') 80 | #v1male_leg = mlines.Line2D([], [], color='green', marker='*', 81 | # linestyle='None', markersize=5, label='Voxceleb1 Male') 82 | #v2male_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 83 | # markersize=5, label='Voxceleb2 Male') 84 | #v1female_leg = mlines.Line2D([], [], color='red', marker='*', 85 | # linestyle='None', markersize=5, label='Voxceleb1 Female') 86 | #v2female_leg = mlines.Line2D([], [], color='red', marker='^', linestyle='None', 87 | # markersize=5, label='Voxceleb2 Female') 88 | v2_leg = mlines.Line2D([], [], color='green', marker='*', linestyle='None', 89 | markersize=5, label='Male') 90 | v1_leg = mlines.Line2D([], [], color='red', marker='*', 91 | linestyle='None', markersize=5, label='Female') 92 | 93 | plt.legend(handles=[v1_leg, v2_leg]) 94 | 95 | plt.savefig(tsne_file, dpi=300) 96 | -------------------------------------------------------------------------------- /baseline/local/run_prepfeats_am_nsf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . path.sh 4 | . cmd.sh 5 | 6 | set -e 7 | 8 | #===== begin config ======= 9 | nj=40 10 | stage=0 11 | 12 | # Chain model for PPG extraction 13 | ppg_model= # change this to your pretrained chain model 14 | ppg_dir= # change this to the dir where PPGs will be stored 15 | 16 | # Xvector extractor 17 | xvec_nnet_dir= # change this to pretrained xvector model 18 | 19 | #===== end config ========= 20 | 21 | . utils/parse_options.sh 22 | 23 | if [ $# != 2 ]; then 24 | echo "Usage: " 25 | echo " $0 [options] " 26 | echo "Options" 27 | echo " --nj=40 # Number of CPUs to use for feature extraction" 28 | exit 1; 29 | fi 30 | 31 | # Original data in ./data folder which will be splitted into train, dev and test based on speakers 32 | train_data="$1" # change this to your actual data 33 | feat_out_dir="$2" 34 | 35 | # Mel spectrogram config 36 | melspec_dir=data/${train_data}_mspec 37 | melspec_file=${melspec_dir}/feats.scp 38 | 39 | # Split data 40 | dev_spks=20 41 | test_spks=20 42 | split_dir=data/am_nsf_train 43 | 44 | # x-vector extraction 45 | train_split=${train_data}_train 46 | dev_split=${train_data}_dev 47 | test_split=${train_data}_test 48 | split_data="${train_split} ${dev_split} ${test_split}" 49 | xvec_out_dir=${xvec_nnet_dir}/am_nsf 50 | 51 | # Output directories for netcdf data that will be used by AM & NSF training 52 | train_out=${feat_out_dir}/am_nsf_train # change this to the dir where train, dev data and scp files will be stored 53 | test_out=${feat_out_dir}/am_nsf_test # change this to dir where test data will be stored 54 | 55 | 56 | # Extract PPG using chain model 57 | if [ $stage -le 0 ]; then 58 | echo "Stage 0: PPG extraction." 59 | local/featex/extract_ppg.sh --nj $nj --stage 0 data/${train_data} \ 60 | ${ppg_model} ${ppg_dir}/ppg_${train_data} 61 | fi 62 | 63 | # Extract 80 dimensional mel spectrograms 64 | if [ $stage -le 1 ]; then 65 | echo "Stage 1: Mel spectrogram extraction." 66 | local/featex/extract_melspec.sh --nj $nj data/${train_data} ${melspec_dir} 67 | fi 68 | 69 | # Split the data into train, dev and test 70 | if [ $stage -le 2 ]; then 71 | echo "Stage 2: Splitting the data into train, dev and test based on speakers." 
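# Sketch of the expected outcome (inferred from the options above): 00_make_am_nsf_data.sh holds
# out ${dev_spks} speakers for dev and ${test_spks} speakers for test, leaving the remaining
# speakers for training, and writes three ordinary Kaldi data dirs under ${split_dir}; the
# x-vector and pitch stages below then simply loop over ${split_data}.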
72 | local/featex/00_make_am_nsf_data.sh --dev-spks ${dev_spks} --test-spks ${test_spks} \ 73 | data/${train_data} ${split_dir} 74 | fi 75 | 76 | # Extract xvectors from each split of data 77 | if [ $stage -le 3 ]; then 78 | echo "Stage 3: x-vector extraction." 79 | for sdata in ${split_data}; do 80 | local/featex/01_extract_xvectors.sh --nj ${dev_spks} ${split_dir}/${sdata} ${xvec_nnet_dir} \ 81 | ${xvec_out_dir} 82 | done 83 | fi 84 | 85 | # Extract pitch from each split of data 86 | if [ $stage -le 4 ]; then 87 | echo "Stage 4: Pitch extraction." 88 | for sdata in ${split_data}; do 89 | local/featex/02_extract_pitch.sh --nj ${dev_spks} ${split_dir}/${sdata} 90 | done 91 | fi 92 | 93 | # Create NetCDF data from each split 94 | if [ $stage -le 5 ]; then 95 | echo "Stage 5: Making netcdf data for AM & NSF training." 96 | local/featex/03_make_am_nsf_netcdf_data.sh ${train_split} ${dev_split} ${test_split} \ 97 | ${ppg_dir}/ppg_${train_data}/phone_post.scp ${melspec_file} \ 98 | ${xvec_out_dir} ${train_out} ${test_out} 99 | fi 100 | -------------------------------------------------------------------------------- /baseline/local/create_uniform_segments.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2014 Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti). Apache 2.0. 3 | 4 | # creates a segments file in the provided data directory 5 | # into uniform segments with specified window and overlap 6 | 7 | from __future__ import division 8 | import imp, sys, argparse, os, math, subprocess 9 | 10 | min_segment_length = 10 # in seconds 11 | def segment(total_length, window_length, overlap = 0): 12 | increment = window_length - overlap 13 | num_windows = int(math.ceil(float(total_length)/increment)) 14 | segments = [(x * increment, min( total_length, (x * increment) + window_length)) for x in range(0, num_windows)] 15 | if segments[-1][1] - segments[-1][0] < min_segment_length: 16 | segments[-2] = (segments[-2][0], segments[-1][1]) 17 | segments.pop() 18 | return segments 19 | 20 | def get_wave_segments(wav_command, window_length, overlap): 21 | raw_output = subprocess.check_output(wav_command+" sox -t wav - -n stat 2>&1 | grep Length ", shell = True) 22 | parts = raw_output.split(":") 23 | if parts[0].strip() != "Length (seconds)": 24 | raise Exception("Failed while processing file ", wav_command) 25 | total_length = float(parts[1]) 26 | segments = segment(total_length, window_length, overlap) 27 | return segments 28 | 29 | def prepare_segments_file(kaldi_data_dir, window_length, overlap): 30 | if not os.path.exists(kaldi_data_dir+'/wav.scp'): 31 | raise Exception("Not a proper kaldi data directory") 32 | ids = [] 33 | files = [] 34 | for line in open(kaldi_data_dir+'/wav.scp').readlines(): 35 | parts = line.split() 36 | ids.append(parts[0]) 37 | files.append(" ".join(parts[1:])) 38 | segments_total = [] 39 | segments_per_recording = [] 40 | for i in range(0, len(ids)): 41 | segments = get_wave_segments(files[i], window_length, overlap) 42 | segments_current_recording = [] 43 | for segment in segments: 44 | segment_string = "{0}-{1:06}-{2:06} {0} {3} {4}".format(ids[i], int(segment[0] * 1000), int(segment[1]* 1000), segment[0], segment[1]) 45 | segments_total.append(segment_string) 46 | segments_current_recording.append(segment_string.split()[0]) 47 | segments_per_recording.append([ids[i], segments_current_recording]) 48 | return segments_total, segments_per_recording 49 | if __name__ == "__main__": 50 | 
usage = """ Python script to create segments file with uniform segment 51 | given the kaldi data directory.""" 52 | sys.stderr.write(str(" ".join(sys.argv))) 53 | main_parser = argparse.ArgumentParser(usage) 54 | parser = argparse.ArgumentParser() 55 | parser.add_argument('--window-length', type = float, default = 30.0, help = 'length of the window used to cut the segment') 56 | parser.add_argument('--overlap', type = float, default = 5.0, help = 'overlap of neighboring windows') 57 | parser.add_argument('data_dir', help='directory such as data/train') 58 | 59 | params = parser.parse_args() 60 | 61 | # write the segments file 62 | segments_file = open(params.data_dir+"/segments", "w") 63 | segments, segments_per_recording = prepare_segments_file(params.data_dir, params.window_length, params.overlap) 64 | segments_file.write("\n".join(segments)) 65 | segments_file.close() 66 | 67 | utt2spk_file = open(params.data_dir + "/utt2spk", "w") 68 | spk2utt_file = open(params.data_dir + "/spk2utt", "w") 69 | # write the utt2spk file 70 | # assumes the recording id is the speaker ir 71 | for i in range(len(segments_per_recording)): 72 | segments = segments_per_recording[i][1] 73 | recording = segments_per_recording[i][0] 74 | spk2utt_file.write("{0} {1}\n".format(recording, " ".join(segments))) 75 | for segment in segments: 76 | utt2spk_file.write("{0} {1}\n".format(segment, recording)) 77 | 78 | spk2utt_file.close() 79 | utt2spk_file.close() 80 | 81 | -------------------------------------------------------------------------------- /baseline/local/plot/plot_spk_xvectors_voxceleb.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | mpl.use('Agg') 3 | import matplotlib.pyplot as plt 4 | import matplotlib.lines as mlines 5 | 6 | 7 | import numpy as np 8 | from sklearn.manifold import TSNE 9 | 10 | import kaldi_io 11 | 12 | spk_xvector_file = 'exp/xvector_nnet_1a/xvectors_train/spk_xvector.scp' 13 | tsne_file = 'exp/xvector_nnet_1a/xvectors_train/voxceleb_spk_xvector_voxversion.png' 14 | 15 | vox1_meta_file = '/home/bsrivast/asr_data/VoxCeleb/voxceleb/vox1_meta_map.csv' 16 | vox2_meta_file = '/home/bsrivast/asr_data/VoxCeleb/voxceleb2/vox2_meta.csv' 17 | 18 | def get_cmap(n, name='hsv'): 19 | return plt.cm.get_cmap(name, n) 20 | 21 | # get gender info 22 | spk2gender = {} 23 | spk2vox = {} 24 | with open(vox1_meta_file) as f: 25 | for line in f.read().splitlines(): 26 | sp = line.split() 27 | spkid = sp[1] 28 | gen = sp[2] 29 | spk2gender[spkid] = gen 30 | spk2vox[spkid] = 1 31 | with open(vox2_meta_file) as f: 32 | for line in f.read().splitlines()[1:]: 33 | sp = line.split(',') 34 | spkid = sp[0].strip() 35 | gen = sp[2].strip() 36 | spk2gender[spkid] = gen 37 | spk2vox[spkid] = 2 38 | 39 | X = [] 40 | spks = [] 41 | for key, mat in kaldi_io.read_vec_flt_scp(spk_xvector_file): 42 | #print(key, mat.shape) 43 | spks.append(key) 44 | X.append(mat[np.newaxis]) 45 | 46 | X = np.concatenate(X) 47 | print("X = ", X.shape) 48 | mean_X = np.mean(X, axis=0) 49 | std_X = np.std(X, axis=0) 50 | X = (X - mean_X) / std_X 51 | 52 | tsne = TSNE(n_components=2, init='random', random_state=42, 53 | perplexity=100) 54 | Y = tsne.fit_transform(X) 55 | 56 | nspk = Y.shape[0] 57 | #nspk = 3 58 | fig = plt.figure() 59 | ax1 = fig.add_subplot(111) 60 | 61 | #cmap = get_cmap(3, name='tab10') # for male, female and others 62 | #colors = [cmap(i) for i in range(nspk)] 63 | #colors = ['b'] * nspk 64 | #smark = ['s'] * nspk 65 | for i, spkid in enumerate(spks): 66 | 
# Check gender 67 | scolor = 'b' 68 | #if spkid in spk2gender: 69 | # if spk2gender[spkid] == 'm': 70 | # scolor = 'g' 71 | # elif spk2gender[spkid] == 'f': 72 | # scolor = 'r' 73 | # Check voxceleb version 74 | smark = 's' 75 | if spkid in spk2vox: 76 | if spk2vox[spkid] == 1: 77 | smark = '*' 78 | scolor = 'r' 79 | elif spk2vox[spkid] == 2: 80 | smark = '^' 81 | scolor = 'g' 82 | 83 | ax1.scatter(Y[i, 0], Y[i, 1], c=scolor, s=1, marker=smark) 84 | 85 | #ax1.scatter(Y[:, 0], Y[:, 1], c=colors, s=1, marker=smark) 86 | plt.title(f'TSNE for {nspk} speakers in Voxceleb train. One vector per speaker.') 87 | 88 | 89 | # Legend 90 | #other_leg = mlines.Line2D([], [], color='blue', marker='s', linestyle='None', 91 | # markersize=10, label='Others') 92 | #v1male_leg = mlines.Line2D([], [], color='green', marker='*', 93 | # linestyle='None', markersize=5, label='Voxceleb1 Male') 94 | #v2male_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 95 | # markersize=5, label='Voxceleb2 Male') 96 | #v1female_leg = mlines.Line2D([], [], color='red', marker='*', 97 | # linestyle='None', markersize=5, label='Voxceleb1 Female') 98 | #v2female_leg = mlines.Line2D([], [], color='red', marker='^', linestyle='None', 99 | # markersize=5, label='Voxceleb2 Female') 100 | v2_leg = mlines.Line2D([], [], color='green', marker='^', linestyle='None', 101 | markersize=5, label='Voxceleb2') 102 | v1_leg = mlines.Line2D([], [], color='red', marker='*', 103 | linestyle='None', markersize=5, label='Voxceleb1') 104 | 105 | plt.legend(handles=[v1_leg, v2_leg]) 106 | 107 | plt.savefig(tsne_file, dpi=300) 108 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | nj=$(nproc) 6 | 7 | home=$PWD 8 | 9 | #conda_url=https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 10 | conda_url=https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh 11 | venv_dir=$PWD/venv 12 | 13 | netcdf=https://github.com/Unidata/netcdf-c/archive/v4.3.3.1.tar.gz 14 | netcdf_dir=$PWD/netcdf-c-4.3.3.1 15 | 16 | boost=https://netix.dl.sourceforge.net/project/boost/boost/1.59.0/boost_1_59_0.tar.gz 17 | boost_dir=$PWD/boost_1_59_0 18 | 19 | flac=https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.3.tar.xz 20 | flac_dir=$PWD/flac-1.3.3 21 | 22 | nii_cmake=$PWD/nii_cmake/CMakeLists.txt 23 | nii_dir=$PWD/nii 24 | currennt_dir=$nii_dir/CURRENNT_codes 25 | 26 | mark=.done-venv 27 | if [ ! -f $mark ]; then 28 | echo 'Making python virtual environment' 29 | name=$(basename $conda_url) 30 | if [ ! -f $name ]; then 31 | wget $conda_url || exit 1 32 | fi 33 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 34 | [ -d $venv_dir ] && rm -r $venv_dir 35 | sh $name -b -p $venv_dir || exit 1 36 | . $venv_dir/bin/activate 37 | echo 'Installing python dependencies' 38 | pip install -r requirements.txt || exit 1 39 | touch $mark 40 | fi 41 | echo "if [ \$(which python) != $venv_dir/bin/python ]; then source $venv_dir/bin/activate; fi" > env.sh 42 | 43 | mark=.done-kaldi-tools 44 | if [ ! -f $mark ]; then 45 | echo 'Building Kaldi tools' 46 | cd kaldi/tools 47 | extras/check_dependencies.sh || exit 1 48 | make -j $nj || exit 1 49 | cd $home 50 | touch $mark 51 | fi 52 | 53 | mark=.done-kaldi-src 54 | if [ ! 
-f $mark ]; then 55 | echo 'Building Kaldi src' 56 | cd kaldi/src 57 | ./configure --shared || exit 1 58 | make clean || exit 1 59 | make depend -j $nj || exit 1 60 | make -j $nj || exit 1 61 | cd $home 62 | touch $mark 63 | fi 64 | 65 | mark=.done-netcdf 66 | if [ ! -f $mark ]; then 67 | if [ ! -f $(basename $netcdf) ]; then 68 | wget $netcdf || exit 1 69 | fi 70 | echo 'Unpacking NetCDF source files' 71 | dir=$netcdf_dir 72 | [ -d $dir ] && rm -r $dir 73 | tar -xf $(basename $netcdf) || exit 1 74 | echo 'Building NetCDF' 75 | build=$dir/build 76 | cd $dir 77 | ./configure --disable-netcdf-4 --prefix=$build || exit 1 78 | make -j $nj || exit 1 79 | make install || exit 1 80 | cd $home 81 | touch $mark 82 | fi 83 | netcdf_bin=$netcdf_dir/build/bin 84 | netcdf_lib=$netcdf_dir/build/lib 85 | echo "export PATH=$netcdf_bin:\$PATH" >> env.sh 86 | echo "export LD_LIBRARY_PATH=$netcdf_lib:\$LD_LIBRARY_PATH" >> env.sh 87 | 88 | mark=.done-boost 89 | if [ ! -f $mark ]; then 90 | if [ ! -f $(basename $boost) ]; then 91 | wget $boost || exit 1 92 | fi 93 | echo 'Unpacking boost source files' 94 | dir=$boost_dir 95 | [ -d $dir ] && rm -r $dir 96 | tar -xf $(basename $boost) || exit 1 97 | echo 'Building boost libraries' 98 | build=$dir/build 99 | cd $dir 100 | ./bootstrap.sh --with-libraries=program_options,filesystem,system,random,thread || exit 1 101 | ./b2 -j $nj --prefix=$build || exit 1 102 | cd $home 103 | touch $mark 104 | fi 105 | boost_root=$boost_dir 106 | echo "export LD_LIBRARY_PATH=$boost_root/stage/lib:\$LD_LIBRARY_PATH" >> env.sh 107 | 108 | mark=.done-flac 109 | if [ ! -f $mark ]; then 110 | if [ -z "$(which flac)" ]; then 111 | if [ ! -f $(basename $flac) ]; then 112 | wget $flac || exit 1 113 | fi 114 | echo 'Unpacking flac source files' 115 | [ -d $flac_dir ] && rm -r $flac_dir 116 | tar -xf $(basename $flac) || exit 1 117 | echo 'Building flac' 118 | cd $flac_dir 119 | ./configure --prefix=$PWD/install || exit 1 120 | make -j $nj || exit 1 121 | # make -j $nj check || exit 1 122 | make install || exit 1 123 | fi 124 | cd $home 125 | touch $mark 126 | fi 127 | [ -f $flac_dir/install/bin/flac ] && \ 128 | echo "export PATH=$flac_dir/install/bin:\$PATH" >> env.sh 129 | 130 | mark=.done-nii 131 | if [ ! -f $mark ]; then 132 | echo 'Building nii' 133 | cp $nii_cmake $currennt_dir || exit 1 134 | dir=$currennt_dir/build 135 | [ -d $dir ] && rm -r $dir 136 | mkdir -p $dir || exit 1 137 | cd $dir 138 | cmake .. \ 139 | -DCMAKE_BUILD_TYPE=Release \ 140 | -DBOOST_ROOT=$boost_root \ 141 | -DNETCDF_LIB=$netcdf_lib || exit 1 142 | make -j $(nproc) || exit 1 143 | cd $home 144 | touch $mark 145 | fi 146 | echo "export PATH=$currennt_dir/build:\$PATH" >> env.sh 147 | echo "export PYTHONPATH=$currennt_dir:$nii_dir/pyTools:$PWD/nii_scripts:\$PYTHONPATH" >> env.sh 148 | echo "export nii_scripts=$PWD/nii_scripts" >> env.sh 149 | echo "export nii_dir=$nii_dir" >> env.sh 150 | 151 | echo Done 152 | -------------------------------------------------------------------------------- /baseline/local/asv_eval.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | . ./cmd.sh 6 | . 
./path.sh 7 | 8 | nj=$(nproc) 9 | asv_eval_model=exp/models/asv_eval/xvect_01709_1 10 | plda_dir=$asv_eval_model/xvect_train_clean_360 11 | 12 | #enrolls=vctk_dev_enrolls 13 | #trials=vctk_dev_trials_f_common 14 | 15 | enrolls=libri_dev_enrolls 16 | trials=libri_dev_trials_f 17 | 18 | printf -v results '%(%Y-%m-%d-%H-%M-%S)T' -1 19 | results=exp/results-$results 20 | 21 | . ./utils/parse_options.sh 22 | 23 | for name in $asv_eval_model/final.raw $plda_dir/plda $plda_dir/mean.vec \ 24 | $plda_dir/transform.mat data/$enrolls/enrolls data/$trials/trials ; do 25 | [ ! -f $name ] && echo "File $name does not exist" && exit 1 26 | done 27 | 28 | for dset in $enrolls $trials; do 29 | data=data/$dset 30 | spk2utt=$data/spk2utt 31 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 32 | num_spk=$(wc -l < $spk2utt) 33 | njobs=$([ $num_spk -le $nj ] && echo $num_spk || echo $nj) 34 | if [ ! -f $data/.done_mfcc ]; then 35 | printf "${RED} compute MFCC: $dset${NC}\n" 36 | steps/make_mfcc.sh --nj $njobs --cmd "$train_cmd" \ 37 | --write-utt2num-frames true $data || exit 1 38 | utils/fix_data_dir.sh $data || exit 1 39 | touch $data/.done_mfcc 40 | fi 41 | if [ ! -f $data/.done_vad ]; then 42 | printf "${RED} compute VAD: $dset${NC}\n" 43 | sid/compute_vad_decision.sh --nj $njobs --cmd "$train_cmd" $data || exit 1 44 | utils/fix_data_dir.sh $data || exit 1 45 | touch $data/.done_vad 46 | fi 47 | done 48 | 49 | for dset in $enrolls $trials; do 50 | data=data/$dset 51 | spk2utt=$data/spk2utt 52 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 53 | num_spk=$(wc -l < $spk2utt) 54 | njobs=$([ $num_spk -le $nj ] && echo $num_spk || echo $nj) 55 | expo=$asv_eval_model/xvect_$dset 56 | if [ ! -f $expo/.done ]; then 57 | printf "${RED} compute x-vect: $dset${NC}\n" 58 | sid/nnet3/xvector/extract_xvectors.sh --nj $njobs --cmd "$train_cmd" \ 59 | $asv_eval_model $data $expo || exit 1 60 | touch $expo/.done 61 | fi 62 | done 63 | 64 | expo=$results/ASV-$enrolls-$trials 65 | if [ ! -f $expo/.done ]; then 66 | printf "${RED} ASV scoring: $expo${NC}\n" 67 | mkdir -p $expo 68 | xvect_enrolls=$asv_eval_model/xvect_$enrolls/xvector.scp 69 | xvect_trials=$asv_eval_model/xvect_$trials/xvector.scp 70 | for name in $xvect_enrolls $xvect_trials; do 71 | [ ! 
-f $name ] && echo "File $name does not exist" && exit 1 72 | done 73 | $train_cmd $expo/log/ivector-plda-scoring.log \ 74 | sed -r 's/_|-/ /g' data/$enrolls/enrolls \| awk '{split($1, val, "_"); ++num[val[1]]}END{for (spk in num) print spk, num[spk]}' \| \ 75 | ivector-plda-scoring --normalize-length=true --num-utts=ark:- \ 76 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 77 | "ark:cut -d' ' -f1 data/$enrolls/enrolls | grep -Ff - $xvect_enrolls | ivector-mean ark:data/$enrolls/spk2utt scp:- ark:- | ivector-subtract-global-mean $plda_dir/mean.vec ark:- ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 78 | "ark:cut -d' ' -f2 data/$trials/trials | sort | uniq | grep -Ff - $xvect_trials | ivector-subtract-global-mean $plda_dir/mean.vec scp:- ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 79 | "cat data/$trials/trials | cut -d' ' --fields=1,2 |" $expo/scores || exit 1 80 | eer=`compute-eer <(local/prepare_for_eer.py data/$trials/trials $expo/scores) 2> /dev/null` 81 | mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $expo/scores data/$trials/trials 2> /dev/null` 82 | mindcf2=`sid/compute_min_dcf.py --p-target 0.001 $expo/scores data/$trials/trials 2> /dev/null` 83 | echo "EER: $eer%" | tee $expo/EER 84 | echo "minDCF(p-target=0.01): $mindcf1" | tee -a $expo/EER 85 | echo "minDCF(p-target=0.001): $mindcf2" | tee -a $expo/EER 86 | PYTHONPATH=$(realpath ../cllr) python ../cllr/compute_cllr.py \ 87 | -k data/$trials/trials -s $expo/scores -e | tee $expo/Cllr || exit 1 88 | 89 | # Compute linkability 90 | PYTHONPATH=$(realpath ../anonymization_metrics) python local/scoring/linkability/compute_linkability.py \ 91 | -k data/$trials/trials -s $expo/scores \ 92 | -d -o $expo/linkability | tee $expo/linkability_log || exit 1 93 | 94 | # Zebra 95 | label=$enrolls-$trials 96 | PYTHONPATH=$(realpath ../zebra) python ../zebra/zero_evidence.py \ 97 | -k data/$trials/trials -s $expo/scores -l $label | tee $expo/zebra || exit 1 98 | #-k data/$trials/trials -s $expo/scores -l $label -e png | tee $expo/zebra || exit 1 99 | 100 | touch $expo/.done 101 | fi 102 | -------------------------------------------------------------------------------- /baseline/local/anon/anonymize_data_dir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for first voice privacy challenge 2020 3 | # 4 | # This script anonymizes a kaldi data directory and produces a new 5 | # directory with given suffix in the name 6 | 7 | . path.sh 8 | . 
cmd.sh 9 | 10 | set -e 11 | 12 | #===== begin config ======= 13 | nj=20 14 | stage=0 15 | 16 | anoni_pool="libritts_train_other_500" # change this to the data you want to use for anonymization pool 17 | data_netcdf= # change this to dir where VC features data will be stored 18 | 19 | # Chain model for PPG extraction 20 | ppg_model= 21 | ppg_type= 22 | 23 | ppg_dir=exp/nnet3_cleaned # change this to the dir where PPGs will be stored 24 | 25 | # x-vector extraction 26 | xvec_nnet_dir= # change this to pretrained xvector model downloaded from Kaldi website 27 | anon_xvec_out_dir=${xvec_nnet_dir}/anon 28 | 29 | plda_dir=${xvec_nnet_dir}/xvectors_train 30 | 31 | pseudo_xvec_rand_level=spk # spk (all utterances will have same xvector) or utt (each utterance will have randomly selected xvector) 32 | cross_gender="false" # true, same gender xvectors will be selected; false, other gender xvectors 33 | distance="cosine" # cosine/plda 34 | proximity="farthest" # nearest/farthest 35 | 36 | anon_data_suffix=_anon_${pseudo_xvec_rand_level}_${cross_gender}_${distance}_${proximity} 37 | 38 | rand_seed=2020 39 | 40 | #=========== end config =========== 41 | 42 | . utils/parse_options.sh 43 | 44 | if [ $# != 1 ]; then 45 | echo "Usage: " 46 | echo " $0 [options] " 47 | echo "Options" 48 | echo " --nj=40 # Number of CPUs to use for feature extraction" 49 | exit 1; 50 | fi 51 | 52 | data_dir="$1" # Data to be anonymized, must be in Kaldi format 53 | 54 | spk2utt=data/$data_dir/spk2utt 55 | [ ! -f $spk2utt ] && echo "File $spk2utt does not exist" && exit 1 56 | num_spk=$(wc -l < $spk2utt) 57 | [ $nj -gt $num_spk ] && nj=$num_spk 58 | 59 | # Extract xvectors from data which has to be anonymized 60 | if [ $stage -le 0 ]; then 61 | printf "${RED}\nStage a.0: Extracting xvectors for ${data_dir}.${NC}\n" 62 | local/featex/01_extract_xvectors.sh --nj $nj data/${data_dir} ${xvec_nnet_dir} \ 63 | ${anon_xvec_out_dir} || exit 1; 64 | fi 65 | 66 | # Generate pseudo-speakers for source data 67 | if [ $stage -le 1 ]; then 68 | printf "${RED}\nStage a.1: Generating pseudo-speakers for ${data_dir}.${NC}\n" 69 | local/anon/make_pseudospeaker.sh --rand-level ${pseudo_xvec_rand_level} \ 70 | --cross-gender ${cross_gender} --distance ${distance} \ 71 | --proximity ${proximity} --rand-seed ${rand_seed} \ 72 | data/${data_dir} data/${anoni_pool} ${anon_xvec_out_dir} \ 73 | ${plda_dir} || exit 1; 74 | fi 75 | 76 | # Extract pitch for source data 77 | if [ $stage -le 2 ]; then 78 | printf "${RED}\nStage a.2: Pitch extraction for ${data_dir}.${NC}\n" 79 | local/featex/02_extract_pitch.sh --nj ${nj} data/${data_dir} || exit 1; 80 | fi 81 | 82 | # Extract PPGs for source data 83 | if [ $stage -le 3 ]; then 84 | printf "${RED}\nStage a.3: PPG extraction for ${data_dir}.${NC}\n" 85 | local/featex/extract_ppg.sh --nj $nj --stage 0 \ 86 | ${data_dir} ${ppg_model} ${ppg_dir}/ppg_${data_dir} || exit 1; 87 | fi 88 | 89 | # Create netcdf data for voice conversion 90 | if [ $stage -le 4 ]; then 91 | printf "${RED}\nStage a.4: Make netcdf data for VC.${NC}\n" 92 | local/anon/make_netcdf.sh --stage 0 data/${data_dir} ${ppg_dir}/ppg_${data_dir}/phone_post.scp \ 93 | ${anon_xvec_out_dir}/xvectors_${data_dir}/pseudo_xvecs/pseudo_xvector.scp \ 94 | ${data_netcdf}/${data_dir} || exit 1; 95 | fi 96 | 97 | if [ $stage -le 5 ]; then 98 | printf "${RED}\nStage a.5: Extract melspec from acoustic model for ${data_dir}.${NC}\n" 99 | local/vc/am/01_gen.sh ${data_netcdf}/${data_dir} ${ppg_type} || exit 1; 100 | fi 101 | 102 | if [ $stage -le 6 ]; then 103 | 
printf "${RED}\nStage a.6: Generate waveform from NSF model for ${data_dir}.${NC}\n" 104 | local/vc/nsf/01_gen.sh ${data_netcdf}/${data_dir} || exit 1; 105 | fi 106 | 107 | if [ $stage -le 7 ]; then 108 | printf "${RED}\nStage a.7: Creating new data directories corresponding to anonymization.${NC}\n" 109 | wav_path=${data_netcdf}/${data_dir}/nsf_output_wav 110 | new_data_dir=data/${data_dir}${anon_data_suffix} 111 | if [ -d "$new_data_dir" ]; then 112 | rm -rf ${new_data_dir} 113 | fi 114 | utils/copy_data_dir.sh data/${data_dir} ${new_data_dir} 115 | [ -f ${new_data_dir}/feats.scp ] && rm ${new_data_dir}/feats.scp 116 | [ -f ${new_data_dir}/vad.scp ] && rm ${new_data_dir}/vad.scp 117 | # Copy new spk2gender in case cross_gender vc has been done 118 | cp ${anon_xvec_out_dir}/xvectors_${data_dir}/pseudo_xvecs/spk2gender ${new_data_dir}/ 119 | awk -v p="$wav_path" '{print $1, "sox", p"/"$1".wav", "-t wav -R -b 16 - |"}' data/${data_dir}/wav.scp > ${new_data_dir}/wav.scp 120 | fi 121 | -------------------------------------------------------------------------------- /baseline/local/nnet3/tuning/run_tdnn_1a.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 4 | # call multi-splice. 5 | 6 | # without cleanup: 7 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 8 | 9 | 10 | # At this script level we don't support not running on GPU, as it would be painfully slow. 11 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 12 | # --num-threads 16 and --minibatch-size 128. 13 | 14 | # First the options that are passed through to run_ivector_common.sh 15 | # (some of which are also used in this script directly). 16 | stage=0 17 | decode_nj=30 18 | train_set=train_960_cleaned 19 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 20 | # should have alignments for the specified training data. 21 | nnet3_affix=_cleaned 22 | 23 | # Options which are not passed through to run_ivector_common.sh 24 | affix= 25 | train_stage=-10 26 | common_egs_dir= 27 | reporting_email= 28 | remove_egs=true 29 | 30 | . ./cmd.sh 31 | . ./path.sh 32 | . ./utils/parse_options.sh 33 | 34 | 35 | if ! 
cuda-compiled; then 36 | cat </dev/null || true 108 | for test in test_clean test_other dev_clean dev_other; do 109 | ( 110 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 111 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 112 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 113 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 114 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 115 | steps/lmrescore_const_arpa.sh \ 116 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 117 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 118 | steps/lmrescore_const_arpa.sh \ 119 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 120 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 121 | ) || touch $dir/.error & 122 | done 123 | wait 124 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 125 | fi 126 | 127 | exit 0; 128 | -------------------------------------------------------------------------------- /baseline/local/anon/anonymise_dir_mcadams.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.0 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @author: Jose Patino, Massimiliano Todisco, Pramod Bachhav, Nicholas Evans 5 | Audio Security and Privacy Group, EURECOM 6 | """ 7 | import os 8 | import librosa 9 | import numpy as np 10 | import scipy 11 | import argparse 12 | 13 | def anonym(file, output_dir, winLengthinms=20, shiftLengthinms=10, lp_order=20, mcadams=0.8): 14 | filename = file[0] 15 | filepath = file[1] 16 | output_file = output_dir + filename + '.wav' 17 | if not os.path.exists(output_dir): os.makedirs(output_dir) 18 | sig, fs = librosa.load(filepath,sr=None) 19 | eps = np.finfo(np.float32).eps 20 | sig = sig+eps 21 | 22 | # simulation parameters 23 | winlen = np.floor(winLengthinms*0.001*fs).astype(int) 24 | shift = np.floor(shiftLengthinms*0.001*fs).astype(int) 25 | length_sig = len(sig) 26 | 27 | # fft processing parameters 28 | NFFT = 2**(np.ceil((np.log2(winlen)))).astype(int) 29 | # anaysis and synth window which satisfies the constraint 30 | wPR = np.hanning(winlen) 31 | K = np.sum(wPR)/shift 32 | win = np.sqrt(wPR/K) 33 | Nframes = 1+np.floor((length_sig-winlen)/shift).astype(int) # nr of complete frames 34 | 35 | # carry out the overlap - add FFT processing 36 | sig_rec = np.zeros([length_sig]) # allocate output+'ringing' vector 37 | 38 | for m in np.arange(1,Nframes): 39 | # indices of the mth frame 40 | index = np.arange(m*shift,np.minimum(m*shift+winlen,length_sig)) 41 | # windowed mth frame (other than rectangular window) 42 | frame = sig[index]*win 43 | # get lpc coefficients 44 | a_lpc = librosa.core.lpc(frame+eps,lp_order) 45 | # get poles 46 | poles = scipy.signal.tf2zpk(np.array([1]), a_lpc)[1] 47 | #index of imaginary poles 48 | ind_imag = np.where(np.isreal(poles)==False)[0] 49 | #index of first imaginary poles 50 | ind_imag_con = ind_imag[np.arange(0,np.size(ind_imag),2)] 51 | 52 | # here we define the new angles of the poles, shifted accordingly to the mcadams coefficient 53 | # values >1 expand the spectrum, while values <1 constract it for angles>1 54 | # values >1 constract the spectrum, while values <1 expand it for angles<1 55 | # the choice of this value is strongly linked to the number of lpc coefficients 56 | # a bigger lpc coefficients number constraints the effect of the coefficient to very small variations 57 | # a smaller lpc 
coefficients number allows for a bigger flexibility 58 | new_angles = np.angle(poles[ind_imag_con])**mcadams 59 | 60 | # make sure new angles stay between 0 and pi 61 | new_angles[np.where(new_angles>=np.pi)] = np.pi 62 | new_angles[np.where(new_angles<=0)] = 0 63 | 64 | # copy of the original poles to be adjusted with the new angles 65 | new_poles = poles 66 | for k in np.arange(np.size(ind_imag_con)): 67 | # compute new poles with the same magnitued and new angles 68 | new_poles[ind_imag_con[k]] = np.abs(poles[ind_imag_con[k]])*np.exp(1j*new_angles[k]) 69 | # applied also to the conjugate pole 70 | new_poles[ind_imag_con[k]+1] = np.abs(poles[ind_imag_con[k]+1])*np.exp(-1j*new_angles[k]) 71 | 72 | # recover new, modified lpc coefficients 73 | a_lpc_new = np.real(np.poly(new_poles)) 74 | # get residual excitation for reconstruction 75 | res = scipy.signal.lfilter(a_lpc,np.array(1),frame) 76 | # reconstruct frames with new lpc coefficient 77 | frame_rec = scipy.signal.lfilter(np.array([1]),a_lpc_new,res) 78 | frame_rec = frame_rec*win 79 | 80 | outindex = np.arange(m*shift,m*shift+len(frame_rec)) 81 | # overlap add 82 | sig_rec[outindex] = sig_rec[outindex] + frame_rec 83 | sig_rec = sig_rec/np.max(np.abs(sig_rec)) 84 | scipy.io.wavfile.write(output_file, fs, np.float32(sig_rec)) 85 | return [] 86 | 87 | if __name__ == "__main__": 88 | #Parse args 89 | parser = argparse.ArgumentParser() 90 | parser.add_argument('--data_dir',type=str,default='../data/libri_test_enrolls_anon') 91 | parser.add_argument('--anon_suffix',type=str,default='_anon') 92 | parser.add_argument('--n_coeffs',type=int,default=20) 93 | parser.add_argument('--mc_coeff',type=float,default=0.8) 94 | parser.add_argument('--winLengthinms',type=int,default=20) 95 | parser.add_argument('--shiftLengthinms',type=int,default=10) 96 | config = parser.parse_args() 97 | 98 | #Load protocol file 99 | list_name= config.data_dir + '/wav.scp' 100 | list_files = np.genfromtxt(list_name,dtype='U') 101 | 102 | config.data_dir = config.data_dir+config.anon_suffix 103 | 104 | for idx,file in enumerate(list_files): 105 | print(str(idx+1),'/',len(list_files)) 106 | anonym(file, output_dir=config.data_dir+'/wav/'+file[0]+'/', winLengthinms=config.winLengthinms, shiftLengthinms=config.shiftLengthinms, lp_order=config.n_coeffs, mcadams=config.mc_coeff) 107 | 108 | -------------------------------------------------------------------------------- /baseline/local/fix_eval2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Usage: python local/fix_eval2.py 3 | ''' 4 | import sys 5 | import os 6 | import shutil 7 | from os.path import join, exists 8 | 9 | args = sys.argv 10 | proto_path = args[1] 11 | enroll_dir = args[2] 12 | trial_dir = args[3] 13 | 14 | enroll_files = ["enroll.txt"] 15 | trial_files = ["trials.txt"] 16 | 17 | # Prepare enroll data 18 | wav_scp = [] 19 | utt2spk = [] 20 | spk2gender = [] 21 | text = [] 22 | 23 | enroll_spks = set() 24 | enroll_uttids = set() 25 | 26 | for ef in enroll_files: 27 | ef_path = join(proto_path, ef) 28 | with open(ef_path) as f: 29 | for line in f.read().splitlines(): 30 | line = line.strip().split() 31 | 32 | # Register all speaker ids 33 | spkid = line[0] 34 | enroll_spks.add(spkid) 35 | 36 | # Register all utterance ids 37 | uttarr = line[1].split(',') 38 | for utt in uttarr: 39 | uttid = utt.split('/')[-1].split('.')[0] 40 | enroll_uttids.add(uttid) 41 | 42 | # Filter wav.scp, utt2spk, text and spk2gender 43 | # based on spkids and uttids 44 | with 
open(join(enroll_dir, 'wav.scp')) as f: 45 | for line in f.readlines(): 46 | uttid = line.split()[0] 47 | if uttid in enroll_uttids: 48 | wav_scp.append(line) 49 | with open(join(enroll_dir, 'text')) as f: 50 | for line in f.readlines(): 51 | uttid = line.split()[0] 52 | if uttid in enroll_uttids: 53 | text.append(line) 54 | with open(join(enroll_dir, 'utt2spk')) as f: 55 | for line in f.readlines(): 56 | uttid = line.split()[0] 57 | if uttid in enroll_uttids: 58 | utt2spk.append(line) 59 | with open(join(enroll_dir, 'spk2gender')) as f: 60 | for line in f.readlines(): 61 | spkid = line.split()[0] 62 | if spkid in enroll_spks: 63 | spk2gender.append(line) 64 | with open(join(enroll_dir, 'wav.scp'), 'w') as f: 65 | for line in wav_scp: 66 | f.write(line) 67 | with open(join(enroll_dir, 'text'), 'w') as f: 68 | for line in text: 69 | f.write(line) 70 | with open(join(enroll_dir, 'utt2spk'), 'w') as f: 71 | for line in utt2spk: 72 | f.write(line) 73 | with open(join(enroll_dir, 'spk2gender'), 'w') as f: 74 | for line in spk2gender: 75 | f.write(line) 76 | 77 | 78 | 79 | # Prepare trial data 80 | wav_scp = [] 81 | utt2spk = [] 82 | spk2gender = [] 83 | text = [] 84 | trials_male = [] 85 | trials_female = [] 86 | 87 | trial_spks = set() 88 | trial_uttids = set() 89 | 90 | for i, tf in enumerate(trial_files): 91 | tf_path = join(proto_path, tf) 92 | with open(tf_path) as f: 93 | for line in f.read().splitlines(): 94 | line = line.strip().split() 95 | spkid = line[0] 96 | utt = line[1] 97 | target_type = line[2] 98 | gender = line[3] 99 | 100 | uttid = utt.split('/')[-1].split('.')[0] 101 | utt_spkid = uttid.split('-')[0] 102 | 103 | trial_spks.add(spkid) 104 | trial_spks.add(utt_spkid) 105 | trial_uttids.add(uttid) 106 | 107 | if gender == 'M': 108 | trials_male.append(spkid + ' ' + uttid + ' ' + target_type) 109 | else: 110 | trials_female.append(spkid + ' ' + uttid + ' ' + target_type) 111 | 112 | 113 | # Filter wav.scp, utt2spk and spk2gender 114 | # based on spkids and uttids 115 | with open(join(trial_dir, 'wav.scp')) as f: 116 | for line in f.readlines(): 117 | uttid = line.split()[0] 118 | if uttid in trial_uttids: 119 | wav_scp.append(line) 120 | with open(join(trial_dir, 'text')) as f: 121 | for line in f.readlines(): 122 | uttid = line.split()[0] 123 | if uttid in trial_uttids: 124 | text.append(line) 125 | with open(join(trial_dir, 'utt2spk')) as f: 126 | for line in f.readlines(): 127 | uttid = line.split()[0] 128 | if uttid in trial_uttids: 129 | utt2spk.append(line) 130 | with open(join(trial_dir, 'spk2gender')) as f: 131 | for line in f.readlines(): 132 | spkid = line.split()[0] 133 | if spkid in trial_spks: 134 | spk2gender.append(line) 135 | with open(join(trial_dir, 'wav.scp'), 'w') as f: 136 | for line in wav_scp: 137 | f.write(line) 138 | with open(join(trial_dir, 'text'), 'w') as f: 139 | for line in text: 140 | f.write(line) 141 | with open(join(trial_dir, 'utt2spk'), 'w') as f: 142 | for line in utt2spk: 143 | f.write(line) 144 | with open(join(trial_dir, 'spk2gender'), 'w') as f: 145 | for line in spk2gender: 146 | f.write(line) 147 | 148 | 149 | all_trials = sorted(trials_male + trials_female) 150 | with open(join(trial_dir, 'trials'), 'w') as f: 151 | f.write('\n'.join(all_trials) + '\n') 152 | 153 | tt_male = sorted(trials_male) 154 | tt_female = sorted(trials_female) 155 | with open(join(trial_dir, 'trials_male'), 'w') as f: 156 | f.write('\n'.join(tt_male) + '\n') 157 | with open(join(trial_dir, 'trials_female'), 'w') as f: 158 | f.write('\n'.join(tt_female) + '\n') 
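Note: fix_eval2.py above repeats the same filter-and-rewrite step for wav.scp, text, utt2spk and spk2gender in both the enroll and trial directories. The snippet below is only an illustrative sketch of that shared pattern; the helper name filter_kaldi_table is hypothetical and is not part of the recipe.

from os.path import join

def filter_kaldi_table(data_dir, fname, keep_keys):
    # Keep only lines whose first field (utterance or speaker id) is in keep_keys,
    # then rewrite the table in place, as fix_eval2.py does for each Kaldi file.
    path = join(data_dir, fname)
    with open(path) as f:
        kept = [line for line in f if line.split()[0] in keep_keys]
    with open(path, 'w') as f:
        f.writelines(kept)

# e.g. filter_kaldi_table(trial_dir, 'wav.scp', trial_uttids)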
159 | 160 | -------------------------------------------------------------------------------- /baseline/local/chain/compare_wer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this script is used for comparing decoding results between systems. 4 | # e.g. local/chain/compare_wer.sh exp/chain/tdnn_{c,d}_sp 5 | # For use with discriminatively trained systems you specify the epochs after a colon: 6 | # for instance, 7 | # local/chain/compare_wer.sh exp/chain/tdnn_c_sp exp/chain/tdnn_c_sp_smbr:{1,2,3} 8 | 9 | 10 | if [ $# == 0 ]; then 11 | echo "Usage: $0: [--looped] [--online] [ ... ]" 12 | echo "e.g.: $0 exp/chain/tdnn_{b,c}_sp" 13 | echo "or (with epoch numbers for discriminative training):" 14 | echo "$0 exp/chain/tdnn_b_sp_disc:{1,2,3}" 15 | exit 1 16 | fi 17 | 18 | echo "# $0 $*" 19 | 20 | include_looped=false 21 | if [ "$1" == "--looped" ]; then 22 | include_looped=true 23 | shift 24 | fi 25 | include_online=false 26 | if [ "$1" == "--online" ]; then 27 | include_online=true 28 | shift 29 | fi 30 | 31 | 32 | used_epochs=false 33 | 34 | # this function set_names is used to separate the epoch-related parts of the name 35 | # [for discriminative training] and the regular parts of the name. 36 | # If called with a colon-free directory name, like: 37 | # set_names exp/chain/tdnn_lstm1e_sp_bi_smbr 38 | # it will set dir=exp/chain/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" 39 | # If called with something like: 40 | # set_names exp/chain/tdnn_d_sp_smbr:3 41 | # it will set dir=exp/chain/tdnn_d_sp_smbr and epoch_infix="_epoch3" 42 | 43 | 44 | set_names() { 45 | if [ $# != 1 ]; then 46 | echo "compare_wer_general.sh: internal error" 47 | exit 1 # exit the program 48 | fi 49 | dirname=$(echo $1 | cut -d: -f1) 50 | epoch=$(echo $1 | cut -s -d: -f2) 51 | if [ -z $epoch ]; then 52 | epoch_infix="" 53 | else 54 | used_epochs=true 55 | epoch_infix=_epoch${epoch} 56 | fi 57 | } 58 | 59 | 60 | 61 | echo -n "# System " 62 | for x in $*; do printf "% 10s" " $(basename $x)"; done 63 | echo 64 | 65 | strings=( 66 | "# WER on dev(fglarge) " 67 | "# WER on dev(tglarge) " 68 | "# WER on dev(tgmed) " 69 | "# WER on dev(tgsmall) " 70 | "# WER on dev_other(fglarge) " 71 | "# WER on dev_other(tglarge) " 72 | "# WER on dev_other(tgmed) " 73 | "# WER on dev_other(tgsmall) " 74 | "# WER on test(fglarge) " 75 | "# WER on test(tglarge) " 76 | "# WER on test(tgmed) " 77 | "# WER on test(tgsmall) " 78 | "# WER on test_other(fglarge) " 79 | "# WER on test_other(tglarge) " 80 | "# WER on test_other(tgmed) " 81 | "# WER on test_other(tgsmall) ") 82 | 83 | for n in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do 84 | echo -n "${strings[$n]}" 85 | for x in $*; do 86 | set_names $x # sets $dirname and $epoch_infix 87 | decode_names=(dev_clean_fglarge dev_clean_tglarge dev_clean_tgmed dev_clean_tgsmall dev_other_fglarge dev_other_tglarge dev_other_tgmed dev_other_tgsmall test_clean_fglarge test_clean_tglarge test_clean_tgmed test_clean_tgsmall test_other_fglarge test_other_tglarge test_other_tgmed test_other_tgsmall) 88 | 89 | wer=$(grep WER $dirname/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 90 | printf "% 10s" $wer 91 | done 92 | echo 93 | if $include_looped; then 94 | echo -n "# [looped:] " 95 | for x in $*; do 96 | set_names $x # sets $dirname and $epoch_infix 97 | wer=$(grep WER $dirname/decode_looped_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 98 | printf "% 10s" $wer 99 | done 100 | echo 101 | fi 102 | if 
$include_online; then 103 | echo -n "# [online:] " 104 | for x in $*; do 105 | set_names $x # sets $dirname and $epoch_infix 106 | wer=$(grep WER ${dirname}_online/decode_${decode_names[$n]}/wer_* | utils/best_wer.sh | awk '{print $2}') 107 | printf "% 10s" $wer 108 | done 109 | echo 110 | fi 111 | done 112 | 113 | 114 | if $used_epochs; then 115 | exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. 116 | fi 117 | 118 | 119 | echo -n "# Final train prob " 120 | for x in $*; do 121 | prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') 122 | printf "% 10s" $prob 123 | done 124 | echo 125 | 126 | echo -n "# Final valid prob " 127 | for x in $*; do 128 | prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') 129 | printf "% 10s" $prob 130 | done 131 | echo 132 | 133 | echo -n "# Final train prob (xent) " 134 | for x in $*; do 135 | prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') 136 | printf "% 10s" $prob 137 | done 138 | echo 139 | 140 | echo -n "# Final valid prob (xent) " 141 | for x in $*; do 142 | prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') 143 | printf "% 10s" $prob 144 | done 145 | echo 146 | 147 | echo -n "# Num-parameters " 148 | for x in $*; do 149 | num_params=$(grep num-parameters $x/log/progress.1.log | awk '{print $2}') 150 | printf "% 10d" $num_params 151 | done 152 | echo 153 | -------------------------------------------------------------------------------- /baseline/run_asr_eval_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | 6 | 7 | stage=6 8 | 9 | . ./cmd.sh 10 | . ./path.sh 11 | 12 | nj=10 13 | 14 | . 
parse_options.sh 15 | 16 | train="train_clean_360" 17 | 18 | if [ $stage -le 6 ]; then 19 | for part in dev_clean test_clean $train; do 20 | steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj data/$part exp/make_mfcc/$part $mfccdir 21 | steps/compute_cmvn_stats.sh data/$part exp/make_mfcc/$part $mfccdir 22 | done 23 | fi 24 | 25 | if [ $stage -le 7 ]; then 26 | utils/subset_data_dir.sh --shortest data/$train 5000 data/train_5k 27 | fi 28 | 29 | if [ $stage -le 8 ]; then 30 | steps/train_mono.sh \ 31 | --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ 32 | data/train_5k data/lang_nosp exp/mono 33 | ( 34 | utils/mkgraph.sh \ 35 | data/lang_nosp_test_tgsmall \ 36 | exp/mono exp/mono/graph_nosp_tgsmall 37 | for test in test_clean dev_clean; do 38 | steps/decode.sh \ 39 | --nj $nj --cmd "$decode_cmd" \ 40 | exp/mono/graph_nosp_tgsmall \ 41 | data/$test exp/mono/decode_nosp_tgsmall_$test 42 | done 43 | )& 44 | fi 45 | 46 | if [ $stage -le 9 ]; then 47 | steps/align_si.sh \ 48 | --boost-silence 1.25 --nj $nj --cmd "$train_cmd" \ 49 | data/$train data/lang_nosp exp/mono exp/mono_ali 50 | steps/train_deltas.sh \ 51 | --boost-silence 1.25 --cmd "$train_cmd" \ 52 | 2000 20000 data/$train data/lang_nosp \ 53 | exp/mono_ali exp/tri1 54 | ( 55 | utils/mkgraph.sh \ 56 | data/lang_nosp_test_tgsmall \ 57 | exp/tri1 exp/tri1/graph_nosp_tgsmall 58 | for test in test_clean dev_clean; do 59 | steps/decode.sh --nj $nj --cmd "$decode_cmd" exp/tri1/graph_nosp_tgsmall \ 60 | data/$test exp/tri1/decode_nosp_tgsmall_$test 61 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 62 | data/$test exp/tri1/decode_nosp_{tgsmall,tgmed}_$test 63 | steps/lmrescore_const_arpa.sh \ 64 | --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ 65 | data/$test exp/tri1/decode_nosp_{tgsmall,tglarge}_$test 66 | done 67 | )& 68 | fi 69 | 70 | if [ $stage -le 10 ]; then 71 | steps/align_si.sh --nj $nj --cmd "$train_cmd" \ 72 | data/$train data/lang_nosp exp/tri1 exp/tri1_ali 73 | steps/train_lda_mllt.sh --cmd "$train_cmd" \ 74 | --splice-opts "--left-context=3 --right-context=3" 2500 25000 \ 75 | data/$train data/lang_nosp exp/tri1_ali exp/tri2b 76 | ( 77 | utils/mkgraph.sh data/lang_nosp_test_tgsmall \ 78 | exp/tri2b exp/tri2b/graph_nosp_tgsmall 79 | for test in test_clean dev_clean; do 80 | steps/decode.sh --nj $nj --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgsmall \ 81 | data/$test exp/tri2b/decode_nosp_tgsmall_$test 82 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 83 | data/$test exp/tri2b/decode_nosp_{tgsmall,tgmed}_$test 84 | steps/lmrescore_const_arpa.sh \ 85 | --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tglarge} \ 86 | data/$test exp/tri2b/decode_nosp_{tgsmall,tglarge}_$test 87 | done 88 | )& 89 | fi 90 | 91 | if [ $stage -le 11 ]; then 92 | steps/align_si.sh --nj $nj --cmd "$train_cmd" --use-graphs true \ 93 | data/$train data/lang_nosp exp/tri2b exp/tri2b_ali 94 | steps/train_sat.sh --cmd "$train_cmd" 3000 45000 \ 95 | data/$train data/lang_nosp exp/tri2b_ali exp/tri3b 96 | ( 97 | utils/mkgraph.sh data/lang_nosp_test_tgsmall \ 98 | exp/tri3b exp/tri3b/graph_nosp_tgsmall 99 | for test in test_clean dev_clean; do 100 | steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" \ 101 | exp/tri3b/graph_nosp_tgsmall data/$test \ 102 | exp/tri3b/decode_nosp_tgsmall_$test 103 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \ 104 | data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test 105 | steps/lmrescore_const_arpa.sh \ 106 | --cmd "$decode_cmd" 
data/lang_nosp_test_{tgsmall,tglarge} \ 107 | data/$test exp/tri3b/decode_nosp_{tgsmall,tglarge}_$test 108 | done 109 | )& 110 | fi 111 | 112 | if [ $stage -le 19 ]; then 113 | # this does some data-cleaning. The cleaned data should be useful when we add 114 | # the neural net and chain systems. (although actually it was pretty clean already.) 115 | local/run_cleanup_segmentation.sh --data "data/$train" 116 | fi 117 | 118 | if [ $stage -le 20 ]; then 119 | # train and test nnet3 tdnn models on the entire data with data-cleaning. 120 | # set "--stage 11" if you have already run local/nnet3/run_tdnn.sh 121 | 122 | 123 | # local/chain/run_tdnn_1d__360.sh 124 | local/chain/run_tdnn_1d__360.sh 125 | 126 | # --stage 15 --train_stage 563 127 | 128 | # local/chain/run_tdnn.sh \ 129 | # --stage 3 \ 130 | # --train_stage -10 131 | fi 132 | 133 | # The nnet3 TDNN recipe: 134 | # local/nnet3/run_tdnn.sh # set "--stage 11" if you have already run local/chain/run_tdnn.sh 135 | 136 | # # train models on cleaned-up data 137 | # # we've found that this isn't helpful-- see the comments in local/run_data_cleaning.sh 138 | # local/run_data_cleaning.sh 139 | 140 | # Wait for decodings in the background 141 | wait 142 | echo Done 143 | -------------------------------------------------------------------------------- /baseline/local/nnet3/run_tdnn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 1b is as 1a but uses xconfigs. 4 | 5 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 6 | # call multi-splice. 7 | 8 | # without cleanup: 9 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 10 | 11 | 12 | # At this script level we don't support not running on GPU, as it would be painfully slow. 13 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 14 | # --num-threads 16 and --minibatch-size 128. 15 | 16 | # First the options that are passed through to run_ivector_common.sh 17 | # (some of which are also used in this script directly). 18 | stage=0 19 | decode_nj=30 20 | train_set=train_960_cleaned 21 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 22 | # should have alignments for the specified training data. 23 | nnet3_affix=_cleaned 24 | 25 | # Options which are not passed through to run_ivector_common.sh 26 | affix= 27 | train_stage=-10 28 | common_egs_dir= 29 | reporting_email= 30 | remove_egs=true 31 | 32 | . ./cmd.sh 33 | . ./path.sh 34 | . ./utils/parse_options.sh 35 | 36 | 37 | if ! cuda-compiled; then 38 | cat < $dir/configs/network.xconfig 71 | input dim=100 name=ivector 72 | input dim=40 name=input 73 | fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 74 | 75 | relu-batchnorm-layer name=tdnn0 dim=1280 76 | relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2) 77 | relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3) 78 | relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2) 79 | relu-batchnorm-layer name=tdnn4 dim=1280 80 | output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5 81 | EOF 82 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ 83 | --config-dir $dir/configs || exit 1; 84 | fi 85 | 86 | if [ $stage -le 12 ]; then 87 | if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then 88 | utils/create_split_dir.pl \ 89 | /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage 90 | fi 91 | 92 | steps/nnet3/train_dnn.py --stage=$train_stage \ 93 | --cmd="$decode_cmd" \ 94 | --feat.online-ivector-dir $train_ivector_dir \ 95 | --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ 96 | --trainer.num-epochs 4 \ 97 | --trainer.optimization.num-jobs-initial 3 \ 98 | --trainer.optimization.num-jobs-final 16 \ 99 | --trainer.optimization.initial-effective-lrate 0.0017 \ 100 | --trainer.optimization.final-effective-lrate 0.00017 \ 101 | --egs.dir "$common_egs_dir" \ 102 | --cleanup.remove-egs $remove_egs \ 103 | --cleanup.preserve-model-interval 100 \ 104 | --feat-dir=$train_data_dir \ 105 | --ali-dir $ali_dir \ 106 | --lang data/lang \ 107 | --reporting.email="$reporting_email" \ 108 | --dir=$dir || exit 1; 109 | 110 | fi 111 | 112 | if [ $stage -le 13 ]; then 113 | # this does offline decoding that should give about the same results as the 114 | # real online decoding (the one with --per-utt true) 115 | rm $dir/.error 2>/dev/null || true 116 | for test in test_clean test_other dev_clean dev_other; do 117 | ( 118 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 119 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 120 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 121 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 122 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 123 | steps/lmrescore_const_arpa.sh \ 124 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 125 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 126 | steps/lmrescore_const_arpa.sh \ 127 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 128 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 129 | ) || touch $dir/.error & 130 | done 131 | wait 132 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 133 | fi 134 | 135 | exit 0; 136 | -------------------------------------------------------------------------------- /baseline/local/nnet3/tuning/run_tdnn_1b.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 1b is as 1a but uses xconfigs. 4 | 5 | # this is the standard "tdnn" system, built in nnet3; it's what we use to 6 | # call multi-splice. 7 | 8 | # without cleanup: 9 | # local/nnet3/run_tdnn.sh --train-set train960 --gmm tri6b --nnet3-affix "" & 10 | 11 | 12 | # At this script level we don't support not running on GPU, as it would be painfully slow. 13 | # If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, 14 | # --num-threads 16 and --minibatch-size 128. 15 | 16 | # First the options that are passed through to run_ivector_common.sh 17 | # (some of which are also used in this script directly). 18 | stage=0 19 | decode_nj=30 20 | train_set=train_960_cleaned 21 | gmm=tri6b_cleaned # this is the source gmm-dir for the data-type of interest; it 22 | # should have alignments for the specified training data. 23 | nnet3_affix=_cleaned 24 | 25 | # Options which are not passed through to run_ivector_common.sh 26 | affix= 27 | train_stage=-10 28 | common_egs_dir= 29 | reporting_email= 30 | remove_egs=true 31 | 32 | . ./cmd.sh 33 | . ./path.sh 34 | . ./utils/parse_options.sh 35 | 36 | 37 | if ! 
cuda-compiled; then 38 | cat < $dir/configs/network.xconfig 71 | input dim=100 name=ivector 72 | input dim=40 name=input 73 | fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat 74 | 75 | relu-batchnorm-layer name=tdnn0 dim=1280 76 | relu-batchnorm-layer name=tdnn1 dim=1280 input=Append(-1,2) 77 | relu-batchnorm-layer name=tdnn2 dim=1280 input=Append(-3,3) 78 | relu-batchnorm-layer name=tdnn3 dim=1280 input=Append(-7,2) 79 | relu-batchnorm-layer name=tdnn4 dim=1280 80 | output-layer name=output input=tdnn4 dim=$num_targets max-change=1.5 81 | EOF 82 | steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \ 83 | --config-dir $dir/configs || exit 1; 84 | fi 85 | 86 | if [ $stage -le 12 ]; then 87 | if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then 88 | utils/create_split_dir.pl \ 89 | /export/b0{3,4,5,6}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage 90 | fi 91 | 92 | steps/nnet3/train_dnn.py --stage=$train_stage \ 93 | --cmd="$decode_cmd" \ 94 | --feat.online-ivector-dir $train_ivector_dir \ 95 | --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ 96 | --trainer.num-epochs 4 \ 97 | --trainer.optimization.num-jobs-initial 3 \ 98 | --trainer.optimization.num-jobs-final 16 \ 99 | --trainer.optimization.initial-effective-lrate 0.0017 \ 100 | --trainer.optimization.final-effective-lrate 0.00017 \ 101 | --egs.dir "$common_egs_dir" \ 102 | --cleanup.remove-egs $remove_egs \ 103 | --cleanup.preserve-model-interval 100 \ 104 | --feat-dir=$train_data_dir \ 105 | --ali-dir $ali_dir \ 106 | --lang data/lang \ 107 | --reporting.email="$reporting_email" \ 108 | --dir=$dir || exit 1; 109 | 110 | fi 111 | 112 | if [ $stage -le 13 ]; then 113 | # this does offline decoding that should give about the same results as the 114 | # real online decoding (the one with --per-utt true) 115 | rm $dir/.error 2>/dev/null || true 116 | for test in test_clean test_other dev_clean dev_other; do 117 | ( 118 | steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ 119 | --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${test}_hires \ 120 | ${graph_dir} data/${test}_hires $dir/decode_${test}_tgsmall || exit 1 121 | steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ 122 | data/${test}_hires $dir/decode_${test}_{tgsmall,tgmed} || exit 1 123 | steps/lmrescore_const_arpa.sh \ 124 | --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ 125 | data/${test}_hires $dir/decode_${test}_{tgsmall,tglarge} || exit 1 126 | steps/lmrescore_const_arpa.sh \ 127 | --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \ 128 | data/${test}_hires $dir/decode_${test}_{tgsmall,fglarge} || exit 1 129 | ) || touch $dir/.error & 130 | done 131 | wait 132 | [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 133 | fi 134 | 135 | exit 0; 136 | -------------------------------------------------------------------------------- /baseline/local/similarity_matrices/compute_similarity_matrices_metrics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #===== begin config ======= 9 | 10 | set_test=libri_test_trials_f 11 | results= 12 | 13 | asv_eval_model=exp/models/asv_eval/xvect_01709_1 14 | plda_dir=$asv_eval_model/xvect_train_clean_360 15 | 16 | #=========== end config =========== 17 | 18 | . 
utils/parse_options.sh 19 | 20 | anon_data_suffix=_anon 21 | osp_set_folder=$asv_eval_model/xvect_$set_test 22 | psp_set_folder=${osp_set_folder}$anon_data_suffix 23 | utt2spk=data/$set_test/utt2spk 24 | 25 | printf "asv_eval_model = $asv_eval_model\n" 26 | printf "set_test = $set_test\n" 27 | printf "plda_dir = $plda_dir\n" 28 | printf "results = $results\n" 29 | 30 | exp_files_dir=$results/similarity_matrices_DeID_Gvd/$set_test/exp_files 31 | 32 | if [ ! -d "$exp_files_dir" ]; then 33 | mkdir -p $exp_files_dir 34 | fi 35 | 36 | cat $osp_set_folder/xvector.scp | cut -d' ' -f1 > $exp_files_dir/segments_osp_set.scp 37 | cat $psp_set_folder/xvector.scp | cut -d' ' -f1 > $exp_files_dir/segments_psp_set.scp 38 | 39 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_osp_set.scp $exp_files_dir/segments_osp_set.scp osp_osp $exp_files_dir/ $utt2spk 40 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_osp_set.scp $exp_files_dir/segments_psp_set.scp osp_psp $exp_files_dir/ $utt2spk 41 | python3 local/similarity_matrices/create_trial.py $exp_files_dir/segments_psp_set.scp $exp_files_dir/segments_psp_set.scp psp_psp $exp_files_dir/ $utt2spk 42 | wait 43 | 44 | #Compute scores Osp-Osp 45 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 46 | ivector-plda-scoring --normalize-length=true \ 47 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 48 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 49 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 50 | "cat $exp_files_dir/segments_osp_osp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_osp_osp || exit 1; 51 | 52 | 53 | #Compute scores Osp-Psp 54 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 55 | ivector-plda-scoring --normalize-length=true \ 56 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 57 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$osp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 58 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 59 | "cat $exp_files_dir/segments_osp_psp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_osp_psp || exit 1; 60 | 61 | 62 | #Compute scores Psp-Psp 63 | $train_cmd $exp_files_dir/scores/log/test_scoring.log \ 64 | ivector-plda-scoring --normalize-length=true \ 65 | "ivector-copy-plda --smoothing=0.0 $plda_dir/plda - |" \ 66 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 67 | "ark:ivector-subtract-global-mean $plda_dir/mean.vec scp:$psp_set_folder/xvector.scp ark:- | transform-vec $plda_dir/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 68 | "cat $exp_files_dir/segments_psp_psp_trial.txt | cut -d\ --fields=1,2 |" $exp_files_dir/scores_output_psp_psp || exit 1; 69 | 70 | 71 | python3 local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_osp_osp $exp_files_dir/spk_osp_osp_trial.txt 72 | python3 
local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_osp_psp $exp_files_dir/spk_osp_psp_trial.txt 73 | python3 local/similarity_matrices/scores_calibration.py $exp_files_dir/scores_output_psp_psp $exp_files_dir/spk_psp_psp_trial.txt 74 | wait 75 | 76 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_osp_osp.calibrated $exp_files_dir/spk_osp_osp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/$set_test osp_osp 77 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_osp_psp.calibrated $exp_files_dir/spk_osp_psp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/${set_test} osp_psp 78 | python3 local/similarity_matrices/compute_similarity_matrix.py $exp_files_dir/scores_output_psp_psp.calibrated $exp_files_dir/spk_psp_psp_trial.txt.calibrated $results/similarity_matrices_DeID_Gvd/${set_test} psp_psp 79 | wait 80 | 81 | DeID=$(python3 local/similarity_matrices/compute_DeID.py $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_osp.npy $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_psp.npy) 82 | Gvd=$(python3 local/similarity_matrices/compute_Gvd.py $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_osp_osp.npy $results/similarity_matrices_DeID_Gvd/${set_test}/similarity_matrix_psp_psp.npy) 83 | 84 | echo "Set : $set_test" 85 | echo " De-Identification : $DeID" 86 | echo " Gain of voice distinctiveness : $Gvd" 87 | 88 | echo "De-Identification : $DeID" > $results/similarity_matrices_DeID_Gvd/$set_test/DeIDentification 89 | echo "Gain of voice distinctiveness : $Gvd" > $results/similarity_matrices_DeID_Gvd/$set_test/gain_of_voice_distinctiveness 90 | 91 | -------------------------------------------------------------------------------- /baseline/local/split_long_utterance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2014 Guoguo Chen 4 | # Apache 2.0 5 | 6 | # Begin configuration section. 7 | seg_length=30 8 | min_seg_length=10 9 | overlap_length=5 10 | # End configuration section. 11 | 12 | echo "$0 $@" 13 | 14 | [ -f ./path.sh ] && . ./path.sh 15 | . parse_options.sh || exit 1; 16 | 17 | if [ $# -ne 2 ]; then 18 | echo "This script truncates the long audio into smaller overlapping segments" 19 | echo "" 20 | echo "Usage: $0 [options] " 21 | echo " e.g.: $0 data/train_si284_long data/train_si284_split" 22 | echo "" 23 | echo "Options:" 24 | echo " --min-seg-length # minimal segment length" 25 | echo " --seg-length # length of segments in seconds." 26 | echo " --overlap-length # length of overlap in seconds." 27 | exit 1; 28 | fi 29 | 30 | input_dir=$1 31 | output_dir=$2 32 | 33 | for f in spk2utt utt2spk wav.scp; do 34 | [ ! -f $input_dir/$f ] && echo "$0: no such file $input_dir/$f" && exit 1; 35 | done 36 | 37 | [ ! $seg_length -gt $overlap_length ] \ 38 | && echo "$0: --seg-length should be longer than --overlap-length." && exit 1; 39 | 40 | # Checks if sox is on the path. 41 | sox=`which sox` 42 | [ $? -ne 0 ] && echo "$0: sox command not found." && exit 1; 43 | sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe 44 | [ ! -x $sph2pipe ] && echo "$0: sph2pipe command not found." && exit 1; 45 | 46 | mkdir -p $output_dir 47 | cp -f $input_dir/spk2gender $output_dir/spk2gender 2>/dev/null 48 | cp -f $input_dir/wav.scp $output_dir/wav.scp 49 | 50 | # We assume the audio length in header is correct and get it from there. 
It is 51 | # a little bit annoying that old version of sox does not support the following: 52 | # $audio_cmd | sox --i -D 53 | # we have to put it in the following format for the old versions: 54 | # $sox --i -D "|$audio_cmd" 55 | # Another way is to count all the samples to get the duration, but it takes 56 | # longer time, so we do not use it here.. The command is: 57 | # $audio_cmd | sox -t wav - -n stat | grep -P "^Length" | awk '{print $1;}' 58 | # 59 | # Note: in the wsj example the process takes couple of minutes because of the 60 | # audio file concatenation; in a real case it should be much faster since 61 | # it just reads the header. 62 | cat $output_dir/wav.scp | perl -e ' 63 | $no_orig_seg = "false"; # Original segment file may or may not exist. 64 | ($u2s_in, $u2s_out, $seg_in, 65 | $seg_out, $orig2utt, $sox, $slen, $mslen, $olen) = @ARGV; 66 | open(UI, "<$u2s_in") || die "Error: fail to open $u2s_in\n"; 67 | open(UO, ">$u2s_out") || die "Error: fail to open $u2s_out\n"; 68 | open(SI, "<$seg_in") || ($no_orig_seg = "true"); 69 | open(SO, ">$seg_out") || die "Error: fail to open $seg_out\n"; 70 | open(UMAP, ">$orig2utt") || die "Error: fail to open $orig2utt\n"; 71 | # If the original segment file exists, we have to work out the segment 72 | # duration from the segment file. Otherwise we work that out from the wav.scp 73 | # file. 74 | if ($no_orig_seg eq "false") { 75 | while () { 76 | chomp; 77 | @col = split; 78 | @col == 4 || die "Error: bad line $_\n"; 79 | ($seg_id, $wav_id, $seg_start, $seg_end) = @col; 80 | $seg2wav{$seg_id} = $wav_id; 81 | $seg_start{$seg_id} = $seg_start; 82 | $seg_end{$seg_id} = $seg_end; 83 | } 84 | } else { 85 | while () { 86 | chomp; 87 | @col = split; 88 | @col >= 2 || "bad line $_\n"; 89 | if ((@col > 2) && ($col[-1] eq "|")) { 90 | $wav_id = shift @col; pop @col; 91 | $audio_cmd = join(" ", @col); 92 | $duration = `$sox --i -D '\''|$audio_cmd'\''`; 93 | } else { 94 | @col == 2 || die "Error: bad line $_\n in wav.scp"; 95 | $wav_id = $col[0]; 96 | $audio_file = $col[1]; 97 | $duration = `$sox --i -D $audio_file`; 98 | } 99 | chomp($duration); 100 | $seg2wav{$wav_id} = $wav_id; 101 | $seg_start{$wav_id} = 0; 102 | $seg_end{$wav_id} = $duration; 103 | } 104 | } 105 | while () { 106 | chomp; 107 | @col = split; 108 | @col == 2 || die "Error: bad line $_\n"; 109 | $utt2spk{$col[0]} = $col[1]; 110 | } 111 | foreach $seg (sort keys %seg2wav) { 112 | $index = 0; 113 | $step = $slen - $olen; 114 | print UMAP "$seg"; 115 | while ($seg_start{$seg} + $index * $step < $seg_end{$seg}) { 116 | $new_seg = $seg . "_" . sprintf("%05d", $index); 117 | $start = $seg_start{$seg} + $index * $step; 118 | $end = $start + $slen; 119 | defined($utt2spk{$seg}) || die "Error: speaker not found for $seg\n"; 120 | print UO "$new_seg $utt2spk{$seg}\n"; 121 | print UMAP " $new_seg"; 122 | $index += 1; 123 | if ($end - $olen + $mslen >= $seg_end{$seg}) { 124 | # last segment will have at least $mslen seconds. 125 | $end = $seg_end{$seg}; 126 | print SO "$new_seg $seg2wav{$seg} $start $end\n"; 127 | last; 128 | } else { 129 | print SO "$new_seg $seg2wav{$seg} $start $end\n"; 130 | } 131 | } 132 | print UMAP "\n"; 133 | }' $input_dir/utt2spk $output_dir/utt2spk \ 134 | $input_dir/segments $output_dir/segments $output_dir/orig2utt \ 135 | $sox $seg_length $min_seg_length $overlap_length 136 | 137 | # CAVEAT: We are not dealing with channels here. Each channel should have a 138 | # unique file name in wav.scp. 
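Note: the perl block above is the actual implementation; the Python sketch below only illustrates the segment-boundary rule it applies, assuming a single recording that starts at time 0 and the default options (seg_length=30, overlap_length=5, min_seg_length=10). Windows advance by seg_length - overlap_length seconds, and the current window is extended to the end of the recording whenever the following window would be shorter than min_seg_length. The function name split_segments is illustrative only.

def split_segments(duration, seg_length=30, min_seg_length=10, overlap_length=5):
    step = seg_length - overlap_length
    segs, index = [], 0
    while index * step < duration:
        start = index * step
        end = start + seg_length
        index += 1
        if end - overlap_length + min_seg_length >= duration:
            segs.append((start, duration))  # last segment absorbs the remainder
            break
        segs.append((start, end))
    return segs

# split_segments(72) -> [(0, 30), (25, 55), (50, 72)]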
139 | paste -d ' ' <(cut -d ' ' -f 1 $output_dir/wav.scp) \ 140 | <(cut -d ' ' -f 1 $output_dir/wav.scp) | awk '{print $1" "$2" A";}' \ 141 | > $output_dir/reco2file_and_channel 142 | 143 | utils/fix_data_dir.sh $output_dir 144 | 145 | exit 0; 146 | -------------------------------------------------------------------------------- /baseline/local/anon/gen_pseudo_xvecs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from os.path import basename, join 3 | import operator 4 | 5 | import numpy as np 6 | import random 7 | from kaldiio import WriteHelper, ReadHelper 8 | 9 | args = sys.argv 10 | print(args) 11 | 12 | src_data = args[1] 13 | pool_data = args[2] 14 | affinity_scores_dir = args[3] 15 | xvec_out_dir = args[4] 16 | pseudo_xvecs_dir = args[5] 17 | rand_level = args[6] 18 | cross_gender = args[7] == "true" 19 | proximity = args[8] 20 | 21 | rand_seed = args[9] 22 | 23 | REGION = 100 24 | WORLD = 200 25 | 26 | random.seed(rand_seed) 27 | 28 | if cross_gender: 29 | print("**Opposite gender speakers will be selected.**") 30 | else: 31 | print("**Same gender speakers will be selected.**") 32 | 33 | print("Randomization level: " + rand_level) 34 | print("Proximity: " + proximity) 35 | # Core logic of anonymization by randomization 36 | def select_random_xvec(top500, pool_xvectors): 37 | # number of random xvectors to select out of pool 38 | #random100mask = np.random.random_integers(0, 199, NR) 39 | random100mask = random.sample(range(WORLD), REGION) 40 | pseudo_spk_list = [x for i, x in enumerate(top500) if i in 41 | random100mask] 42 | pseudo_spk_matrix = np.zeros((REGION, 512), dtype='float64') 43 | for i, spk_aff in enumerate(pseudo_spk_list): 44 | pseudo_spk_matrix[i, :] = pool_xvectors[spk_aff[0]] 45 | # Take mean of 100 randomly selected xvectors 46 | pseudo_xvec = np.mean(pseudo_spk_matrix, axis=0) 47 | return pseudo_xvec 48 | 49 | 50 | gender_rev = {'m': 'f', 'f': 'm'} 51 | src_spk2gender_file = join(src_data, 'spk2gender') 52 | src_spk2utt_file = join(src_data, 'spk2utt') 53 | pool_spk2gender_file = join(pool_data, 'spk2gender') 54 | 55 | src_spk2gender = {} 56 | src_spk2utt = {} 57 | pool_spk2gender = {} 58 | # Read source spk2gender and spk2utt 59 | print("Reading source spk2gender.") 60 | with open(src_spk2gender_file) as f: 61 | for line in f.read().splitlines(): 62 | sp = line.split() 63 | src_spk2gender[sp[0]] = sp[1] 64 | print("Reading source spk2utt.") 65 | with open(src_spk2utt_file) as f: 66 | for line in f.read().splitlines(): 67 | sp = line.split() 68 | src_spk2utt[sp[0]] = sp[1:] 69 | # Read pool spk2gender 70 | print("Reading pool spk2gender.") 71 | with open(pool_spk2gender_file) as f: 72 | for line in f.read().splitlines(): 73 | sp = line.split() 74 | pool_spk2gender[sp[0]] = sp[1] 75 | 76 | # Read pool xvectors 77 | print("Reading pool xvectors.") 78 | pool_xvec_file = join(xvec_out_dir, 'xvectors_'+basename(pool_data), 79 | 'spk_xvector.scp') 80 | pool_xvectors = {} 81 | c = 0 82 | #with open(pool_xvec_file) as f: 83 | # for key, xvec in kaldi_io.read_vec_flt_scp(f): 84 | with ReadHelper('scp:'+pool_xvec_file) as reader: 85 | for key, xvec in reader: 86 | #print key, mat.shape 87 | pool_xvectors[key] = xvec 88 | c += 1 89 | print("Read ", c, "pool xvectors") 90 | 91 | pseudo_xvec_map = {} 92 | pseudo_gender_map = {} 93 | for spk, gender in src_spk2gender.items(): 94 | # Filter the affinity pool by gender 95 | affinity_pool = {} 96 | # If we are doing cross-gender VC, reverse the gender else gender remains same 
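Note: the per-speaker loop here, together with select_random_xvec above, is the actual selection logic; the following is only a condensed, illustrative rewrite of the same idea. For each source speaker: optionally flip the gender, keep pool speakers of the target gender, rank them by affinity score, keep the WORLD most distant (proximity=farthest) or closest (nearest), and average the x-vectors of a random REGION of them. The names pseudo_xvector, pool_gender and pool_xvecs are illustrative, not part of the recipe.

import random
import numpy as np

def pseudo_xvector(affinity, pool_gender, pool_xvecs, gender,
                   proximity="farthest", world=200, region=100):
    pool = [(s, a) for s, a in affinity.items() if pool_gender[s] == gender]
    pool.sort(key=lambda x: x[1], reverse=(proximity == "nearest"))
    candidates = [s for s, _ in pool[:world]]
    chosen = random.sample(candidates, min(region, len(candidates)))
    return np.mean([pool_xvecs[s] for s in chosen], axis=0)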
97 | if cross_gender: 98 | gender = gender_rev[gender] 99 | #print("Filtering pool for spk: "+spk) 100 | pseudo_gender_map[spk] = gender 101 | with open(join(affinity_scores_dir, 'affinity_'+spk)) as f: 102 | for line in f.read().splitlines(): 103 | sp = line.split() 104 | pool_spk = sp[1] 105 | af_score = float(sp[2]) 106 | if pool_spk2gender[pool_spk] == gender: 107 | affinity_pool[pool_spk] = af_score 108 | 109 | # Sort the filtered affinity pool by scores 110 | if proximity == "farthest": 111 | sorted_aff = sorted(affinity_pool.items(), key=operator.itemgetter(1)) 112 | elif proximity == "nearest": 113 | sorted_aff = sorted(affinity_pool.items(), key=operator.itemgetter(1), 114 | reverse=True) 115 | 116 | 117 | # Select WORLD least affinity speakers and then randomly select REGION out of 118 | # them 119 | top_spk = sorted_aff[:WORLD] 120 | if rand_level == 'spk': 121 | # For rand_level = spk, one xvector is assigned to all the utterances 122 | # of a speaker 123 | pseudo_xvec = select_random_xvec(top_spk, pool_xvectors) 124 | # Assign it to all utterances of the current speaker 125 | for uttid in src_spk2utt[spk]: 126 | pseudo_xvec_map[uttid] = pseudo_xvec 127 | elif rand_level == 'utt': 128 | # For rand_level = utt, random xvector is assigned to all the utterances 129 | # of a speaker 130 | for uttid in src_spk2utt[spk]: 131 | # Compute random vector for every utt 132 | pseudo_xvec = select_random_xvec(top_spk, pool_xvectors) 133 | # Assign it to all utterances of the current speaker 134 | pseudo_xvec_map[uttid] = pseudo_xvec 135 | else: 136 | print("rand_level not supported! Errors will happen!") 137 | 138 | 139 | # Write features as ark,scp 140 | print("Writing pseud-speaker xvectors to: "+pseudo_xvecs_dir) 141 | ark_scp_output = 'ark,scp:{}/{}.ark,{}/{}.scp'.format( 142 | pseudo_xvecs_dir, 'pseudo_xvector', 143 | pseudo_xvecs_dir, 'pseudo_xvector') 144 | with WriteHelper(ark_scp_output) as writer: 145 | for uttid, xvec in pseudo_xvec_map.items(): 146 | writer(uttid, xvec) 147 | 148 | print("Writing pseudo-speaker spk2gender.") 149 | with open(join(pseudo_xvecs_dir, 'spk2gender'), 'w') as f: 150 | spk2gen_arr = [spk+' '+gender for spk, gender in pseudo_gender_map.items()] 151 | sorted_spk2gen = sorted(spk2gen_arr) 152 | f.write('\n'.join(sorted_spk2gen) + '\n') 153 | 154 | 155 | -------------------------------------------------------------------------------- /baseline/run_xvector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 David Snyder 3 | # 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) 4 | # 2017 Johns Hopkins University (Author: Daniel Povey) 5 | # 6 | # Copied from egs/sre16/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh (commit e082c17d4a8f8a791428ae4d9f7ceb776aef3f0b). 7 | # 8 | # Apache 2.0. 9 | 10 | # Adapted from the follwing: 11 | # script trains a DNN similar to the recipe described in http://www.danielpovey.com/files/2018_icassp_xvectors.pdf 12 | 13 | . ./cmd.sh 14 | . ./path.sh 15 | 16 | set -e 17 | 18 | stage=1 19 | train_stage=-1 20 | lrate=001 21 | epochs=1 22 | shrink=10 23 | 24 | data=data/train_clean_360 25 | nnet_dir=exp/xvect 26 | egs_dir=$nnet_dir/egs 27 | 28 | . ./utils/parse_options.sh 29 | 30 | num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) 31 | 32 | # Now we create the nnet examples using sid/nnet3/xvector/get_egs.sh. 33 | # The argument --num-repeats is related to the number of times a speaker 34 | # repeats per archive. 
If it seems like you're getting too many archives 35 | # (e.g., more than 200) try increasing the --frames-per-iter option. The 36 | # arguments --min-frames-per-chunk and --max-frames-per-chunk specify the 37 | # minimum and maximum length (in terms of number of frames) of the features 38 | # in the examples. 39 | # 40 | # To make sense of the egs script, it may be necessary to put an "exit 1" 41 | # command immediately after stage 3. Then, inspect 42 | # exp//egs/temp/ranges.* . The ranges files specify the examples that 43 | # will be created, and which archives they will be stored in. Each line of 44 | # ranges.* has the following form: 45 | # 46 | # For example: 47 | # 100304-f-sre2006-kacg-A 1 2 4079 881 23 48 | 49 | # If you're satisfied with the number of archives (e.g., 50-150 archives is 50 | # reasonable) and with the number of examples per speaker (e.g., 1000-5000 51 | # is reasonable) then you can let the script continue to the later stages. 52 | # Otherwise, try increasing or decreasing the --num-repeats option. You might 53 | # need to fiddle with --frames-per-iter. Increasing this value decreases the 54 | # the number of archives and increases the number of examples per archive. 55 | # Decreasing this value increases the number of archives, while decreasing the 56 | # number of examples per archive. 57 | if [ $stage -le 6 ]; then 58 | echo "$0: Getting neural network training egs"; 59 | sid/nnet3/xvector/get_egs.sh --cmd "$train_cmd" \ 60 | --nj 8 \ 61 | --stage 0 \ 62 | --frames-per-iter 100000000 \ 63 | --frames-per-iter-diagnostic 100000 \ 64 | --min-frames-per-chunk 200 \ 65 | --max-frames-per-chunk 400 \ 66 | --num-diagnostic-archives 3 \ 67 | --num-repeats 50 \ 68 | "$data" $egs_dir || exit 1 69 | fi 70 | 71 | if [ $stage -le 7 ]; then 72 | echo "$0: creating neural net configs using the xconfig parser"; 73 | num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}') 74 | feat_dim=$(cat $egs_dir/info/feat_dim) 75 | 76 | # This chunk-size corresponds to the maximum number of frames the 77 | # stats layer is able to pool over. In this script, it corresponds 78 | # to 100 seconds. If the input recording is greater than 100 seconds, 79 | # we will compute multiple xvectors from the same recording and average 80 | # to produce the final xvector. 81 | max_chunk_size=10000 82 | 83 | # The smallest number of frames we're comfortable computing an xvector from. 84 | # Note that the hard minimum is given by the left and right context of the 85 | # frame-level layers. 86 | min_chunk_size=25 87 | mkdir -p $nnet_dir/configs 88 | cat < $nnet_dir/configs/network.xconfig 89 | # please note that it is important to have input layer with the name=input 90 | 91 | # The frame-level layers 92 | input dim=${feat_dim} name=input 93 | relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 94 | relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512 95 | relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512 96 | relu-batchnorm-layer name=tdnn4 dim=512 97 | relu-batchnorm-layer name=tdnn5 dim=1500 98 | 99 | # The stats pooling layer. Layers after this are segment-level. 100 | # In the config below, the first and last argument (0, and ${max_chunk_size}) 101 | # means that we pool over an input segment starting at frame 0 102 | # and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1) 103 | # mean that no subsampling is performed. 
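Note: as a rough illustration of what the stats-layer defined just below computes (this numpy sketch is not part of the recipe and is not how nnet3 implements it): per-dimension mean and standard deviation pooled over at most max_chunk_size frames, turning a (T, D) frame-level matrix into a single 2*D segment-level vector.

import numpy as np

def stats_pool(frames, max_chunk_size=10000):
    chunk = frames[:max_chunk_size]  # pool over at most max_chunk_size frames, no subsampling
    return np.concatenate([chunk.mean(axis=0), chunk.std(axis=0)])

# e.g. stats_pool(np.random.randn(300, 1500)).shape == (3000,)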
104 | stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size}) 105 | 106 | # This is where we usually extract the embedding (aka xvector) from. 107 | relu-batchnorm-layer name=tdnn6 dim=512 input=stats 108 | 109 | # This is where another layer the embedding could be extracted 110 | # from, but usually the previous one works better. 111 | relu-batchnorm-layer name=tdnn7 dim=512 112 | output-layer name=output include-log-softmax=true dim=${num_targets} 113 | EOF 114 | 115 | steps/nnet3/xconfig_to_configs.py \ 116 | --xconfig-file $nnet_dir/configs/network.xconfig \ 117 | --config-dir $nnet_dir/configs 118 | cp $nnet_dir/configs/final.config $nnet_dir/nnet.config 119 | 120 | # These three files will be used by sid/nnet3/xvector/extract_xvectors.sh 121 | echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract.config 122 | echo "$max_chunk_size" > $nnet_dir/max_chunk_size 123 | echo "$min_chunk_size" > $nnet_dir/min_chunk_size 124 | fi 125 | 126 | dropout_schedule='0,0@0.20,0.1@0.50,0' 127 | srand=123 128 | if [ $stage -le 8 ]; then 129 | steps/nnet3/train_raw_dnn.py --stage=$train_stage \ 130 | --cmd="$train_cmd" \ 131 | --trainer.optimization.proportional-shrink $shrink \ 132 | --trainer.optimization.momentum=0.5 \ 133 | --trainer.optimization.num-jobs-initial=2 \ 134 | --trainer.optimization.num-jobs-final=2 \ 135 | --trainer.optimization.initial-effective-lrate=0.$lrate \ 136 | --trainer.optimization.final-effective-lrate=0.0$lrate \ 137 | --trainer.optimization.minibatch-size=64 \ 138 | --trainer.srand=$srand \ 139 | --trainer.max-param-change=2 \ 140 | --trainer.num-epochs=$epochs \ 141 | --trainer.dropout-schedule="$dropout_schedule" \ 142 | --trainer.shuffle-buffer-size=1000 \ 143 | --egs.frames-per-eg=1 \ 144 | --egs.dir="$egs_dir" \ 145 | --cleanup.remove-egs false \ 146 | --cleanup.preserve-model-interval=5 \ 147 | --use-gpu=true \ 148 | --dir=$nnet_dir || exit 1 149 | fi 150 | 151 | exit 0 152 | -------------------------------------------------------------------------------- /baseline/run_asv_eval_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . ./cmd.sh 4 | . ./path.sh 5 | 6 | set -e 7 | 8 | #ASV_eval training on LibriSpeech train_clean_360 corpus 9 | 10 | 11 | nj=20 12 | voxceleb1_trials=data/voxceleb1_test/trials 13 | 14 | lrate=01709 15 | epochs=1 16 | shrink=10 17 | egs_dir=exp/xvect_egs 18 | 19 | stage=0 20 | train_stage=-1 21 | 22 | . ./utils/parse_options.sh 23 | 24 | nnet_dir=exp/xvect_${lrate}_${epochs} 25 | 26 | if [ $stage -le 0 ]; then 27 | for name in voxceleb1_test train_clean_360; do 28 | steps/make_mfcc.sh \ 29 | --write-utt2num-frames true \ 30 | --mfcc-config conf/mfcc.conf \ 31 | --nj $nj --cmd "$train_cmd" \ 32 | data/$name || exit 1 33 | utils/fix_data_dir.sh data/$name || exit 1 34 | sid/compute_vad_decision.sh \ 35 | --nj $nj --cmd "$train_cmd" \ 36 | --vad-config conf/vad.conf \ 37 | data/$name || exit 1 38 | utils/fix_data_dir.sh data/$name || exit 1 39 | done 40 | fi 41 | 42 | # Now we prepare the features to generate examples for xvector training. 43 | if [ $stage -le 1 ]; then 44 | # This script applies CMVN and removes nonspeech frames. Note that this is somewhat 45 | # wasteful, as it roughly doubles the amount of training data on disk. After 46 | # creating training examples, this can be removed. 
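Note: the prepare_feats_for_egs.sh call below is the actual Kaldi step; the numpy lines here are only a conceptual sketch of what it does (normalise the features and drop frames the VAD marked as nonspeech), with illustrative names and a plain per-utterance mean instead of Kaldi's sliding-window CMVN.

import numpy as np

def cmvn_and_remove_silence(feats, vad_decisions):
    normed = feats - feats.mean(axis=0, keepdims=True)  # cepstral mean normalisation
    return normed[vad_decisions.astype(bool)]           # keep only speech frames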
47 | local/nnet3/xvector/prepare_feats_for_egs.sh \ 48 | --nj $nj --cmd "$train_cmd" \ 49 | data/train_clean_360 data/train_clean_360_no_sil \ 50 | exp/train_clean_360_no_sil || exit 1 51 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 52 | fi 53 | 54 | if [ $stage -le 2 ]; then 55 | # Now, we need to remove features that are too short after removing silence 56 | # frames. We want atleast 5s (500 frames) per utterance. 57 | min_len=400 58 | mv data/train_clean_360_no_sil/utt2num_frames data/train_clean_360_no_sil/utt2num_frames.bak 59 | awk -v min_len=${min_len} '$2 > min_len {print $1, $2}' data/train_clean_360_no_sil/utt2num_frames.bak > data/train_clean_360_no_sil/utt2num_frames 60 | utils/filter_scp.pl data/train_clean_360_no_sil/utt2num_frames data/train_clean_360_no_sil/utt2spk > data/train_clean_360_no_sil/utt2spk.new 61 | mv data/train_clean_360_no_sil/utt2spk.new data/train_clean_360_no_sil/utt2spk 62 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 63 | 64 | # We also want several utterances per speaker. Now we'll throw out speakers 65 | # with fewer than 8 utterances. 66 | min_num_utts=8 67 | awk '{print $1, NF-1}' data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/spk2num 68 | awk -v min_num_utts=${min_num_utts} '$2 >= min_num_utts {print $1, $2}' data/train_clean_360_no_sil/spk2num | utils/filter_scp.pl - data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/spk2utt.new 69 | mv data/train_clean_360_no_sil/spk2utt.new data/train_clean_360_no_sil/spk2utt 70 | utils/spk2utt_to_utt2spk.pl data/train_clean_360_no_sil/spk2utt > data/train_clean_360_no_sil/utt2spk 71 | 72 | utils/filter_scp.pl data/train_clean_360_no_sil/utt2spk data/train_clean_360_no_sil/utt2num_frames > data/train_clean_360_no_sil/utt2num_frames.new 73 | mv data/train_clean_360_no_sil/utt2num_frames.new data/train_clean_360_no_sil/utt2num_frames 74 | 75 | # Now we're ready to create training examples. 76 | utils/fix_data_dir.sh data/train_clean_360_no_sil || exit 1 77 | fi 78 | 79 | # Stages 6 through 8 are handled in run_xvector.sh 80 | if [ $stage -le 8 ]; then 81 | ./run_xvector.sh \ 82 | --stage $stage --train-stage $train_stage \ 83 | --data data/train_clean_360_no_sil --nnet-dir $nnet_dir \ 84 | --epochs $epochs --shrink $shrink --lrate $lrate --egs-dir $egs_dir || exit 1 85 | fi 86 | 87 | if [ $stage -le 9 ]; then 88 | # Extract x-vectors for centering, LDA, and PLDA training. 89 | sid/nnet3/xvector/extract_xvectors.sh \ 90 | --cmd "$train_cmd --mem 4G" --nj $nj \ 91 | $nnet_dir data/train_clean_360 \ 92 | $nnet_dir/xvect_train_clean_360 || exit 1 93 | # Extract x-vectors used in the evaluation. 94 | sid/nnet3/xvector/extract_xvectors.sh \ 95 | --cmd "$train_cmd --mem 4G" --nj $nj \ 96 | $nnet_dir data/voxceleb1_test \ 97 | $nnet_dir/xvect_voxceleb1_test || exit 1 98 | fi 99 | 100 | if [ $stage -le 10 ]; then 101 | # Compute the mean vector for centering the evaluation xvectors. 102 | $train_cmd $nnet_dir/xvect_train_clean_360/log/compute_mean.log \ 103 | ivector-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp \ 104 | $nnet_dir/xvect_train_clean_360/mean.vec || exit 1 105 | 106 | # This script uses LDA to decrease the dimensionality prior to PLDA. 
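Note: the commands below train the LDA transform and PLDA model; the same mean.vec / transform.mat / length-normalisation chain then reappears inside every ivector-plda-scoring pipeline in this recipe. The numpy sketch here is only a simplified illustration of that preprocessing: it uses a purely linear transform and unit-length normalisation, whereas Kaldi's transform-vec can apply an affine transform and ivector-normalize-length scales vectors to length sqrt(dim) rather than 1. Variable names are illustrative.

import numpy as np

def prepare_xvector(xvec, global_mean, lda_transform):
    centered = xvec - global_mean                  # ivector-subtract-global-mean with mean.vec
    projected = lda_transform @ centered           # transform-vec with transform.mat (reduces to lda_dim)
    return projected / np.linalg.norm(projected)   # simplified ivector-normalize-length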
107 | lda_dim=200 108 | $train_cmd $nnet_dir/xvect_train_clean_360/log/lda.log \ 109 | ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \ 110 | "ark:ivector-subtract-global-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp ark:- |" \ 111 | ark:data/train_clean_360/utt2spk $nnet_dir/xvect_train_clean_360/transform.mat || exit 1 112 | 113 | # Train the PLDA model. 114 | $train_cmd $nnet_dir/xvect_train_clean_360/log/plda.log \ 115 | ivector-compute-plda ark:data/train_clean_360/spk2utt \ 116 | "ark:ivector-subtract-global-mean scp:$nnet_dir/xvect_train_clean_360/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 117 | $nnet_dir/xvect_train_clean_360/plda || exit 1 118 | fi 119 | 120 | if [ $stage -le 11 ]; then 121 | $train_cmd $nnet_dir/scores/log/voxceleb1_test_scoring.log \ 122 | ivector-plda-scoring --normalize-length=true \ 123 | "ivector-copy-plda --smoothing=0.0 $nnet_dir/xvect_train_clean_360/plda - |" \ 124 | "ark:ivector-subtract-global-mean $nnet_dir/xvect_train_clean_360/mean.vec scp:$nnet_dir/xvect_voxceleb1_test/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 125 | "ark:ivector-subtract-global-mean $nnet_dir/xvect_train_clean_360/mean.vec scp:$nnet_dir/xvect_voxceleb1_test/xvector.scp ark:- | transform-vec $nnet_dir/xvect_train_clean_360/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ 126 | "cat '$voxceleb1_trials' | cut -d\ --fields=1,2 |" $nnet_dir/scores_voxceleb1_test || exit 1 127 | fi 128 | 129 | if [ $stage -le 12 ]; then 130 | eer=`compute-eer <(local/prepare_for_eer.py $voxceleb1_trials $nnet_dir/scores_voxceleb1_test) 2> /dev/null` 131 | mindcf1=`sid/compute_min_dcf.py --p-target 0.01 $nnet_dir/scores_voxceleb1_test $voxceleb1_trials 2> /dev/null` 132 | mindcf2=`sid/compute_min_dcf.py --p-target 0.001 $nnet_dir/scores_voxceleb1_test $voxceleb1_trials 2> /dev/null` 133 | echo "EER: $eer%" | tee $nnet_dir/scores/voxceleb1_test.txt 134 | echo "minDCF(p-target=0.01): $mindcf1" | tee -a $nnet_dir/scores/voxceleb1_test.txt 135 | echo "minDCF(p-target=0.001): $mindcf2" | tee -a $nnet_dir/scores/voxceleb1_test.txt 136 | 137 | fi 138 | 139 | echo Done 140 | -------------------------------------------------------------------------------- /baseline/RESULTS_mcadams: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.807% 3 | Cllr (min/act): 0.305/42.903 4 | ROCCH-EER: 8.686% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 35.37% 7 | Cllr (min/act): 0.820/116.889 8 | ROCCH-EER: 34.640% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.035/14.294 12 | ROCCH-EER: 1.128% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 17.86% 15 | Cllr (min/act): 0.526/105.727 16 | ROCCH-EER: 17.633% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 23.58% 19 | Cllr (min/act): 0.620/11.765 20 | ROCCH-EER: 23.081% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 10.56% 23 | Cllr (min/act): 0.359/11.959 24 | ROCCH-EER: 10.433% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.184/26.808 28 | ROCCH-EER: 7.179% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 26.09% 31 | Cllr (min/act): 0.686/115.572 32 | ROCCH-EER: 25.575% 33 | 
ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 | Cllr (min/act): 0.041/15.342 36 | ROCCH-EER: 1.065% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 17.82% 39 | Cllr (min/act): 0.498/106.444 40 | ROCCH-EER: 17.532% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 15.15% 43 | Cllr (min/act): 0.489/12.542 44 | ROCCH-EER: 15.002% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 8.463% 47 | Cllr (min/act): 0.263/15.393 48 | ROCCH-EER: 8.237% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.92% 51 | Cllr (min/act): 0.101/1.135 52 | ROCCH-EER: 2.874% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 35.43% 55 | Cllr (min/act): 0.907/90.524 56 | ROCCH-EER: 35.304% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.869 60 | ROCCH-EER: 2.195% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 34.01% 63 | Cllr (min/act): 0.879/85.860 64 | ROCCH-EER: 33.871% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.155 68 | ROCCH-EER: 1.375% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 28.14% 71 | Cllr (min/act): 0.740/98.410 72 | ROCCH-EER: 28.033% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.555 76 | ROCCH-EER: 1.318% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 23.93% 79 | Cllr (min/act): 0.669/90.705 80 | ROCCH-EER: 23.485% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 15.78% 83 | Cllr (min/act): 0.504/39.761 84 | ROCCH-EER: 15.687% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 11.63% 87 | Cllr (min/act): 0.368/43.488 88 | ROCCH-EER: 11.197% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 11.12% 91 | Cllr (min/act): 0.384/23.024 92 | ROCCH-EER: 11.075% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 10.54% 95 | Cllr (min/act): 0.317/24.945 96 | ROCCH-EER: 10.073% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.938% 99 | Cllr (min/act): 0.169/1.492 100 | ROCCH-EER: 4.864% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 29.99% 103 | Cllr (min/act): 0.795/93.164 104 | ROCCH-EER: 29.905% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.092/0.861 108 | ROCCH-EER: 2.748% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 30.92% 111 | Cllr (min/act): 0.807/93.959 112 | ROCCH-EER: 30.455% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.816 116 | ROCCH-EER: 1.968% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 28.3% 119 | Cllr (min/act): 0.720/101.697 120 | ROCCH-EER: 27.994% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.042 124 | ROCCH-EER: 0.965% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 24.29% 127 | Cllr (min/act): 0.713/99.336 128 | ROCCH-EER: 23.988% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 16.98% 131 | Cllr (min/act): 0.546/41.337 132 | ROCCH-EER: 16.894% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 14.45% 135 | Cllr (min/act): 0.464/42.745 136 | ROCCH-EER: 14.069% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 12.23% 139 | Cllr (min/act): 0.397/25.074 140 | ROCCH-EER: 11.931% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 
11.86% 143 | Cllr (min/act): 0.347/28.230 144 | ROCCH-EER: 11.217% 145 | ASR-libri_dev_asr 146 | %WER 5.24 [ 2084 / 39783, 204 ins, 214 del, 1666 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.0 147 | %WER 3.84 [ 1527 / 39783, 179 ins, 148 del, 1200 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_11_0.5 148 | ASR-libri_dev_asr_anon 149 | %WER 12.15 [ 4832 / 39783, 345 ins, 683 del, 3804 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_15_0.0 150 | %WER 8.74 [ 3476 / 39783, 353 ins, 378 del, 2745 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_15_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 229 ins, 185 del, 1530 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_12_0.0 153 | %WER 4.17 [ 1460 / 35042, 175 ins, 158 del, 1127 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 11.75 [ 4119 / 35042, 373 ins, 448 del, 3298 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 8.90 [ 3118 / 35042, 352 ins, 318 del, 2448 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.0 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12132 / 86627, 1148 ins, 1857 del, 9127 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.78 [ 9337 / 86627, 988 ins, 1352 del, 6997 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 30.05 [ 26035 / 86627, 1874 ins, 4609 del, 19552 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 25.56 [ 22138 / 86627, 1764 ins, 3677 del, 16697 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.5 163 | ASR-vctk_test_asr 164 | %WER 16.38 [ 14196 / 86642, 1331 ins, 2234 del, 10631 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.80 [ 11092 / 86642, 1301 ins, 1467 del, 8324 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 33.30 [ 28854 / 86642, 1933 ins, 5177 del, 21744 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_16_0.0 168 | %WER 28.15 [ 24388 / 86642, 2128 ins, 3676 del, 18584 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_17_0.0 169 | -------------------------------------------------------------------------------- /baseline/RESULTS_baseline: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.665% 3 | Cllr (min/act): 0.304/42.857 4 | ROCCH-EER: 8.600% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 50.14% 7 | Cllr (min/act): 0.996/144.112 8 | ROCCH-EER: 48.584% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.034/14.250 12 | ROCCH-EER: 1.070% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 57.76% 15 | Cllr (min/act): 0.999/168.988 16 | ROCCH-EER: 49.932% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 36.79% 19 | Cllr (min/act): 0.894/16.345 20 | ROCCH-EER: 36.281% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 34.16% 23 | Cllr (min/act): 0.867/24.715 24 | ROCCH-EER: 33.886% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.183/26.793 28 | ROCCH-EER: 7.165% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 47.26% 31 | Cllr (min/act): 0.995/151.822 32 | ROCCH-EER: 46.808% 33 | ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 
| Cllr (min/act): 0.041/15.303 36 | ROCCH-EER: 1.046% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 52.12% 39 | Cllr (min/act): 0.999/166.658 40 | ROCCH-EER: 49.713% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 32.12% 43 | Cllr (min/act): 0.839/16.270 44 | ROCCH-EER: 31.599% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 36.75% 47 | Cllr (min/act): 0.903/33.928 48 | ROCCH-EER: 36.117% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.864% 51 | Cllr (min/act): 0.100/1.134 52 | ROCCH-EER: 2.858% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 49.97% 55 | Cllr (min/act): 0.989/166.027 56 | ROCCH-EER: 48.043% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.868 60 | ROCCH-EER: 2.181% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 49.71% 63 | Cllr (min/act): 0.995/172.049 64 | ROCCH-EER: 47.895% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.158 68 | ROCCH-EER: 1.389% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 53.95% 71 | Cllr (min/act): 1.000/167.511 72 | ROCCH-EER: 49.992% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.559 76 | ROCCH-EER: 1.326% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 54.99% 79 | Cllr (min/act): 0.999/192.924 80 | ROCCH-EER: 49.914% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 26.11% 83 | Cllr (min/act): 0.760/8.414 84 | ROCCH-EER: 25.986% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 27.91% 87 | Cllr (min/act): 0.741/7.205 88 | ROCCH-EER: 27.035% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 30.92% 91 | Cllr (min/act): 0.839/23.797 92 | ROCCH-EER: 30.791% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 33.33% 95 | Cllr (min/act): 0.840/23.891 96 | ROCCH-EER: 32.847% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.887% 99 | Cllr (min/act): 0.169/1.495 100 | ROCCH-EER: 4.842% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 48.05% 103 | Cllr (min/act): 0.998/146.929 104 | ROCCH-EER: 47.837% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.091/0.866 108 | ROCCH-EER: 2.749% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 48.27% 111 | Cllr (min/act): 0.994/162.531 112 | ROCCH-EER: 47.635% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.817 116 | ROCCH-EER: 1.970% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 53.85% 119 | Cllr (min/act): 1.000/167.824 120 | ROCCH-EER: 49.998% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.041 124 | ROCCH-EER: 0.958% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 53.39% 127 | Cllr (min/act): 1.000/190.136 128 | ROCCH-EER: 49.955% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 31.74% 131 | Cllr (min/act): 0.847/11.527 132 | ROCCH-EER: 31.664% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 31.21% 135 | Cllr (min/act): 0.830/9.015 136 | ROCCH-EER: 30.791% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 30.94% 139 | Cllr (min/act): 0.834/23.842 140 | ROCCH-EER: 30.784% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 31.07% 143 | Cllr (min/act): 0.835/21.680 144 | 
ROCCH-EER: 30.794% 145 | ASR-libri_dev_asr 146 | %WER 5.25 [ 2089 / 39783, 172 ins, 250 del, 1667 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.5 147 | %WER 3.83 [ 1522 / 39783, 187 ins, 139 del, 1196 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_13_0.0 148 | ASR-libri_dev_asr_anon 149 | %WER 8.76 [ 3485 / 39783, 277 ins, 484 del, 2724 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_14_0.0 150 | %WER 6.39 [ 2543 / 39783, 270 ins, 284 del, 1989 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_14_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 214 ins, 197 del, 1533 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_13_0.0 153 | %WER 4.15 [ 1453 / 35042, 173 ins, 158 del, 1122 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 9.15 [ 3205 / 35042, 295 ins, 407 del, 2503 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 6.73 [ 2359 / 35042, 216 ins, 326 del, 1817 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.5 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12127 / 86627, 1148 ins, 1869 del, 9110 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.79 [ 9348 / 86627, 993 ins, 1354 del, 7001 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 18.92 [ 16390 / 86627, 1486 ins, 2568 del, 12336 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 15.38 [ 13327 / 86627, 1591 ins, 1738 del, 9998 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.0 163 | ASR-vctk_test_asr 164 | %WER 16.39 [ 14198 / 86642, 1323 ins, 2243 del, 10632 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.82 [ 11104 / 86642, 1300 ins, 1472 del, 8332 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 18.88 [ 16358 / 86642, 1482 ins, 2647 del, 12229 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_15_0.0 168 | %WER 15.23 [ 13193 / 86642, 1294 ins, 2044 del, 9855 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_14_0.5 169 | -------------------------------------------------------------------------------- /baseline/RESULTS_baseline_cosine: -------------------------------------------------------------------------------- 1 | ASV-libri_dev_enrolls-libri_dev_trials_f 2 | EER: 8.665% 3 | Cllr (min/act): 0.304/42.857 4 | ROCCH-EER: 8.600% 5 | ASV-libri_dev_enrolls-libri_dev_trials_f_anon 6 | EER: 49.29% 7 | Cllr (min/act): 0.996/146.391 8 | ROCCH-EER: 48.327% 9 | ASV-libri_dev_enrolls-libri_dev_trials_m 10 | EER: 1.242% 11 | Cllr (min/act): 0.034/14.250 12 | ROCCH-EER: 1.070% 13 | ASV-libri_dev_enrolls-libri_dev_trials_m_anon 14 | EER: 58.7% 15 | Cllr (min/act): 0.999/169.570 16 | ROCCH-EER: 49.911% 17 | ASV-libri_dev_enrolls_anon-libri_dev_trials_f_anon 18 | EER: 34.66% 19 | Cllr (min/act): 0.873/15.674 20 | ROCCH-EER: 34.168% 21 | ASV-libri_dev_enrolls_anon-libri_dev_trials_m_anon 22 | EER: 29.66% 23 | Cllr (min/act): 0.799/19.098 24 | ROCCH-EER: 29.356% 25 | ASV-libri_test_enrolls-libri_test_trials_f 26 | EER: 7.664% 27 | Cllr (min/act): 0.183/26.793 28 | ROCCH-EER: 7.165% 29 | ASV-libri_test_enrolls-libri_test_trials_f_anon 30 | EER: 49.09% 31 | Cllr (min/act): 0.996/151.245 32 | ROCCH-EER: 48.088% 33 | ASV-libri_test_enrolls-libri_test_trials_m 34 | EER: 1.114% 35 | Cllr (min/act): 0.041/15.303 36 | ROCCH-EER: 
1.046% 37 | ASV-libri_test_enrolls-libri_test_trials_m_anon 38 | EER: 52.78% 39 | Cllr (min/act): 0.999/169.178 40 | ROCCH-EER: 49.968% 41 | ASV-libri_test_enrolls_anon-libri_test_trials_f_anon 42 | EER: 29.38% 43 | Cllr (min/act): 0.806/13.904 44 | ROCCH-EER: 29.112% 45 | ASV-libri_test_enrolls_anon-libri_test_trials_m_anon 46 | EER: 31.85% 47 | Cllr (min/act): 0.840/26.806 48 | ROCCH-EER: 31.324% 49 | ASV-vctk_dev_enrolls-vctk_dev_trials_f 50 | EER: 2.864% 51 | Cllr (min/act): 0.100/1.134 52 | ROCCH-EER: 2.858% 53 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_anon 54 | EER: 49.92% 55 | Cllr (min/act): 0.990/163.294 56 | ROCCH-EER: 48.289% 57 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common 58 | EER: 2.616% 59 | Cllr (min/act): 0.088/0.868 60 | ROCCH-EER: 2.181% 61 | ASV-vctk_dev_enrolls-vctk_dev_trials_f_common_anon 62 | EER: 49.42% 63 | Cllr (min/act): 0.995/165.686 64 | ROCCH-EER: 48.335% 65 | ASV-vctk_dev_enrolls-vctk_dev_trials_m 66 | EER: 1.439% 67 | Cllr (min/act): 0.052/1.158 68 | ROCCH-EER: 1.389% 69 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_anon 70 | EER: 54.99% 71 | Cllr (min/act): 1.000/165.738 72 | ROCCH-EER: 49.989% 73 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common 74 | EER: 1.425% 75 | Cllr (min/act): 0.050/1.559 76 | ROCCH-EER: 1.326% 77 | ASV-vctk_dev_enrolls-vctk_dev_trials_m_common_anon 78 | EER: 56.13% 79 | Cllr (min/act): 1.000/191.512 80 | ROCCH-EER: 49.949% 81 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_anon 82 | EER: 28.47% 83 | Cllr (min/act): 0.809/9.809 84 | ROCCH-EER: 28.438% 85 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_f_common_anon 86 | EER: 25.29% 87 | Cllr (min/act): 0.733/7.660 88 | ROCCH-EER: 24.873% 89 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_anon 90 | EER: 27.74% 91 | Cllr (min/act): 0.787/19.331 92 | ROCCH-EER: 27.522% 93 | ASV-vctk_dev_enrolls_anon-vctk_dev_trials_m_common_anon 94 | EER: 28.77% 95 | Cllr (min/act): 0.750/18.813 96 | ROCCH-EER: 27.871% 97 | ASV-vctk_test_enrolls-vctk_test_trials_f 98 | EER: 4.887% 99 | Cllr (min/act): 0.169/1.495 100 | ROCCH-EER: 4.842% 101 | ASV-vctk_test_enrolls-vctk_test_trials_f_anon 102 | EER: 49.23% 103 | Cllr (min/act): 1.000/141.531 104 | ROCCH-EER: 48.932% 105 | ASV-vctk_test_enrolls-vctk_test_trials_f_common 106 | EER: 2.89% 107 | Cllr (min/act): 0.091/0.866 108 | ROCCH-EER: 2.749% 109 | ASV-vctk_test_enrolls-vctk_test_trials_f_common_anon 110 | EER: 49.42% 111 | Cllr (min/act): 0.996/156.555 112 | ROCCH-EER: 48.192% 113 | ASV-vctk_test_enrolls-vctk_test_trials_m 114 | EER: 2.067% 115 | Cllr (min/act): 0.072/1.817 116 | ROCCH-EER: 1.970% 117 | ASV-vctk_test_enrolls-vctk_test_trials_m_anon 118 | EER: 53.67% 119 | Cllr (min/act): 1.000/166.265 120 | ROCCH-EER: 49.998% 121 | ASV-vctk_test_enrolls-vctk_test_trials_m_common 122 | EER: 1.13% 123 | Cllr (min/act): 0.036/1.041 124 | ROCCH-EER: 0.958% 125 | ASV-vctk_test_enrolls-vctk_test_trials_m_common_anon 126 | EER: 55.93% 127 | Cllr (min/act): 1.000/190.058 128 | ROCCH-EER: 49.975% 129 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_anon 130 | EER: 34.88% 131 | Cllr (min/act): 0.896/13.381 132 | ROCCH-EER: 34.582% 133 | ASV-vctk_test_enrolls_anon-vctk_test_trials_f_common_anon 134 | EER: 31.5% 135 | Cllr (min/act): 0.846/9.833 136 | ROCCH-EER: 30.885% 137 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_anon 138 | EER: 24.28% 139 | Cllr (min/act): 0.716/15.101 140 | ROCCH-EER: 24.116% 141 | ASV-vctk_test_enrolls_anon-vctk_test_trials_m_common_anon 142 | EER: 22.6% 143 | Cllr (min/act): 0.669/14.047 144 | ROCCH-EER: 22.075% 145 | ASR-libri_dev_asr 146 | %WER 
5.25 [ 2089 / 39783, 172 ins, 250 del, 1667 sub ] exp/models/asr_eval/decode_libri_dev_asr_tgsmall/wer_12_0.5 147 | %WER 3.83 [ 1522 / 39783, 187 ins, 139 del, 1196 sub ] exp/models/asr_eval/decode_libri_dev_asr_tglarge/wer_13_0.0 148 | ASR-libri_dev_asr_anon 149 | %WER 9.02 [ 3587 / 39783, 280 ins, 512 del, 2795 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tgsmall/wer_14_0.0 150 | %WER 6.53 [ 2597 / 39783, 296 ins, 272 del, 2029 sub ] exp/models/asr_eval/decode_libri_dev_asr_anon_tglarge/wer_13_0.0 151 | ASR-libri_test_asr 152 | %WER 5.55 [ 1944 / 35042, 214 ins, 197 del, 1533 sub ] exp/models/asr_eval/decode_libri_test_asr_tgsmall/wer_13_0.0 153 | %WER 4.15 [ 1453 / 35042, 173 ins, 158 del, 1122 sub ] exp/models/asr_eval/decode_libri_test_asr_tglarge/wer_12_1.0 154 | ASR-libri_test_asr_anon 155 | %WER 9.01 [ 3158 / 35042, 267 ins, 380 del, 2511 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tgsmall/wer_14_0.0 156 | %WER 6.69 [ 2344 / 35042, 216 ins, 308 del, 1820 sub ] exp/models/asr_eval/decode_libri_test_asr_anon_tglarge/wer_15_0.5 157 | ASR-vctk_dev_asr 158 | %WER 14.00 [ 12127 / 86627, 1148 ins, 1869 del, 9110 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tgsmall/wer_15_0.0 159 | %WER 10.79 [ 9348 / 86627, 993 ins, 1354 del, 7001 sub ] exp/models/asr_eval/decode_vctk_dev_asr_tglarge/wer_14_0.5 160 | ASR-vctk_dev_asr_anon 161 | %WER 19.06 [ 16507 / 86627, 1451 ins, 2652 del, 12404 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tgsmall/wer_16_0.0 162 | %WER 15.57 [ 13487 / 86627, 1583 ins, 1814 del, 10090 sub ] exp/models/asr_eval/decode_vctk_dev_asr_anon_tglarge/wer_16_0.0 163 | ASR-vctk_test_asr 164 | %WER 16.39 [ 14198 / 86642, 1323 ins, 2243 del, 10632 sub ] exp/models/asr_eval/decode_vctk_test_asr_tgsmall/wer_14_0.0 165 | %WER 12.82 [ 11104 / 86642, 1300 ins, 1472 del, 8332 sub ] exp/models/asr_eval/decode_vctk_test_asr_tglarge/wer_14_0.0 166 | ASR-vctk_test_asr_anon 167 | %WER 19.35 [ 16767 / 86642, 1484 ins, 2731 del, 12552 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tgsmall/wer_15_0.0 168 | %WER 15.61 [ 13524 / 86642, 1495 ins, 1946 del, 10083 sub ] exp/models/asr_eval/decode_vctk_test_asr_anon_tglarge/wer_16_0.0 169 | -------------------------------------------------------------------------------- /baseline/local/featex/split_am_nsf_data.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | ''' 4 | Script to divide a given data directory for Acoustic modeling 5 | and Neural Source Filter waveform modeling. The dev set will contain some 6 | speakers from train set, and the test set will contain completely disjoint 7 | speakers. 8 | 9 | The root-dir should be in kaldi format, out-dir will be where newly created 10 | train, dev and test will be stored. 11 | 12 | The data division will be with respect to gender. First, ntest speakers 13 | (ntest/2 male, ntest/2 female) will be 14 | split from the dataset with all their utterances to create the test set, then 15 | ndev speakers (ndev/2 male, ndev/2 female) will be sampled from remaining data 16 | and a given percentage (dev-utt-per value can range from 0 to 1) of utterances 17 | will be sampled from each speaker to 18 | create the dev set. 19 | 20 | Remaining data will be used for training. 
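The script reads four positional arguments, in order: root-dir, out-dir, ntest
and ndev. A hypothetical invocation (example values only) would be:
python local/featex/split_am_nsf_data.py data/am_nsf_train exp/am_nsf_data 10 20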
21 | 22 | To run: 23 | python local/split_am_nsf_data.py 24 | ''' 25 | 26 | import sys 27 | import os 28 | from os.path import join, basename 29 | 30 | args = sys.argv 31 | 32 | root_dir = args[1] 33 | out_dir = args[2] 34 | ntest = int(args[3]) 35 | ndev = int(args[4]) 36 | 37 | print "Config: root_dir =", root_dir, " out_dir =", out_dir 38 | print "Config: ntest =", ntest, " ndev =", ndev 39 | 40 | test_dir = join(out_dir, basename(root_dir) + '_test') 41 | dev_dir = join(out_dir, basename(root_dir) + '_dev') 42 | train_dir = join(out_dir, basename(root_dir) + '_train') 43 | 44 | spk2utt = {} 45 | spk2gender = {} 46 | utt2wav = {} 47 | utt2text = {} 48 | utt2spk = {} 49 | 50 | with open(join(root_dir, 'spk2utt')) as f: 51 | for line in f.read().splitlines(): 52 | sp = line.split() 53 | spkid = sp[0] 54 | utts = sp[1:] 55 | spk2utt[spkid] = utts 56 | 57 | with open(join(root_dir, 'spk2gender')) as f: 58 | for line in f.read().splitlines(): 59 | sp = line.split() 60 | spkid = sp[0] 61 | gen = sp[1] 62 | spk2gender[spkid] = gen 63 | 64 | with open(join(root_dir, 'wav.scp')) as f: 65 | for line in f.read().splitlines(): 66 | sp = line.split() 67 | uttid = sp[0] 68 | wav_path = ' '.join(sp[1:]) 69 | utt2wav[uttid] = wav_path 70 | 71 | with open(join(root_dir, 'text')) as f: 72 | for line in f.read().splitlines(): 73 | sp = line.split() 74 | uttid = sp[0] 75 | text = ' '.join(sp[1:]) 76 | utt2text[uttid] = text 77 | 78 | with open(join(root_dir, 'utt2spk')) as f: 79 | for line in f.read().splitlines(): 80 | sp = line.split() 81 | uttid = sp[0] 82 | spk = sp[1] 83 | utt2spk[uttid] = spk 84 | 85 | 86 | # Find ntest/2 male and ntest/2 female speakers 87 | test_spks = [] 88 | spklim = int(ntest / 2) 89 | print "Per gender speaker limit for test =", spklim 90 | mspk, fspk = 0, 0 91 | for spk, gender in spk2gender.items(): 92 | if mspk < spklim and gender == 'm': 93 | test_spks.append(spk) 94 | mspk += 1 95 | elif fspk < spklim and gender == 'f': 96 | test_spks.append(spk) 97 | fspk += 1 98 | 99 | print "Selected ", len(test_spks), " test speakers." 100 | 101 | # Find dev spks and utts 102 | dev_spks = [] 103 | dev_utts = [] 104 | spklim = int(ndev / 2) 105 | print "Per gender speaker limit for dev = ", spklim 106 | 107 | mspk, fspk = 0, 0 108 | for spk, gender in spk2gender.items(): 109 | if spk not in test_spks: 110 | if mspk < spklim and gender == 'm': 111 | dev_spks.append(spk) 112 | spk_utts = spk2utt[spk] 113 | #utt_frac = int(devper * len(spk_utts)) 114 | dev_utts.extend(spk_utts) 115 | mspk += 1 116 | elif fspk < spklim and gender == 'f': 117 | dev_spks.append(spk) 118 | spk_utts = spk2utt[spk] 119 | #utt_frac = int(devper * len(spk_utts)) 120 | dev_utts.extend(spk_utts) 121 | fspk += 1 122 | 123 | print "Selected ", len(dev_spks), " dev speakers." 
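# Note: the selections made above are written out below as Kaldi-style data files
# with an '.unsorted' suffix; they are presumably sorted and validated (e.g. with
# utils/fix_data_dir.sh) by the calling script before further use.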
124 | 125 | os.makedirs(test_dir) 126 | with open(join(test_dir, 'spk2utt.unsorted'), 'w') as f: 127 | for spk in test_spks: 128 | f.write(spk + ' ' + ' '.join(spk2utt[spk]) + '\n') 129 | 130 | with open(join(test_dir, 'spk2gender.unsorted'), 'w') as f: 131 | for spk in test_spks: 132 | f.write(spk + ' ' + spk2gender[spk] + '\n') 133 | 134 | with open(join(test_dir, 'utt2spk.unsorted'), 'w') as f: 135 | for spk in test_spks: 136 | for utt in spk2utt[spk]: 137 | f.write(utt + ' ' + spk + '\n') 138 | 139 | with open(join(test_dir, 'text.unsorted'), 'w') as f: 140 | for spk in test_spks: 141 | for utt in spk2utt[spk]: 142 | f.write(utt + ' ' + utt2text[utt] + '\n') 143 | 144 | with open(join(test_dir, 'wav.scp.unsorted'), 'w') as f: 145 | for spk in test_spks: 146 | for utt in spk2utt[spk]: 147 | f.write(utt + ' ' + utt2wav[utt] + '\n') 148 | 149 | print "Finished creating test dir." 150 | 151 | os.makedirs(dev_dir) 152 | with open(join(dev_dir, 'spk2utt.unsorted'), 'w') as f: 153 | for spk in dev_spks: 154 | #spk_utts = [utt for utt in spk2utt[spk] if utt in dev_utts] 155 | spk_utts = spk2utt[spk] 156 | f.write(spk + ' ' + ' '.join(spk_utts) + '\n') 157 | 158 | with open(join(dev_dir, 'spk2gender.unsorted'), 'w') as f: 159 | for spk in dev_spks: 160 | f.write(spk + ' ' + spk2gender[spk] + '\n') 161 | 162 | with open(join(dev_dir, 'utt2spk.unsorted'), 'w') as f: 163 | for utt in dev_utts: 164 | f.write(utt + ' ' + utt2spk[utt] + '\n') 165 | 166 | with open(join(dev_dir, 'text.unsorted'), 'w') as f: 167 | for utt in dev_utts: 168 | f.write(utt + ' ' + utt2text[utt] + '\n') 169 | 170 | with open(join(dev_dir, 'wav.scp.unsorted'), 'w') as f: 171 | for utt in dev_utts: 172 | f.write(utt + ' ' + utt2wav[utt] + '\n') 173 | 174 | print "Finished creating dev dir." 175 | 176 | all_spks = list(spk2gender.keys()) 177 | all_utts = list(utt2spk.keys()) 178 | train_spks = [spk for spk in all_spks if spk not in test_spks and spk not in 179 | dev_spks] 180 | train_utts = [utt for utt in all_utts if utt2spk[utt] not in test_spks and 181 | utt not in dev_utts] 182 | print "Selected", len(train_spks), "train speakers and", len(train_utts), "train utterances." 183 | 184 | os.makedirs(train_dir) 185 | with open(join(train_dir, 'spk2utt.unsorted'), 'w') as f: 186 | for spk in train_spks: 187 | spk_utts = [utt for utt in spk2utt[spk] if utt in train_utts] 188 | f.write(spk + ' ' + ' '.join(spk_utts) + '\n') 189 | 190 | with open(join(train_dir, 'spk2gender.unsorted'), 'w') as f: 191 | for spk in train_spks: 192 | f.write(spk + ' ' + spk2gender[spk] + '\n') 193 | 194 | with open(join(train_dir, 'utt2spk.unsorted'), 'w') as f: 195 | for utt in train_utts: 196 | f.write(utt + ' ' + utt2spk[utt] + '\n') 197 | 198 | with open(join(train_dir, 'text.unsorted'), 'w') as f: 199 | for utt in train_utts: 200 | f.write(utt + ' ' + utt2text[utt] + '\n') 201 | 202 | with open(join(train_dir, 'wav.scp.unsorted'), 'w') as f: 203 | for utt in train_utts: 204 | f.write(utt + ' ' + utt2wav[utt] + '\n') 205 | 206 | print "Finished creating train dir." 207 | -------------------------------------------------------------------------------- /baseline/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for The First VoicePrivacy Challenge 2020 3 | # 4 | # This clean-up script should be used before re-running anonymization system (for example, with different parametrs, models, etc.) 
5 | # in order to delete all old directories (in data, exp, ..., which should be updated) from the previous run of anonymization and evaluation sripts. 6 | # 7 | # 8 | 9 | set -e 10 | 11 | # ls | awk '{for (i=1; i<=NF; ++i) print $i}' | sort >> ../cleanup.sh 12 | 13 | names=' 14 | libri_dev_asr 15 | libri_dev_asr_anon 16 | libri_dev_enrolls 17 | libri_dev_enrolls_anon 18 | libri_dev_trials_all 19 | libri_dev_trials_f 20 | libri_dev_trials_f_anon 21 | libri_dev_trials_m 22 | libri_dev_trials_m_anon 23 | libri_test_asr 24 | libri_test_asr_anon 25 | libri_test_enrolls 26 | libri_test_enrolls_anon 27 | libri_test_trials_all 28 | libri_test_trials_f 29 | libri_test_trials_f_anon 30 | libri_test_trials_m 31 | libri_test_trials_m_anon 32 | vctk_dev_asr 33 | vctk_dev_asr_anon 34 | vctk_dev_enrolls 35 | vctk_dev_enrolls_anon 36 | vctk_dev_trials_all 37 | vctk_dev_trials_f 38 | vctk_dev_trials_f_all 39 | vctk_dev_trials_f_all_anon 40 | vctk_dev_trials_f_anon 41 | vctk_dev_trials_f_common 42 | vctk_dev_trials_f_common_anon 43 | vctk_dev_trials_m 44 | vctk_dev_trials_m_all 45 | vctk_dev_trials_m_all_anon 46 | vctk_dev_trials_m_anon 47 | vctk_dev_trials_m_common 48 | vctk_dev_trials_m_common_anon 49 | vctk_test_asr 50 | vctk_test_asr_anon 51 | vctk_test_enrolls 52 | vctk_test_enrolls_anon 53 | vctk_test_trials_all 54 | vctk_test_trials_f 55 | vctk_test_trials_f_all 56 | vctk_test_trials_f_all_anon 57 | vctk_test_trials_f_anon 58 | vctk_test_trials_f_common 59 | vctk_test_trials_f_common_anon 60 | vctk_test_trials_m 61 | vctk_test_trials_m_all 62 | vctk_test_trials_m_all_anon 63 | vctk_test_trials_m_anon 64 | vctk_test_trials_m_common 65 | vctk_test_trials_m_common_anon 66 | libri_dev_trials_f_hires 67 | libri_test_asr_hires 68 | vctk_dev_asr_hires 69 | vctk_test_trials_f_all_hires 70 | libri_dev_asr_anon_hires 71 | libri_dev_trials_m_hires 72 | libri_test_enrolls_hires 73 | vctk_dev_enrolls_hires 74 | vctk_test_asr_anon_hires 75 | vctk_test_trials_m_all_hires 76 | libri_dev_asr_hires 77 | libri_test_trials_f_hires 78 | vctk_dev_trials_f_all_hires 79 | vctk_test_asr_hires 80 | libri_dev_enrolls_hires 81 | libri_test_asr_anon_hires 82 | libri_test_trials_m_hires 83 | vctk_dev_asr_anon_hires 84 | vctk_dev_trials_m_all_hires 85 | vctk_test_enrolls_hires' 86 | 87 | 88 | for name in $names; do 89 | dir=data/$name 90 | #[ ! -d $dir ] && echo $dir 91 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 92 | done 93 | 94 | names=' 95 | decode_libri_dev_asr_anon_tglarge 96 | decode_libri_dev_asr_anon_tgsmall 97 | decode_libri_dev_asr_tglarge 98 | decode_libri_dev_asr_tgsmall 99 | decode_libri_test_asr_anon_tglarge 100 | decode_libri_test_asr_anon_tgsmall 101 | decode_libri_test_asr_tglarge 102 | decode_libri_test_asr_tgsmall 103 | decode_vctk_dev_asr_anon_tglarge 104 | decode_vctk_dev_asr_anon_tgsmall 105 | decode_vctk_dev_asr_tglarge 106 | decode_vctk_dev_asr_tgsmall 107 | decode_vctk_test_asr_anon_tglarge 108 | decode_vctk_test_asr_anon_tgsmall 109 | decode_vctk_test_asr_tglarge 110 | decode_vctk_test_asr_tgsmall' 111 | 112 | for name in $names; do 113 | dir=exp/models/asr_eval/$name 114 | #[ ! -d $dir ] && echo $dir 115 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 116 | done 117 | 118 | names=' 119 | ivect_libri_dev_asr 120 | ivect_libri_dev_asr_anon 121 | ivect_libri_test_asr 122 | ivect_libri_test_asr_anon 123 | ivect_vctk_dev_asr 124 | ivect_vctk_dev_asr_anon 125 | ivect_vctk_test_asr 126 | ivect_vctk_test_asr_anon' 127 | 128 | for name in $names; do 129 | dir=exp/models/asr_eval/extractor/$name 130 | #[ ! 
-d $dir ] && echo $dir 131 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 132 | done 133 | 134 | names=' 135 | ivectors_libri_dev_enrolls_hires 136 | ivectors_libri_dev_trials_f_hires 137 | ivectors_libri_dev_trials_m_hires 138 | ivectors_libri_test_enrolls_hires 139 | ivectors_libri_test_trials_f_hires 140 | ivectors_libri_test_trials_m_hires 141 | ivectors_vctk_dev_enrolls_hires 142 | ivectors_vctk_dev_trials_f_all_hires 143 | ivectors_vctk_dev_trials_m_all_hires 144 | ivectors_vctk_test_enrolls_hires 145 | ivectors_vctk_test_trials_f_all_hires 146 | ivectors_vctk_test_trials_m_all_hires 147 | ppg_libri_dev_enrolls 148 | ppg_libri_dev_trials_f 149 | ppg_libri_dev_trials_m 150 | ppg_libri_test_enrolls 151 | ppg_libri_test_trials_f 152 | ppg_libri_test_trials_m 153 | ppg_vctk_dev_enrolls 154 | ppg_vctk_dev_trials_f_all 155 | ppg_vctk_dev_trials_m_all 156 | ppg_vctk_test_enrolls 157 | ppg_vctk_test_trials_f_all 158 | ppg_vctk_test_trials_m_all' 159 | 160 | for name in $names; do 161 | dir=exp/models/1_asr_am/exp/nnet3_cleaned/$name 162 | #[ ! -d $dir ] && echo $dir 163 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 164 | done 165 | 166 | names=' 167 | xvect_libri_dev_enrolls 168 | xvect_libri_dev_enrolls_anon 169 | xvect_libri_dev_trials_f 170 | xvect_libri_dev_trials_f_anon 171 | xvect_libri_dev_trials_m 172 | xvect_libri_dev_trials_m_anon 173 | xvect_libri_test_enrolls 174 | xvect_libri_test_enrolls_anon 175 | xvect_libri_test_trials_f 176 | xvect_libri_test_trials_f_anon 177 | xvect_libri_test_trials_m 178 | xvect_libri_test_trials_m_anon 179 | xvect_vctk_dev_enrolls 180 | xvect_vctk_dev_enrolls_anon 181 | xvect_vctk_dev_trials_f 182 | xvect_vctk_dev_trials_f_anon 183 | xvect_vctk_dev_trials_f_common 184 | xvect_vctk_dev_trials_f_common_anon 185 | xvect_vctk_dev_trials_m 186 | xvect_vctk_dev_trials_m_anon 187 | xvect_vctk_dev_trials_m_common 188 | xvect_vctk_dev_trials_m_common_anon 189 | xvect_vctk_test_enrolls 190 | xvect_vctk_test_enrolls_anon 191 | xvect_vctk_test_trials_f 192 | xvect_vctk_test_trials_f_anon 193 | xvect_vctk_test_trials_f_common 194 | xvect_vctk_test_trials_f_common_anon 195 | xvect_vctk_test_trials_m 196 | xvect_vctk_test_trials_m_anon 197 | xvect_vctk_test_trials_m_common 198 | xvect_vctk_test_trials_m_common_anon' 199 | 200 | for name in $names; do 201 | dir=exp/models/asv_eval/xvect_01709_1/$name 202 | #[ ! -d $dir ] && echo $dir 203 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 204 | done 205 | 206 | names=' 207 | xvectors_libri_dev_enrolls 208 | xvectors_libri_dev_trials_f 209 | xvectors_libri_dev_trials_m 210 | xvectors_libri_test_enrolls 211 | xvectors_libri_test_trials_f 212 | xvectors_libri_test_trials_m 213 | xvectors_vctk_dev_enrolls 214 | xvectors_vctk_dev_trials_f_all 215 | xvectors_vctk_dev_trials_m_all 216 | xvectors_vctk_test_enrolls 217 | xvectors_vctk_test_trials_f_all 218 | xvectors_vctk_test_trials_m_all' 219 | 220 | for name in $names; do 221 | dir=exp/models/2_xvect_extr/exp/xvector_nnet_1a/anon/$name 222 | #[ ! -d $dir ] && echo $dir 223 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 224 | done 225 | 226 | names=' 227 | libri_dev_enrolls 228 | libri_dev_trials_f 229 | libri_dev_trials_m 230 | libri_test_enrolls 231 | libri_test_trials_f 232 | libri_test_trials_m 233 | vctk_dev_enrolls 234 | vctk_dev_trials_f_all 235 | vctk_dev_trials_m_all 236 | vctk_test_enrolls 237 | vctk_test_trials_f_all 238 | vctk_test_trials_m_all' 239 | 240 | for name in $names; do 241 | dir=exp/am_nsf_data/$name 242 | #[ ! 
-d $dir ] && echo $dir 243 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 244 | done 245 | 246 | dir="exp/tmp" 247 | if [ -d $dir ]; then echo $dir; rm -r $dir; fi 248 | 249 | echo Done 250 | --------------------------------------------------------------------------------