├── .gitignore ├── .rsyncignore ├── Kaldi_Models ├── final.raw ├── mean.vec ├── plda └── transform.mat ├── README.md ├── conf ├── SRE19_mfcc_vad_cfgs │ ├── mfcc.conf │ └── vad.conf ├── Voices_16k_mfcc_vad_cfgs │ ├── mfcc.conf │ └── vad.conf ├── sdsvc_config.cfg ├── sre18_egs_config.cfg ├── sre_config.cfg ├── voices_config.cfg ├── voices_config_dplda.cfg └── voxceleb_xvector_model_configs │ ├── final.config │ ├── network.xconfig │ ├── ref.config │ ├── ref.raw │ ├── vars │ ├── xconfig │ ├── xconfig.expanded.1 │ └── xconfig.expanded.2 ├── dataprep_sdsvc.py ├── dataprep_sre.py ├── dataprep_sre18_egs.py ├── dataprep_voices_challenge.py ├── utils ├── Kaldi2NumpyUtils │ ├── __pycache__ │ │ ├── kaldiPlda2numpydict.cpython-36.pyc │ │ └── kaldiPlda2numpydict.cpython-37.pyc │ ├── kaldiPlda2numpydict.py │ ├── kaldifeats2numpydict.py │ └── kaldivec2numpydict.py ├── NpldaConf.py ├── __pycache__ │ ├── calibration.cpython-36.pyc │ ├── sv_trials_loaders.cpython-36.pyc │ └── sv_trials_loaders.cpython-37.pyc ├── adaptive_score_normalization.py ├── histograms.py ├── models.py ├── plotting.py ├── score_calibration.py ├── scorefile_generator.py └── sv_trials_loaders.py ├── xvector_DPlda_pytorch.py ├── xvector_GaussianBackend_pytorch.py ├── xvector_NeuralPlda_pytorch.py └── xvector_generate_scores.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/logs 2 | **/scores 3 | **/models 4 | **/pickled_files 5 | logs/* 6 | scores/* 7 | models/* 8 | pickled_files/* 9 | utils/__pycache__/* 10 | scores_* 11 | *ark 12 | *txt 13 | valid_* 14 | plots/* 15 | utt2spk* 16 | spk2utt* 17 | Kaldi_Models/*/* 18 | Kaldi_Models/final.pkl 19 | *_spks 20 | tmp_utts 21 | xvector_train.scp 22 | denoise_xvector.py 23 | config.json 24 | kaldi_io/** 25 | trials_and_keys/** 26 | xvectors/** 27 | xvector_GaussianBackend_pytorch_vox.py 28 | utils/tsne_visualize.py 29 | utils/visualizations/** 30 | utils/tdnn_xvector.py 31 | utils/analyze_scores.py 32 | utils/*gen_20_sec_chunks* 33 | utils/qsub* 34 | utils/trials_gen* 35 | utils/models_scorenorm.py 36 | kaldi_io/** 37 | mfcc/** 38 | utils/gen_20* 39 | utils/trials_gen_e2e.py 40 | utils/qsub_gen_20sec_chunks_voxceleb.sh 41 | utils/models_scorenorm.py 42 | **/*.out 43 | **/*.err 44 | -------------------------------------------------------------------------------- /.rsyncignore: -------------------------------------------------------------------------------- 1 | **/logs 2 | **/scores 3 | **/models 4 | **/pickled_files 5 | logs/* 6 | scores/* 7 | models/* 8 | pickled_files/* 9 | utils/__pycache__/* 10 | scores_* 11 | *ark 12 | *txt 13 | valid_* 14 | plots/* 15 | utt2spk* 16 | spk2utt* 17 | Kaldi_Models/*/* 18 | Kaldi_Models/final.pkl 19 | *_spks 20 | tmp_utts 21 | xvector_train.scp 22 | denoise_xvector.py 23 | config.json 24 | kaldi_io/** 25 | trials_and_keys/** 26 | xvectors/** 27 | xvector_GaussianBackend_pytorch_vox.py 28 | utils/tsne_visualize.py 29 | utils/visualizations/** 30 | utils/tdnn_xvector.py 31 | utils/analyze_scores.py 32 | utils/*gen_20_sec_chunks* 33 | utils/qsub* 34 | utils/trials_gen* 35 | kaldi_io/** 36 | **/*.out 37 | **/*.err 38 | mfcc/** 39 | .git* 40 | .git/* 41 | -------------------------------------------------------------------------------- /Kaldi_Models/final.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/Kaldi_Models/final.raw 
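The Kaldi_Models directory holds the Kaldi-trained backend used when the configs set initialization = kaldi: a global mean vector (mean.vec, Kaldi text format, shown next), a linear LDA-like transform (transform.mat) and a PLDA model (plda). As a minimal sketch (not the repository's own loader; the function name and the commented usage line are illustrative, and the repo's converters for the binary files live under utils/Kaldi2NumpyUtils/), the text-format mean vector can be read into NumPy like this:

    import numpy as np

    def load_kaldi_text_vector(path):
        """Parse a Kaldi text-format vector of the form '[ v1 v2 ... vN ]'."""
        with open(path) as f:
            txt = f.read()
        body = txt[txt.index('[') + 1 : txt.rindex(']')]  # keep only the bracketed numbers
        return np.array([float(x) for x in body.split()])

    # mean = load_kaldi_text_vector('Kaldi_Models/mean.vec')  # global x-vector mean (512-dim per the configs)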
-------------------------------------------------------------------------------- /Kaldi_Models/mean.vec: -------------------------------------------------------------------------------- 1 | [ -1.078267 1.270397 -0.7076068 -0.2098637 0.09463782 0.3680134 -0.3859169 -0.9079144 0.6070266 -0.1720753 -0.8016843 1.246332 0.6467567 -0.1072495 -0.03087796 -0.05100157 -0.4935657 2.309668 2.829105 -0.1282838 -1.098014 2.150876 -0.9283825 -1.247043 0.7274057 1.778487 1.96038 1.426191 1.769001 1.078677 1.39229 0.3257743 0.2956752 -0.02260386 -0.03128085 -0.2049067 0.8732985 0.3469385 -0.6974237 -0.2156143 0.5950524 1.076595 0.840586 2.521737 -1.870046 2.025931 0.417563 1.76681 1.454609 0.4021332 0.1536419 0.3750819 0.299158 0.9342029 0.6498342 1.862257 0.6638532 0.1686164 -0.4961072 -0.4895763 0.5921575 1.606296 0.6974308 2.005958 1.212495 -1.319008 0.003372358 2.894888 1.453798 -0.9470718 0.1701881 -0.2272952 -0.5998532 0.6298541 2.919267 -0.8781338 -0.4119065 0.7529454 -0.831157 0.4879438 -0.3415558 -1.019727 0.9924725 2.696247 1.471816 -0.7935514 1.17525 2.242546 0.8328737 2.421517 -0.7617516 0.9464926 -0.2254741 -1.021796 -0.2994864 -1.731138 1.708555 -0.653897 1.391567 -0.3862774 -0.3527439 0.325037 0.3907058 0.2259492 0.4944036 0.3420738 1.434183 0.9571143 -0.32263 -0.0491781 1.593246 0.9722343 2.866502 0.3169456 0.4638853 0.4834644 0.856905 -0.4421222 -0.6894522 0.2594848 1.356911 0.2494047 2.746869 -0.2100332 0.05990769 4.045659 0.7109839 -1.83713 -0.7200889 0.4288694 -0.3026591 2.006073 -0.8320403 0.5912298 -0.813475 3.143559 1.95152 1.0616 -0.5684562 -0.6065781 3.138942 2.532035 -1.078534 1.642468 2.115736 0.8859955 -0.1088584 -0.2285181 0.9685895 0.7763882 0.7042006 0.7367982 -0.4024608 0.3761119 0.0454693 -0.1768445 -0.2720835 1.23124 1.277711 1.021454 0.2052816 0.9982144 1.235251 1.930983 0.5609777 0.2620774 -0.009641117 -0.9688708 -0.1108883 0.7551855 0.2472481 -0.7190395 -0.5178243 -2.245706 0.1184718 -0.6803186 -1.022159 0.5423789 0.1394518 -0.98061 -0.566381 0.1127939 -0.08253952 -0.7229582 0.5253069 1.813598 1.83201 -0.2421755 3.424379 3.148658 1.394755 0.7897432 1.31941 1.286687 -0.38565 1.107873 -0.007100429 0.468621 -0.2318493 1.927849 1.937331 -1.189061 -0.2345906 -0.4070761 0.008467014 -1.16113 3.131583 2.324117 0.2009479 -0.1635814 2.227993 1.50884 0.9637208 0.4314355 -0.5461852 -0.5295896 -2.068636 -1.043591 -0.05273676 1.185378 1.398973 1.003106 0.04942706 -0.9021872 -0.6865521 0.2392083 0.1757577 0.3192558 -0.6645173 -0.8704 0.01734466 -0.3412069 2.168366 -0.2849281 -0.1116432 -1.159344 -0.2387517 1.976386 -0.1694252 1.017498 -0.4030491 0.036569 -0.4600107 -0.4967998 0.5760832 1.600808 0.291688 3.654283 0.6024704 0.7085361 0.09402099 -0.3095708 -0.2379865 -0.4915582 1.515361 -1.825471 -0.4885668 0.5414232 0.7374258 2.879878 -0.2311052 0.465668 0.3618321 2.593897 -0.1637373 -0.6778232 1.854169 -0.06309183 -0.7383304 1.588758 -0.484911 0.6701986 1.644875 0.1861239 0.9826036 1.96781 2.515116 0.4654867 -0.267663 1.719524 -0.5812652 0.2019402 -0.5757724 0.3229897 -0.9764078 -0.1035124 -0.5044929 2.764748 0.9960505 -0.2344112 0.5074826 -0.6720839 0.7536164 0.1669663 0.2788936 -0.4101553 -0.48574 -0.2992488 0.9121327 3.473317 1.228534 -0.8090135 1.895683 1.210576 -0.7278895 3.659199 -0.09125352 1.440799 1.116784 2.856706 1.270075 2.054808 -0.6997992 -0.799051 -0.2797754 1.055156 -0.5915854 -0.1475476 1.601106 -0.4835464 2.102486 -0.7370726 3.992492 2.460993 -0.6807581 0.8554972 -0.746101 -0.1980108 1.533954 2.849741 2.511009 2.880246 -1.468846 2.513953 0.5259104 -0.7461595 
-0.4206173 0.4655266 -0.203723 1.508744 0.1475628 1.291648 0.3520635 0.1915301 2.01044 0.2619963 1.17219 -0.274855 2.87393 -0.7475736 -0.5124024 -0.9491215 0.3441683 2.64096 0.9636678 1.398312 -0.2931762 1.040158 -0.00211716 1.352985 -0.8214171 0.1970082 1.012089 1.814837 1.552405 -0.7799022 0.728392 1.058162 0.7100649 1.29967 1.039059 2.156693 0.4537788 0.37625 1.778724 -0.8813882 -0.1149526 -0.5871835 1.920322 -1.17784 -0.5422357 2.499871 -0.3473829 1.068015 1.374549 -0.17558 0.4172747 -1.052777 0.5983068 1.245442 0.8989309 2.76185 2.509869 0.7904766 0.08769375 -0.47114 0.09007813 0.057129 0.2056237 2.745301 -0.4416811 0.9544126 1.243894 0.6577861 0.4831531 0.5641459 0.7019801 0.5920131 -0.3314624 -1.108255 2.196711 -0.288998 -0.8298137 -0.06033496 -0.7807337 1.020684 1.757625 -1.572078 0.1503305 -0.6395035 0.8890952 0.5363092 1.277464 0.77208 0.6067886 0.2273018 -0.5886792 0.09580821 -0.3446812 0.3800524 -1.075305 -0.0001945033 -0.7621963 -0.02490688 1.184362 0.8725733 0.3098737 2.650266 -1.076025 -0.1604337 -0.2511254 -0.5123474 -0.1853942 1.426561 -0.6584423 0.4655151 0.2833495 1.822781 0.03289639 -0.9728919 0.7050171 1.439932 0.06961966 0.2212543 1.844667 1.218873 2.155593 0.4240932 0.6693496 0.8912831 -0.1877562 0.004689313 0.9128061 0.2332849 2.29213 3.411733 1.108323 1.807046 0.1422238 0.03661728 0.2432908 0.5380685 0.9266791 -0.9418516 0.4636756 0.5168624 1.77348 -0.3663233 -1.216864 0.1849886 1.10684 -1.045185 1.223489 -0.06867379 1.255476 0.06595846 -0.2280547 0.05848947 0.4375556 2.89911 -0.1673773 0.5076201 -0.001341609 0.2702836 0.5808836 -0.1883472 -0.1907355 0.3396735 -0.6235273 -0.338237 -0.0848971 -1.242526 -0.1241269 -0.4139891 -0.6312056 0.2680316 1.166387 -0.7577576 3.576955 0.6388649 -3.086694 0.5743925 ] 2 | -------------------------------------------------------------------------------- /Kaldi_Models/plda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/Kaldi_Models/plda -------------------------------------------------------------------------------- /Kaldi_Models/transform.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/Kaldi_Models/transform.mat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NeuralPlda - v1 2 | 3 | Pytorch Implementation of the Pairwise Discriminative Neural PLDA (NPLDA) for Speaker Verification 4 | 5 | Refs: 6 | [1] Shreyas Ramoji, Prashant Krishnan, Prachi Singh, Sriram Ganapathy: "Pairwise Discriminative Neural PLDA for Speaker Verification" arXiv preprint arXiv:2001.07034 (2020). 7 | 8 | [2] Shreyas Ramoji, Prashant Krishnan, Sriram Ganapathy: "NPLDA: A Deep Neural PLDA Model for Speaker Verification" submitted to Odyssey 2020, The Speaker and Language Recognition Workshop. 9 | 10 | [3] Shreyas Ramoji, Prashant Krishnan, Bhargavram Mysore, Prachi Singh, Sriram Ganapathy: "LEAP System for SRE19 Challenge - Improvements and Error Analysis" arXiv preprint arXiv:2001.07034 (2020). 11 | 12 | ## Requirements: 13 | 14 | 1. Kaldi Toolkit : https://github.com/kaldi-asr/kaldi 15 | 2. 
Python 3.6 or above with Numpy, Scipy, Pytorch, Matplotlib, Pickle, kaldi_io (https://pypi.org/project/kaldi-io/, https://github.com/vesis84/kaldi-io-for-python) 16 | -------------------------------------------------------------------------------- /conf/SRE19_mfcc_vad_cfgs/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=8000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=3700 # the default is zero meaning use the Nyquist (8k in this case). 5 | --num-mel-bins=23 6 | --num-ceps=23 7 | --snip-edges=false 8 | -------------------------------------------------------------------------------- /conf/SRE19_mfcc_vad_cfgs/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /conf/Voices_16k_mfcc_vad_cfgs/mfcc.conf: -------------------------------------------------------------------------------- 1 | --sample-frequency=8000 2 | --frame-length=25 # the default is 25 3 | --low-freq=20 # the default. 4 | --high-freq=3700 # the default is zero meaning use the Nyquist (8k in this case). 5 | --num-mel-bins=23 6 | --num-ceps=23 7 | --snip-edges=false 8 | -------------------------------------------------------------------------------- /conf/Voices_16k_mfcc_vad_cfgs/vad.conf: -------------------------------------------------------------------------------- 1 | --vad-energy-threshold=5.5 2 | --vad-energy-mean-scale=0.5 3 | --vad-proportion-threshold=0.12 4 | --vad-frames-context=2 5 | -------------------------------------------------------------------------------- /conf/sdsvc_config.cfg: -------------------------------------------------------------------------------- 1 | # This is a sample configuration for NPLDA - for VOiCES Challenge 2 | 3 | [Paths] 4 | training_data_trials_list = trials_and_keys/voxceleb_16k_aug_train_trial_keys_1_10.tsv,trials_and_keys/sdsvc_train_trial_keys_100_80.tsv 5 | validation_trials_list = trials_and_keys/voxceleb_16k_aug_validate_trial_keys_1_10.tsv,trials_and_keys/sdsvc_validate_trial_keys_100_80.tsv 6 | test_trials_list = /home/data2/SRE2019/prashantk/voxceleb/v1/data/sdsv_challenge_task2.test/trials 7 | mega_xvector_scp = xvectors/mega_xvector_voices_voxceleb_16k.scp 8 | mega_xvector_pkl = xvectors/mega_xvector_voxceleb_16k_sdsvc.pkl 9 | meanvec = Kaldi_Models/voxceleb/mean.vec 10 | transformmat = Kaldi_Models/voxceleb/transform.mat 11 | kaldiplda = Kaldi_Models/voxceleb/plda 12 | 13 | [NPLDA] 14 | xvector_dim = 512 15 | layer1_LDA_dim = 170 16 | layer2_PLDA_spkfactor_dim = 170 17 | initialization = kaldi 18 | device = cuda:2 19 | seed = 1 20 | alpha = 15 21 | 22 | [Training] 23 | train_subsample_factors = 0.6,1.01 24 | valid_subsample_factors = 0.3,1.01 25 | loss = SoftCdet 26 | cmiss = 10 27 | cfa = 1 28 | target_probs = 0.01 29 | batch_size = 4096 30 | n_epochs = 20 31 | lr = 0.0001 32 | heldout_set_for_th_init = sdsvc_validate_trial_keys_100_80 33 | heldout_set_for_lr_decay = sdsvc_validate_trial_keys_100_80 34 | 35 | [Scoring] 36 | scorefile_format = voices 37 | 38 | [Logging] 39 | log_interval = 500 -------------------------------------------------------------------------------- /conf/sre18_egs_config.cfg: -------------------------------------------------------------------------------- 1 | # This is a sample configuration 
for NPLDA 2 | 3 | [Paths] 4 | training_data_trials_list = trials_and_keys/sre18_egs_train_trial_keys.tsv 5 | validation_trials_list = trials_and_keys/sre18_egs_validate_trial_keys.tsv,trials_and_keys/sre18_dev_keys.tsv 6 | test_trials_list = /home/data/SRE2019/LDC2019E58/docs/sre19_cts_challenge_trials.tsv 7 | mega_xvector_scp = xvectors/mega_xvector_voxceleb_8k_plus_sre18_egs.scp 8 | mega_xvector_pkl = xvectors/mega_xvector_voxceleb_8k_plus_sre18_egs.pkl 9 | meanvec = Kaldi_Models/Train/mean.vec 10 | transformmat = Kaldi_Models/Train/transform.mat 11 | kaldiplda = Kaldi_Models/Train/plda 12 | 13 | 14 | [NPLDA] 15 | xvector_dim = 512 16 | layer1_LDA_dim = 170 17 | layer2_PLDA_spkfactor_dim = 170 18 | initialization = kaldi 19 | device = cuda 20 | seed = 1 21 | alpha = 30 22 | 23 | [Training] 24 | loss = SoftCdet 25 | target_probs = 0.01,0.005 26 | batch_size = 2048 27 | n_epochs = 20 28 | lr = 0.0001 29 | heldout_set_for_th_init = sre18_egs_validate_trial_keys 30 | heldout_set_for_lr_decay = sre18_egs_validate_trial_keys 31 | 32 | [Scoring] 33 | scorefile_format = sre 34 | 35 | [Logging] 36 | log_interval = 100 -------------------------------------------------------------------------------- /conf/sre_config.cfg: -------------------------------------------------------------------------------- 1 | # This is a sample configuration for NPLDA 2 | 3 | [Paths] 4 | training_data_trials_list = trials_and_keys/swbd_sre04to10_mx6_train_trial_keys.tsv,trials_and_keys/sre18_eval_keys.tsv 5 | validation_trials_list = trials_and_keys/swbd_sre04to10_mx6_validate_trial_keys.tsv,trials_and_keys/sre18_dev_keys.tsv 6 | test_trials_list = /home/data/SRE2019/LDC2019E58/docs/sre19_cts_challenge_trials.tsv 7 | mega_xvector_scp = xvectors/mega_xvector_voxceleb_8k.scp 8 | mega_xvector_pkl = xvectors/mega_xvector_voxceleb_8k.pkl 9 | meanvec = Kaldi_Models/Train/mean.vec 10 | transformmat = Kaldi_Models/Train/transform.mat 11 | kaldiplda = Kaldi_Models/Train/plda 12 | 13 | 14 | [NPLDA] 15 | xvector_dim = 512 16 | layer1_LDA_dim = 170 17 | layer2_PLDA_spkfactor_dim = 170 18 | initialization = kaldi 19 | device = cuda 20 | seed = 1 21 | alpha = 15 22 | 23 | [Training] 24 | train_subsample_factors = None 25 | valid_subsample_factors = None 26 | loss = crossentropy 27 | cmiss = 1 28 | cfa = 1 29 | target_probs = 0.01,0.005 30 | batch_size = 128 31 | n_epochs = 20 32 | lr = 0.0001 33 | heldout_set_for_th_init = sre18_dev_keys 34 | heldout_set_for_lr_decay = swbd_sre04to10_mx6_validate_trial_keys 35 | 36 | [Scoring] 37 | scorefile_format = sre 38 | 39 | [Logging] 40 | log_interval = 1000 -------------------------------------------------------------------------------- /conf/voices_config.cfg: -------------------------------------------------------------------------------- 1 | # This is a sample configuration for NPLDA - for VOiCES Challenge 2 | 3 | [Paths] 4 | training_data_trials_list = trials_and_keys/voxceleb_16k_aug_train_trial_keys_1_10.tsv 5 | validation_trials_list = trials_and_keys/voxceleb_16k_aug_validate_trial_keys_1_10.tsv,trials_and_keys/voices_dev_keys.tsv 6 | test_trials_list = /home/data/VOICES/interspeech2019Challenge/VOiCES_challenge_2019_post-eval-release/VOiCES_challenge_2019_eval.SID.trial-keys.lst,/home/data/VOICES/interspeech2019Challenge/Development_Data/Speaker_Recognition/sid_dev_lists_and_keys/dev-trial.lst,/home/data2/SRE2019/prashantk/voxceleb/v1/data/sitw_eval_test/trials/core-core_trials 7 | mega_xvector_scp = xvectors/mega_xvector_voices_voxceleb_16k.scp 8 | mega_xvector_pkl = 
xvectors/mega_xvector_voices_voxceleb_16k.pkl 9 | meanvec = Kaldi_Models/voxceleb/mean.vec 10 | transformmat = Kaldi_Models/voxceleb/transform.mat 11 | kaldiplda = Kaldi_Models/voxceleb/plda 12 | 13 | [NPLDA] 14 | xvector_dim = 512 15 | layer1_LDA_dim = 170 16 | layer2_PLDA_spkfactor_dim = 170 17 | initialization = kaldi 18 | device = cuda 19 | seed = 1 20 | alpha = 15 21 | 22 | [Training] 23 | train_subsample_factors=1.01 24 | valid_subsample_factors=0.2,1.01 25 | loss = softCdet 26 | cmiss = 1 27 | cfa = 1 28 | target_probs = 0.01 29 | batch_size = 2048 30 | n_epochs = 20 31 | lr = 0.0001 32 | heldout_set_for_th_init = voices_dev_keys 33 | heldout_set_for_lr_decay = voxceleb_16k_aug_validate_trial_keys 34 | 35 | [Scoring] 36 | scorefile_format = voices 37 | 38 | [Logging] 39 | log_interval = 1000 40 | -------------------------------------------------------------------------------- /conf/voices_config_dplda.cfg: -------------------------------------------------------------------------------- 1 | # This is a sample configuration for NPLDA - for VOiCES Challenge 2 | 3 | [Paths] 4 | training_data_trials_list = trials_and_keys/voxceleb_16k_aug_train_trial_keys_1_10.tsv 5 | validation_trials_list = trials_and_keys/voxceleb_16k_aug_validate_trial_keys_1_10.tsv,trials_and_keys/voices_dev_keys.tsv 6 | test_trials_list = /home/data/VOICES/interspeech2019Challenge/VOiCES_challenge_2019_post-eval-release/VOiCES_challenge_2019_eval.SID.trial-keys.lst,/home/data2/SRE2019/prashantk/voxceleb/v1/data/sitw_eval_test/trials/core-core_trials 7 | mega_xvector_scp = xvectors/mega_xvector_voices_voxceleb_16k.scp 8 | mega_xvector_pkl = xvectors/mega_xvector_voices_voxceleb_16k.pkl 9 | meanvec = Kaldi_Models/voxceleb/mean.vec 10 | transformmat = Kaldi_Models/voxceleb/transform.mat 11 | kaldiplda = Kaldi_Models/voxceleb/plda 12 | 13 | [NPLDA] 14 | xvector_dim = 512 15 | layer1_LDA_dim = 170 16 | layer2_PLDA_spkfactor_dim = 170 17 | initialization = kaldi 18 | device = cuda 19 | seed = 1 20 | alpha = 15 21 | 22 | [Training] 23 | train_subsample_factors=1.01 24 | valid_subsample_factors=0.2,1.01 25 | loss = crossentropy 26 | cmiss = 1 27 | cfa = 1 28 | target_probs = 0.01 29 | batch_size = 256 30 | n_epochs = 20 31 | lr = 0.0001 32 | heldout_set_for_th_init = voices_dev_keys 33 | heldout_set_for_lr_decay = voxceleb_16k_aug_validate_trial_keys 34 | 35 | [Scoring] 36 | scorefile_format = voices 37 | 38 | [Logging] 39 | log_interval = 10000 -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/final.config: -------------------------------------------------------------------------------- 1 | # This file was created by the command: 2 | # steps/nnet3/xconfig_to_configs.py --xconfig-file exp/xvector_nnet_1a/configs/network.xconfig --config-dir exp/xvector_nnet_1a/configs/ 3 | # It contains the entire neural network. 
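# The components that follow define the frame-level TDNN layers (tdnn1-tdnn10),
# a statistics extraction/pooling stage that accumulates mean and standard
# deviation over a segment of up to 10000 frames (a 3000-dim segment summary),
# and the segment-level layers tdnn11 and tdnn12; per network.xconfig, the
# 512-dim x-vector embedding is usually taken from tdnn11. The final affine and
# log-softmax components form a 7323-class speaker classification output.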
4 | 5 | input-node name=input dim=23 6 | component name=tdnn1.affine type=NaturalGradientAffineComponent input-dim=115 output-dim=512 max-change=0.75 7 | component-node name=tdnn1.affine component=tdnn1.affine input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2)) 8 | component name=tdnn1.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 9 | component-node name=tdnn1.relu component=tdnn1.relu input=tdnn1.affine 10 | component name=tdnn1.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 11 | component-node name=tdnn1.batchnorm component=tdnn1.batchnorm input=tdnn1.relu 12 | component name=tdnn2.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 13 | component-node name=tdnn2.affine component=tdnn2.affine input=tdnn1.batchnorm 14 | component name=tdnn2.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 15 | component-node name=tdnn2.relu component=tdnn2.relu input=tdnn2.affine 16 | component name=tdnn2.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 17 | component-node name=tdnn2.batchnorm component=tdnn2.batchnorm input=tdnn2.relu 18 | component name=tdnn3.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 max-change=0.75 19 | component-node name=tdnn3.affine component=tdnn3.affine input=Append(Offset(tdnn2.batchnorm, -2), tdnn2.batchnorm, Offset(tdnn2.batchnorm, 2)) 20 | component name=tdnn3.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 21 | component-node name=tdnn3.relu component=tdnn3.relu input=tdnn3.affine 22 | component name=tdnn3.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 23 | component-node name=tdnn3.batchnorm component=tdnn3.batchnorm input=tdnn3.relu 24 | component name=tdnn4.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 25 | component-node name=tdnn4.affine component=tdnn4.affine input=tdnn3.batchnorm 26 | component name=tdnn4.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 27 | component-node name=tdnn4.relu component=tdnn4.relu input=tdnn4.affine 28 | component name=tdnn4.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 29 | component-node name=tdnn4.batchnorm component=tdnn4.batchnorm input=tdnn4.relu 30 | component name=tdnn5.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 max-change=0.75 31 | component-node name=tdnn5.affine component=tdnn5.affine input=Append(Offset(tdnn4.batchnorm, -3), tdnn4.batchnorm, Offset(tdnn4.batchnorm, 3)) 32 | component name=tdnn5.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 33 | component-node name=tdnn5.relu component=tdnn5.relu input=tdnn5.affine 34 | component name=tdnn5.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 35 | component-node name=tdnn5.batchnorm component=tdnn5.batchnorm input=tdnn5.relu 36 | component name=tdnn6.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 37 | component-node name=tdnn6.affine component=tdnn6.affine input=tdnn5.batchnorm 38 | component name=tdnn6.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 39 | component-node name=tdnn6.relu component=tdnn6.relu input=tdnn6.affine 40 | component name=tdnn6.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 41 | component-node name=tdnn6.batchnorm component=tdnn6.batchnorm input=tdnn6.relu 42 | component name=tdnn7.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 max-change=0.75 43 | 
component-node name=tdnn7.affine component=tdnn7.affine input=Append(Offset(tdnn6.batchnorm, -4), tdnn6.batchnorm, Offset(tdnn6.batchnorm, 4)) 44 | component name=tdnn7.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 45 | component-node name=tdnn7.relu component=tdnn7.relu input=tdnn7.affine 46 | component name=tdnn7.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 47 | component-node name=tdnn7.batchnorm component=tdnn7.batchnorm input=tdnn7.relu 48 | component name=tdnn8.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 49 | component-node name=tdnn8.affine component=tdnn8.affine input=tdnn7.batchnorm 50 | component name=tdnn8.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 51 | component-node name=tdnn8.relu component=tdnn8.relu input=tdnn8.affine 52 | component name=tdnn8.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 53 | component-node name=tdnn8.batchnorm component=tdnn8.batchnorm input=tdnn8.relu 54 | component name=tdnn9.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 55 | component-node name=tdnn9.affine component=tdnn9.affine input=tdnn8.batchnorm 56 | component name=tdnn9.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 57 | component-node name=tdnn9.relu component=tdnn9.relu input=tdnn9.affine 58 | component name=tdnn9.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 59 | component-node name=tdnn9.batchnorm component=tdnn9.batchnorm input=tdnn9.relu 60 | component name=tdnn10.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=1500 max-change=0.75 61 | component-node name=tdnn10.affine component=tdnn10.affine input=tdnn9.batchnorm 62 | component name=tdnn10.relu type=RectifiedLinearComponent dim=1500 self-repair-scale=1e-05 63 | component-node name=tdnn10.relu component=tdnn10.relu input=tdnn10.affine 64 | component name=tdnn10.batchnorm type=BatchNormComponent dim=1500 target-rms=1.0 65 | component-node name=tdnn10.batchnorm component=tdnn10.batchnorm input=tdnn10.relu 66 | component name=stats-extraction-0-10000 type=StatisticsExtractionComponent input-dim=1500 input-period=1 output-period=1 include-variance=true 67 | component-node name=stats-extraction-0-10000 component=stats-extraction-0-10000 input=tdnn10.batchnorm 68 | component name=stats-pooling-0-10000 type=StatisticsPoolingComponent input-dim=3001 input-period=1 left-context=0 right-context=10000 num-log-count-features=0 output-stddevs=true 69 | component-node name=stats-pooling-0-10000 component=stats-pooling-0-10000 input=stats-extraction-0-10000 70 | component name=tdnn11.affine type=NaturalGradientAffineComponent input-dim=3000 output-dim=512 max-change=0.75 71 | component-node name=tdnn11.affine component=tdnn11.affine input=Round(stats-pooling-0-10000, 1) 72 | component name=tdnn11.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 73 | component-node name=tdnn11.relu component=tdnn11.relu input=tdnn11.affine 74 | component name=tdnn11.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 75 | component-node name=tdnn11.batchnorm component=tdnn11.batchnorm input=tdnn11.relu 76 | component name=tdnn12.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 77 | component-node name=tdnn12.affine component=tdnn12.affine input=tdnn11.batchnorm 78 | component name=tdnn12.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 79 | component-node name=tdnn12.relu 
component=tdnn12.relu input=tdnn12.affine 80 | component name=tdnn12.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 81 | component-node name=tdnn12.batchnorm component=tdnn12.batchnorm input=tdnn12.relu 82 | component name=output.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=7323 max-change=1.5 param-stddev=0.0 bias-stddev=0.0 83 | component-node name=output.affine component=output.affine input=tdnn12.batchnorm 84 | component name=output.log-softmax type=LogSoftmaxComponent dim=7323 85 | component-node name=output.log-softmax component=output.log-softmax input=output.affine 86 | output-node name=output input=output.log-softmax objective=linear 87 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/network.xconfig: -------------------------------------------------------------------------------- 1 | # please note that it is important to have input layer with the name=input 2 | 3 | # The frame-level layers 4 | input dim=23 name=input 5 | relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 6 | relu-batchnorm-layer name=tdnn2 dim=512 7 | relu-batchnorm-layer name=tdnn3 input=Append(-2,0,2) dim=512 8 | relu-batchnorm-layer name=tdnn4 dim=512 9 | relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 10 | relu-batchnorm-layer name=tdnn6 dim=512 11 | relu-batchnorm-layer name=tdnn7 input=Append(-4,0,4) dim=512 12 | relu-batchnorm-layer name=tdnn8 dim=512 13 | relu-batchnorm-layer name=tdnn9 dim=512 14 | relu-batchnorm-layer name=tdnn10 dim=1500 15 | 16 | # The stats pooling layer. Layers after this are segment-level. 17 | # In the config below, the first and last argument (0, and 10000) 18 | # means that we pool over an input segment starting at frame 0 19 | # and ending at frame 10000 or earlier. The other arguments (1:1) 20 | # mean that no subsampling is performed. 21 | stats-layer name=stats config=mean+stddev(0:1:1:10000) 22 | 23 | # This is where we usually extract the embedding (aka xvector) from. 24 | relu-batchnorm-layer name=tdnn11 dim=512 input=stats 25 | 26 | # This is where another layer the embedding could be extracted 27 | # from, but usually the previous one works better. 28 | relu-batchnorm-layer name=tdnn12 dim=512 29 | output-layer name=output include-log-softmax=true dim=7323 30 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/ref.config: -------------------------------------------------------------------------------- 1 | # This file was created by the command: 2 | # steps/nnet3/xconfig_to_configs.py --xconfig-file exp/xvector_nnet_1a/configs/network.xconfig --config-dir exp/xvector_nnet_1a/configs/ 3 | # It contains the entire neural network, but with those 4 | # components that would normally require fixed vectors/matrices 5 | # read from disk, replaced with random initialization 6 | # (this applies to the LDA-like transform and the 7 | # presoftmax-prior-scale, if applicable). This file 8 | # is used only to work out the left-context and right-context 9 | # of the network. 
10 | 11 | input-node name=input dim=23 12 | component name=tdnn1.affine type=NaturalGradientAffineComponent input-dim=115 output-dim=512 max-change=0.75 13 | component-node name=tdnn1.affine component=tdnn1.affine input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2)) 14 | component name=tdnn1.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 15 | component-node name=tdnn1.relu component=tdnn1.relu input=tdnn1.affine 16 | component name=tdnn1.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 17 | component-node name=tdnn1.batchnorm component=tdnn1.batchnorm input=tdnn1.relu 18 | component name=tdnn2.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 19 | component-node name=tdnn2.affine component=tdnn2.affine input=tdnn1.batchnorm 20 | component name=tdnn2.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 21 | component-node name=tdnn2.relu component=tdnn2.relu input=tdnn2.affine 22 | component name=tdnn2.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 23 | component-node name=tdnn2.batchnorm component=tdnn2.batchnorm input=tdnn2.relu 24 | component name=tdnn3.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 max-change=0.75 25 | component-node name=tdnn3.affine component=tdnn3.affine input=Append(Offset(tdnn2.batchnorm, -2), tdnn2.batchnorm, Offset(tdnn2.batchnorm, 2)) 26 | component name=tdnn3.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 27 | component-node name=tdnn3.relu component=tdnn3.relu input=tdnn3.affine 28 | component name=tdnn3.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 29 | component-node name=tdnn3.batchnorm component=tdnn3.batchnorm input=tdnn3.relu 30 | component name=tdnn4.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 31 | component-node name=tdnn4.affine component=tdnn4.affine input=tdnn3.batchnorm 32 | component name=tdnn4.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 33 | component-node name=tdnn4.relu component=tdnn4.relu input=tdnn4.affine 34 | component name=tdnn4.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 35 | component-node name=tdnn4.batchnorm component=tdnn4.batchnorm input=tdnn4.relu 36 | component name=tdnn5.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 max-change=0.75 37 | component-node name=tdnn5.affine component=tdnn5.affine input=Append(Offset(tdnn4.batchnorm, -3), tdnn4.batchnorm, Offset(tdnn4.batchnorm, 3)) 38 | component name=tdnn5.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 39 | component-node name=tdnn5.relu component=tdnn5.relu input=tdnn5.affine 40 | component name=tdnn5.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 41 | component-node name=tdnn5.batchnorm component=tdnn5.batchnorm input=tdnn5.relu 42 | component name=tdnn6.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 43 | component-node name=tdnn6.affine component=tdnn6.affine input=tdnn5.batchnorm 44 | component name=tdnn6.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 45 | component-node name=tdnn6.relu component=tdnn6.relu input=tdnn6.affine 46 | component name=tdnn6.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 47 | component-node name=tdnn6.batchnorm component=tdnn6.batchnorm input=tdnn6.relu 48 | component name=tdnn7.affine type=NaturalGradientAffineComponent input-dim=1536 output-dim=512 
max-change=0.75 49 | component-node name=tdnn7.affine component=tdnn7.affine input=Append(Offset(tdnn6.batchnorm, -4), tdnn6.batchnorm, Offset(tdnn6.batchnorm, 4)) 50 | component name=tdnn7.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 51 | component-node name=tdnn7.relu component=tdnn7.relu input=tdnn7.affine 52 | component name=tdnn7.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 53 | component-node name=tdnn7.batchnorm component=tdnn7.batchnorm input=tdnn7.relu 54 | component name=tdnn8.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 55 | component-node name=tdnn8.affine component=tdnn8.affine input=tdnn7.batchnorm 56 | component name=tdnn8.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 57 | component-node name=tdnn8.relu component=tdnn8.relu input=tdnn8.affine 58 | component name=tdnn8.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 59 | component-node name=tdnn8.batchnorm component=tdnn8.batchnorm input=tdnn8.relu 60 | component name=tdnn9.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 61 | component-node name=tdnn9.affine component=tdnn9.affine input=tdnn8.batchnorm 62 | component name=tdnn9.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 63 | component-node name=tdnn9.relu component=tdnn9.relu input=tdnn9.affine 64 | component name=tdnn9.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 65 | component-node name=tdnn9.batchnorm component=tdnn9.batchnorm input=tdnn9.relu 66 | component name=tdnn10.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=1500 max-change=0.75 67 | component-node name=tdnn10.affine component=tdnn10.affine input=tdnn9.batchnorm 68 | component name=tdnn10.relu type=RectifiedLinearComponent dim=1500 self-repair-scale=1e-05 69 | component-node name=tdnn10.relu component=tdnn10.relu input=tdnn10.affine 70 | component name=tdnn10.batchnorm type=BatchNormComponent dim=1500 target-rms=1.0 71 | component-node name=tdnn10.batchnorm component=tdnn10.batchnorm input=tdnn10.relu 72 | component name=stats-extraction-0-10000 type=StatisticsExtractionComponent input-dim=1500 input-period=1 output-period=1 include-variance=true 73 | component-node name=stats-extraction-0-10000 component=stats-extraction-0-10000 input=tdnn10.batchnorm 74 | component name=stats-pooling-0-10000 type=StatisticsPoolingComponent input-dim=3001 input-period=1 left-context=0 right-context=10000 num-log-count-features=0 output-stddevs=true 75 | component-node name=stats-pooling-0-10000 component=stats-pooling-0-10000 input=stats-extraction-0-10000 76 | component name=tdnn11.affine type=NaturalGradientAffineComponent input-dim=3000 output-dim=512 max-change=0.75 77 | component-node name=tdnn11.affine component=tdnn11.affine input=Round(stats-pooling-0-10000, 1) 78 | component name=tdnn11.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 79 | component-node name=tdnn11.relu component=tdnn11.relu input=tdnn11.affine 80 | component name=tdnn11.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 81 | component-node name=tdnn11.batchnorm component=tdnn11.batchnorm input=tdnn11.relu 82 | component name=tdnn12.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=512 max-change=0.75 83 | component-node name=tdnn12.affine component=tdnn12.affine input=tdnn11.batchnorm 84 | component name=tdnn12.relu type=RectifiedLinearComponent dim=512 self-repair-scale=1e-05 85 | component-node 
name=tdnn12.relu component=tdnn12.relu input=tdnn12.affine 86 | component name=tdnn12.batchnorm type=BatchNormComponent dim=512 target-rms=1.0 87 | component-node name=tdnn12.batchnorm component=tdnn12.batchnorm input=tdnn12.relu 88 | component name=output.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=7323 max-change=1.5 param-stddev=0.0 bias-stddev=0.0 89 | component-node name=output.affine component=output.affine input=tdnn12.batchnorm 90 | component name=output.log-softmax type=LogSoftmaxComponent dim=7323 91 | component-node name=output.log-softmax component=output.log-softmax input=output.affine 92 | output-node name=output input=output.log-softmax objective=linear 93 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/ref.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/conf/voxceleb_xvector_model_configs/ref.raw -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/vars: -------------------------------------------------------------------------------- 1 | model_left_context=0 2 | model_right_context=11 3 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/xconfig: -------------------------------------------------------------------------------- 1 | # This file was created by the command: 2 | # steps/nnet3/xconfig_to_configs.py --xconfig-file exp/xvector_nnet_1a/configs/network.xconfig --config-dir exp/xvector_nnet_1a/configs/ 3 | # It is a copy of the source from which the config files in # this directory were generated. 4 | 5 | # please note that it is important to have input layer with the name=input 6 | 7 | # The frame-level layers 8 | input dim=23 name=input 9 | relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512 10 | relu-batchnorm-layer name=tdnn2 dim=512 11 | relu-batchnorm-layer name=tdnn3 input=Append(-2,0,2) dim=512 12 | relu-batchnorm-layer name=tdnn4 dim=512 13 | relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=512 14 | relu-batchnorm-layer name=tdnn6 dim=512 15 | relu-batchnorm-layer name=tdnn7 input=Append(-4,0,4) dim=512 16 | relu-batchnorm-layer name=tdnn8 dim=512 17 | relu-batchnorm-layer name=tdnn9 dim=512 18 | relu-batchnorm-layer name=tdnn10 dim=1500 19 | 20 | # The stats pooling layer. Layers after this are segment-level. 21 | # In the config below, the first and last argument (0, and 10000) 22 | # means that we pool over an input segment starting at frame 0 23 | # and ending at frame 10000 or earlier. The other arguments (1:1) 24 | # mean that no subsampling is performed. 25 | stats-layer name=stats config=mean+stddev(0:1:1:10000) 26 | 27 | # This is where we usually extract the embedding (aka xvector) from. 28 | relu-batchnorm-layer name=tdnn11 dim=512 input=stats 29 | 30 | # This is where another layer the embedding could be extracted 31 | # from, but usually the previous one works better. 
32 | relu-batchnorm-layer name=tdnn12 dim=512 33 | output-layer name=output include-log-softmax=true dim=7323 34 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/xconfig.expanded.1: -------------------------------------------------------------------------------- 1 | # This file was created by the command: 2 | # steps/nnet3/xconfig_to_configs.py --xconfig-file exp/xvector_nnet_1a/configs/network.xconfig --config-dir exp/xvector_nnet_1a/configs/ 3 | #It contains the same content as ./xconfig but it was parsed and 4 | #default config values were set. 5 | # See also ./xconfig.expanded.2 6 | 7 | input name=input dim=23 8 | relu-batchnorm-layer name=tdnn1 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(-2,-1,0,1,2) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 9 | relu-batchnorm-layer name=tdnn2 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 10 | relu-batchnorm-layer name=tdnn3 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(-2,0,2) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 11 | relu-batchnorm-layer name=tdnn4 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 12 | relu-batchnorm-layer name=tdnn5 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(-3,0,3) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 13 | relu-batchnorm-layer name=tdnn6 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 14 | relu-batchnorm-layer name=tdnn7 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(-4,0,4) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 15 | relu-batchnorm-layer name=tdnn8 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 16 | relu-batchnorm-layer name=tdnn9 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= 
self-repair-scale=1e-05 target-rms=1.0 17 | relu-batchnorm-layer name=tdnn10 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=1500 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 18 | stats-layer name=stats config=mean+stddev(0:1:1:10000) dim=3000 input=[-1] 19 | relu-batchnorm-layer name=tdnn11 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=stats l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 20 | relu-batchnorm-layer name=tdnn12 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=[-1] l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 21 | output-layer name=output bias-stddev=0.0 bottleneck-dim=-1 dim=7323 include-log-softmax=True input=[-1] l2-regularize= learning-rate-factor= max-change=1.5 ng-affine-options= ng-linear-options= objective-type=linear orthonormal-constraint=1.0 output-delay=0 param-stddev=0.0 22 | -------------------------------------------------------------------------------- /conf/voxceleb_xvector_model_configs/xconfig.expanded.2: -------------------------------------------------------------------------------- 1 | # This file was created by the command: 2 | # steps/nnet3/xconfig_to_configs.py --xconfig-file exp/xvector_nnet_1a/configs/network.xconfig --config-dir exp/xvector_nnet_1a/configs/ 3 | # It contains the same content as ./xconfig but it was parsed, 4 | # default config values were set, 5 | # and Descriptors (input=xxx) were normalized. 
6 | # See also ./xconfig.expanded.1 7 | 8 | input name=input dim=23 9 | relu-batchnorm-layer name=tdnn1 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(Offset(input, -2), Offset(input, -1), input, Offset(input, 1), Offset(input, 2)) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 10 | relu-batchnorm-layer name=tdnn2 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn1 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 11 | relu-batchnorm-layer name=tdnn3 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(Offset(tdnn2, -2), tdnn2, Offset(tdnn2, 2)) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 12 | relu-batchnorm-layer name=tdnn4 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn3 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 13 | relu-batchnorm-layer name=tdnn5 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(Offset(tdnn4, -3), tdnn4, Offset(tdnn4, 3)) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 14 | relu-batchnorm-layer name=tdnn6 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn5 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 15 | relu-batchnorm-layer name=tdnn7 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=Append(Offset(tdnn6, -4), tdnn6, Offset(tdnn6, 4)) l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 16 | relu-batchnorm-layer name=tdnn8 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn7 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 17 | relu-batchnorm-layer name=tdnn9 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn8 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 18 | relu-batchnorm-layer name=tdnn10 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=1500 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn9 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 19 | stats-layer name=stats config=mean+stddev(0:1:1:10000) dim=3000 input=tdnn10 20 | 
relu-batchnorm-layer name=tdnn11 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=stats l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 21 | relu-batchnorm-layer name=tdnn12 add-log-stddev=False bias-stddev= bottleneck-dim=-1 dim=512 dropout-per-dim=False dropout-per-dim-continuous=False dropout-proportion=0.5 input=tdnn11 l2-regularize= learning-rate-factor= max-change=0.75 ng-affine-options= ng-linear-options= self-repair-scale=1e-05 target-rms=1.0 22 | output-layer name=output bias-stddev=0.0 bottleneck-dim=-1 dim=7323 include-log-softmax=True input=tdnn12 l2-regularize= learning-rate-factor= max-change=1.5 ng-affine-options= ng-linear-options= objective-type=linear orthonormal-constraint=1.0 output-delay=0 param-stddev=0.0 23 | -------------------------------------------------------------------------------- /dataprep_sdsvc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Feb 25 18:51:09 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import random 11 | import pickle 12 | import subprocess 13 | import re 14 | import os 15 | import kaldi_io 16 | 17 | from pdb import set_trace as bp 18 | 19 | from utils.sv_trials_loaders import generate_train_trial_keys, save_unique_train_valid_xvector_scps 20 | 21 | 22 | 23 | if __name__=='__main__': 24 | 25 | base_path = '/home/data2/SRE2019/prashantk/voxceleb/v1' 26 | xvectors_base_path = os.path.join(base_path,'exp/xvector_nnet_1a') 27 | 28 | stage = 1 29 | 30 | # %% Generate and save training trial keys using SRE SWBD and MX6 datasets 31 | if stage <= 1: 32 | data_spk2utt_list = np.asarray([['{}/data/sdsv_challenge_task2_train/male/spk2utt'.format(base_path), '100'], 33 | ['{}/data/sdsv_challenge_task2_train/female/spk2utt'.format(base_path), '80']]) 34 | 35 | xvector_scp_list = xvector_scp_list = np.asarray( 36 | ['{}/xvectors_sdsv_challenge_task2_train/xvector.scp'.format(xvectors_base_path)]) 37 | 38 | 39 | train_trial_keys, val_trial_keys = generate_train_trial_keys(data_spk2utt_list, xvector_scp_list, train_and_valid=True, train_ratio=0.95) 40 | 41 | # Save the training and validation trials and keys for training NPLDA and other discriminative models 42 | np.savetxt('trials_and_keys/sdsvc_train_trial_keys_100_80.tsv', train_trial_keys, fmt='%s', delimiter='\t', comments='none') 43 | np.savetxt('trials_and_keys/sdsvc_validate_trial_keys_100_80.tsv', val_trial_keys, fmt='%s', delimiter='\t', comments='none') 44 | 45 | # Save the train and validation xvectors for training a Kaldi PLDA if required 46 | train_scp_path = '{}/xvectors_sdsvc/train_split/xvector.scp'.format(xvectors_base_path) 47 | valid_scp_path = '{}/xvectors_sdsvc_aug/valid_split/xvector.scp'.format(xvectors_base_path) 48 | subprocess.call(['mkdir', '-p', os.path.dirname(train_scp_path)]) 49 | subprocess.call(['mkdir', '-p', os.path.dirname(valid_scp_path)]) 50 | save_unique_train_valid_xvector_scps(data_spk2utt_list, xvector_scp_list, train_scp_path, valid_scp_path, train_ratio=0.9) 51 | 52 | exit() 53 | 54 | # %% Make the mega xvector scp with all the xvectors, averaged enrollment xvectors, etc. 
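    # Stage 2 below augments the existing pickled x-vector dictionary for the SdSV
    # challenge task 2 sets: it rewrites the relative ' exp/xvector_nnet_1a' prefixes
    # in each scp to absolute paths with sed, reads every x-vector through
    # kaldi_io.read_vec_flt, merges the {utterance/speaker id: vector} entries into a
    # single dictionary, and re-pickles it as the file referenced by mega_xvector_pkl
    # in conf/sdsvc_config.cfg.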
55 | 56 | if stage <= 2: 57 | mega_xvec_dict = pickle.load(open('xvectors/mega_xvector_voices_voxceleb_16k.pkl', 'rb')) 58 | xvector_scp_list = xvector_scp_list = np.asarray( 59 | ['{}/xvectors_sdsv_challenge_task2_train/xvector.scp'.format(xvectors_base_path), 60 | '{}/xvectors_sdsv_challenge_task2.enroll/xvector.scp'.format(xvectors_base_path), 61 | '{}/xvectors_sdsv_challenge_task2.enroll/spk_xvector.scp'.format(xvectors_base_path), 62 | '{}/xvectors_sdsv_challenge_task2.test/xvector.scp'.format(xvectors_base_path), 63 | '{}/xvectors_sdsv_challenge_task2.test/spk_xvector.scp'.format(xvectors_base_path)]) 64 | 65 | for fx in xvector_scp_list: 66 | subprocess.call(['sed','-i', 's| exp/xvector_nnet_1a| {}|g'.format(xvectors_base_path), fx]) 67 | with open(fx) as f: 68 | scp_list = f.readlines() 69 | xvec_dict = {x.split(' ', 1)[0]: kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1]) for x in scp_list} 70 | mega_xvec_dict.update(xvec_dict) 71 | 72 | pickle.dump(mega_xvec_dict, open('xvectors/mega_xvector_voxceleb_16k_sdsvc.pkl', 'wb')) 73 | -------------------------------------------------------------------------------- /dataprep_sre.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Feb 14 14:32:19 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import random 11 | import pickle 12 | import subprocess 13 | import re 14 | import os 15 | import sys 16 | import kaldi_io 17 | 18 | from pdb import set_trace as bp 19 | 20 | from utils.sv_trials_loaders import generate_train_trial_keys, save_unique_train_valid_xvector_scps 21 | 22 | 23 | 24 | if __name__=='__main__': 25 | 26 | base_path = '/home/data2/SRE2019/prashantk/voxceleb/v2' 27 | xvectors_base_path = os.path.join(base_path,'exp/xvector_nnet_1a') 28 | 29 | stage = 6 30 | 31 | # %% Generate and save training trial keys using SRE SWBD and MX6 datasets 32 | if stage <= 1: 33 | data_spk2utt_list = np.asarray([['{}/data/sre2004/male/spk2utt'.format(base_path), '25'], 34 | ['{}/data/sre2004/female/spk2utt'.format(base_path), '25'], 35 | ['{}/data/sre_2005_2006_08/male/spk2utt'.format(base_path), '40'], 36 | ['{}/data/sre_2005_2006_08/female/spk2utt'.format(base_path), '40'], 37 | ['{}/data/sre10/male/spk2utt'.format(base_path), '30'], 38 | ['{}/data/sre10/female/spk2utt'.format(base_path), '30'], 39 | ['{}/data/swbd/male/spk2utt'.format(base_path), '15'], 40 | ['{}/data/swbd/female/spk2utt'.format(base_path), '15'], 41 | ['{}/data/mx6/grepd/male/spk2utt'.format(base_path), '30'], 42 | ['{}/data/mx6/grepd/female/spk2utt'.format(base_path), '35']]) 43 | 44 | xvector_scp_list = np.asarray( 45 | ['{}/xvectors_swbd/xvector_fullpaths.scp'.format(xvectors_base_path), 46 | '{}/xvectors_sre/xvector_fullpaths.scp'.format(xvectors_base_path), 47 | '{}/xvectors_mx6/xvector_fullpaths.scp'.format(xvectors_base_path)]) 48 | 49 | 50 | train_trial_keys, val_trial_keys = generate_train_trial_keys(data_spk2utt_list, xvector_scp_list, batch_size=4096, train_and_valid=True, train_ratio=0.95) 51 | 52 | # Save the training and validation trials and keys for training NPLDA and other discriminative models 53 | np.savetxt('trials_and_keys/swbd_sre04to10_mx6_train_trial_keys.tsv', train_trial_keys, fmt='%s', delimiter='\t', comments='none') 54 | np.savetxt('trials_and_keys/swbd_sre04to10_mx6_validate_trial_keys.tsv', val_trial_keys, fmt='%s', delimiter='\t', comments='none') 55 | 56 | # Save the train and validation xvectors for 
training a Kaldi PLDA if required 57 | train_scp_path = '{}/xvectors_swbd_sre04to10_mx6/train_split/xvector.scp'.format(xvectors_base_path) 58 | valid_scp_path = '{}/xvectors_swbd_sre04to10_mx6/valid_split/xvector.scp'.format(xvectors_base_path) 59 | save_unique_train_valid_xvector_scps(data_spk2utt_list, xvector_scp_list, train_scp_path, valid_scp_path, train_ratio=0.95) 60 | 61 | # %% Make SRE 18 dev and eval trial keys in required format using existing trial keys 62 | 63 | if stage <= 2: 64 | sre18_dev_trial_key_file_path = "/home/data/SRE2019/LDC2019E59/dev/docs/sre18_dev_trial_key.tsv" 65 | sre18_dev_trial_key = np.genfromtxt(sre18_dev_trial_key_file_path, dtype=str, skip_header=1) 66 | sre18_dev_trial_key[:,2] = (sre18_dev_trial_key[:,3]=='target').astype(int).astype(str) 67 | for i, testid in enumerate(sre18_dev_trial_key[:,1]): 68 | sre18_dev_trial_key[i,1] = os.path.splitext(os.path.basename(testid))[0] 69 | sre18_dev_trial_key = sre18_dev_trial_key[:,:3] 70 | 71 | np.savetxt('trials_and_keys/sre18_dev_keys.tsv', sre18_dev_trial_key, fmt='%s', delimiter='\t', comments='none') 72 | 73 | sre18_eval_trial_key_file_path = "/home/data/SRE2019/LDC2019E59/eval/docs/sre18_eval_trial_key.tsv" 74 | sre18_eval_trial_key = np.genfromtxt(sre18_eval_trial_key_file_path, dtype=str, skip_header=1) 75 | sre18_eval_trial_key[:,2] = (sre18_eval_trial_key[:,3]=='target').astype(int).astype(str) 76 | for i, testid in enumerate(sre18_eval_trial_key[:,1]): 77 | sre18_eval_trial_key[i,1] = os.path.splitext(os.path.basename(testid))[0] 78 | sre18_eval_trial_key = sre18_eval_trial_key[:,:3] 79 | 80 | np.savetxt('trials_and_keys/sre18_eval_keys.tsv', sre18_dev_trial_key, fmt='%s', delimiter='\t', comments='none') 81 | 82 | sys.exit() 83 | 84 | # %% Get SRE 2008 trials in required format 85 | 86 | if stage <= 3: 87 | xvector_scp_file = '{}/xvectors_sre08/xvector_fullpaths.scp'.format(xvectors_base_path) 88 | trials_key_file = '/home/data/SRE08_TEST/export/corpora5/LDC/LDC2011S08/data/keys/NIST_SRE08_KEYS.v0.1/trial-keys/NIST_SRE08_ALL.trial.key' 89 | model_key_file = '/home/data/SRE08_TEST/export/corpora5/LDC/LDC2011S08/data/keys/NIST_SRE08_KEYS.v0.1/model-keys/NIST_SRE08_ALL.model.key' 90 | xvector_scp = np.genfromtxt(xvector_scp_file,dtype=str) 91 | xvector_scp_dict = dict(zip(xvector_scp[:,0], xvector_scp[:,1])) 92 | trials = np.genfromtxt(trials_key_file,dtype=str,delimiter=',') 93 | model_key = np.genfromtxt(model_key_file,dtype=str,delimiter=',') 94 | all_utts_dict = {(w.split('_')[-2]+'_'+w.split('_')[-1].lower()):w for w in xvector_scp[:,0]} 95 | model_key_dict = {w[0]:[all_utts_dict[a] for a in w[2].replace(':','_').split() if a in all_utts_dict] for w in model_key} 96 | model_key_dict = {k:v for k,v in model_key_dict.items() if len(v)>0} 97 | enroll_spk2utt = np.sort(["{} {}".format(k,' '.join(v)) for k,v in model_key_dict.items()]) 98 | trials = [[w[0], w[1]+'_'+w[2], w[3]] for w in trials] 99 | trials_key_SRE08 = [[w[0], all_utts_dict[w[1]],str(int(w[2]=='target'))] for w in trials if w[1] in all_utts_dict] 100 | 101 | np.savetxt('{}/xvectors_sre08/enroll_spk2utt'.format(xvectors_base_path), enroll_spk2utt, fmt='%s', delimiter='\t', comments='none') 102 | subprocess.call(['ivector-mean', 'ark:{}/xvectors_sre08/enroll_spk2utt'.format(xvectors_base_path), 'scp:{}/xvectors_sre08/xvector_fullpaths.scp'.format(xvectors_base_path), 'ark,scp:{}/xvectors_sre08/enroll_xvector.ark,{}/xvectors_sre08/enroll_xvector.scp'.format(xvectors_base_path,xvectors_base_path)]) 103 | 
np.savetxt('trials_and_keys/sre08_eval_trial_keys.tsv', trials_key_SRE08, fmt='%s', delimiter='\t', comments='none') 104 | 105 | # %% Get SRE 2010 trials in required format 106 | if stage <= 4: 107 | xvector_scp_file = '{}/xvectors_sre10/xvector_fullpaths.scp'.format(xvectors_base_path) 108 | trials_key_file = '/home/data/SRE10/export/corpora5/SRE/SRE2010/eval/keys/NIST_SRE10_ALL.trial.key' 109 | model_key_file = '/home/data/SRE10/export/corpora5/SRE/SRE2010/eval/train/NIST_SRE10_ALL.model.key' 110 | xvector_scp_10 = np.genfromtxt(xvector_scp_file, dtype=str) 111 | xvector_scp_10_dict = dict(zip(xvector_scp_10[:,0], xvector_scp_10[:,1])) 112 | trials_key_10 = np.genfromtxt(trials_key_file, dtype=str, delimiter=',') 113 | trials_key_10_subset = trials_key_10[np.random.rand(len(trials_key_10))<0.12] 114 | model_key_10 = np.asarray([re.split(' m | f ',w.strip()) for w in open(model_key_file,'r').readlines()]) 115 | all_utts_dict_10 = {('_'.join(w.split('_')[-2:])):w for w in xvector_scp_10[:,0]} 116 | model_key_dict_10 = {w[0]:[os.path.basename(x) for x in w[1].replace('.sph','').replace(':','_').split()] for w in model_key_10} 117 | model_key_dict_10 = {k:[all_utts_dict_10[w] for w in v if w in all_utts_dict_10] for k,v in model_key_dict_10.items()} 118 | model_key_dict_10 = {k:v for k,v in model_key_dict_10.items() if len(v)>0} 119 | enroll_spk2utt = np.sort(["{} {}".format(k,' '.join(v)) for k,v in model_key_dict_10.items()]) 120 | 121 | np.savetxt('{}/xvectors_sre10/enroll_spk2utt'.format(xvectors_base_path), enroll_spk2utt, fmt='%s', delimiter='\t', comments='none') 122 | subprocess.call(['ivector-mean', 'ark:{}/xvectors_sre10/enroll_spk2utt'.format(xvectors_base_path), 'scp:{}/xvectors_sre10/xvector_fullpaths.scp'.format(xvectors_base_path), 'ark,scp:{}/xvectors_sre10/enroll_xvector.ark,{}/xvectors_sre10/enroll_xvector.scp'.format(xvectors_base_path,xvectors_base_path)]) 123 | 124 | trials = [[w[0], w[1]+'_'+(w[2]).upper(), w[3]] for w in trials_key_10] 125 | trials_subset = [[w[0], w[1]+'_'+(w[2]).upper(), w[3]] for w in trials_key_10_subset] 126 | trials_key_SRE10 = [[w[0], all_utts_dict_10[w[1]],str(int(w[2]=='target'))] for w in trials if w[1] in all_utts_dict_10] 127 | trials_key_subset_SRE10 = [[w[0], all_utts_dict_10[w[1]],str(int(w[2]=='target'))] for w in trials_subset if w[1] in all_utts_dict_10] 128 | 129 | np.savetxt('trials_and_keys/sre10_eval_trial_keys.tsv', trials_key_SRE10, fmt='%s', delimiter='\t', comments='none') 130 | np.savetxt('trials_and_keys/sre10_eval_trial_keys_subset.tsv', trials_key_subset_SRE10, fmt='%s', delimiter='\t', comments='none') 131 | 132 | # %% Make the mega xvector scp with all the xvectors, averaged enrollment xvectors, etc. 
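# The block below merges every xvector.scp / spk_xvector.scp listed in xvector_scp_list into a
# single {utterance-or-model-id: numpy x-vector} dictionary and pickles it. A minimal sketch of
# how that pickle is read back downstream (the key is a made-up placeholder; the 512-dim size
# matches the extractor in utils/models.py but is otherwise an assumption):
#
#   mega_xvec_dict = pickle.load(open('xvectors/mega_xvector_voxceleb_8k.pkl', 'rb'))
#   xvec = mega_xvec_dict['some_utt_or_model_id']   # e.g. numpy array of shape (512,)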
133 | 134 | if stage <= 5: 135 | xvector_scp_list = np.asarray( 136 | ['{}/xvectors_swbd/xvector_fullpaths.scp'.format(xvectors_base_path), 137 | '{}/xvectors_sre/xvector_fullpaths.scp'.format(xvectors_base_path), 138 | '{}/xvectors_mx6/xvector_fullpaths.scp'.format(xvectors_base_path), 139 | '{}/xvectors_sre16_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 140 | '{}/xvectors_sre16_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 141 | '{}/xvectors_sre16_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path), 142 | '{}/xvectors_sre18_dev_enrollment/spk_xvector.scp'.format(xvectors_base_path), 143 | '{}/xvectors_sre18_dev_test/xvector_fullpaths.scp'.format(xvectors_base_path), 144 | '{}/xvectors_sre18_dev_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 145 | '{}/xvectors_sre18_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 146 | '{}/xvectors_sre18_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 147 | '{}/xvectors_sre18_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path), 148 | '{}/xvectors_sre19_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 149 | '{}/xvectors_sre19_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 150 | '{}/xvectors_sre19_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path)]) 151 | 152 | mega_scp_dict = {} 153 | mega_xvec_dict = {} 154 | for fx in xvector_scp_list: 155 | subprocess.call(['sed','-i', 's| exp/xvector_nnet_1a| {}|g'.format(xvectors_base_path), fx]) 156 | with open(fx) as f: 157 | scp_list = f.readlines() 158 | scp_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list} 159 | xvec_dict = {x.split(' ', 1)[0]: kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1]) for x in scp_list} 160 | mega_scp_dict.update(scp_dict) 161 | mega_xvec_dict.update(xvec_dict) 162 | 163 | mega_scp = np.c_[np.asarray(list(mega_scp_dict.keys()))[:,np.newaxis], np.asarray(list(mega_scp_dict.values()))] 164 | 165 | np.savetxt('xvectors/mega_xvector_voxceleb_8k.scp', mega_scp, fmt='%s', delimiter=' ', comments='') 166 | 167 | pickle.dump(mega_xvec_dict, open('xvectors/mega_xvector_voxceleb_8k.pkl', 'wb')) -------------------------------------------------------------------------------- /dataprep_sre18_egs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Mar 4 16:44:26 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import random 11 | import pickle 12 | import subprocess 13 | import re 14 | import os 15 | import sys 16 | import kaldi_io 17 | 18 | from pdb import set_trace as bp 19 | 20 | from utils.sv_trials_loaders import generate_train_trial_keys, save_unique_train_valid_xvector_scps 21 | 22 | 23 | 24 | if __name__=='__main__': 25 | 26 | base_path = '/home/data2/SRE2019/prashantk/voxceleb/v3' 27 | xvectors_base_path = os.path.join(base_path,'exp/xvector_nnet_sre18_3') 28 | 29 | stage = 1 30 | 31 | # %% Generate and save training trial keys using SRE SWBD and MX6 datasets 32 | if stage <= 1: 33 | data_spk2utt_list = np.asarray([['{}/egs/male/spk2utt'.format(xvectors_base_path), '5'], 34 | ['{}/egs/female/spk2utt'.format(xvectors_base_path), '5']]) 35 | 36 | 37 | xvector_scp_list = np.asarray( 38 | ['{}/egs/egs.scp'.format(xvectors_base_path)]) 39 | 40 | 41 | train_trial_keys, val_trial_keys = generate_train_trial_keys(data_spk2utt_list, xvector_scp_list, train_and_valid=True, train_ratio=0.95) 42 | 
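# The trial-key files written just below are plain tab-separated rows; they are assumed here to
# use the same three-column layout that the SRE 18 key conversion in the next stage writes out:
#
#   <enroll_model_or_utt_id> <tab> <test_utt_id> <tab> <label>   # label: '1' = target, '0' = non-target
#
# e.g. (made-up ids):  spk10345    sre18_eval_utt_0001    1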
43 | # Save the training and validation trials and keys for training NPLDA and other discriminative models 44 | np.savetxt('trials_and_keys/sre18_egs_train_trial_keys.tsv', train_trial_keys, fmt='%s', delimiter='\t', comments='none') 45 | np.savetxt('trials_and_keys/sre18_egs_validate_trial_keys.tsv', val_trial_keys, fmt='%s', delimiter='\t', comments='none') 46 | sys.exit() 47 | # Save the train and validation xvectors for training a Kaldi PLDA if required 48 | # train_scp_path = '{}/xvectors_swbd_sre04to10_mx6/train_split/xvector.scp'.format(xvectors_base_path) 49 | # valid_scp_path = '{}/xvectors_swbd_sre04to10_mx6/valid_split/xvector.scp'.format(xvectors_base_path) 50 | # save_unique_train_valid_xvector_scps(data_spk2utt_list, xvector_scp_list, train_scp_path, valid_scp_path, train_ratio=0.95) 51 | 52 | # %% Make SRE 18 dev and eval trial keys in required format using existing trial keys 53 | 54 | if stage <= 2: 55 | sre18_dev_trial_key_file_path = "/home/data/SRE2019/LDC2019E59/dev/docs/sre18_dev_trial_key.tsv" 56 | sre18_dev_trial_key = np.genfromtxt(sre18_dev_trial_key_file_path, dtype=str, skip_header=1) 57 | sre18_dev_trial_key[:,2] = (sre18_dev_trial_key[:,3]=='target').astype(int).astype(str) 58 | for i, testid in enumerate(sre18_dev_trial_key[:,1]): 59 | sre18_dev_trial_key[i,1] = os.path.splitext(os.path.basename(testid))[0] 60 | sre18_dev_trial_key = sre18_dev_trial_key[:,:3] 61 | 62 | np.savetxt('trials_and_keys/sre18_dev_keys.tsv', sre18_dev_trial_key, fmt='%s', delimiter='\t', comments='none') 63 | 64 | sre18_eval_trial_key_file_path = "/home/data/SRE2019/LDC2019E59/eval/docs/sre18_eval_trial_key.tsv" 65 | sre18_eval_trial_key = np.genfromtxt(sre18_eval_trial_key_file_path, dtype=str, skip_header=1) 66 | sre18_eval_trial_key[:,2] = (sre18_eval_trial_key[:,3]=='target').astype(int).astype(str) 67 | for i, testid in enumerate(sre18_eval_trial_key[:,1]): 68 | sre18_eval_trial_key[i,1] = os.path.splitext(os.path.basename(testid))[0] 69 | sre18_eval_trial_key = sre18_eval_trial_key[:,:3] 70 | 71 | np.savetxt('trials_and_keys/sre18_eval_keys.tsv', sre18_eval_trial_key, fmt='%s', delimiter='\t', comments='none') 72 | 73 | sys.exit() 74 | 75 | # %% Get SRE 2008 trials in required format 76 | 77 | if stage <= 3: 78 | xvector_scp_file = '{}/xvectors_sre08/xvector_fullpaths.scp'.format(xvectors_base_path) 79 | trials_key_file = '/home/data/SRE08_TEST/export/corpora5/LDC/LDC2011S08/data/keys/NIST_SRE08_KEYS.v0.1/trial-keys/NIST_SRE08_ALL.trial.key' 80 | model_key_file = '/home/data/SRE08_TEST/export/corpora5/LDC/LDC2011S08/data/keys/NIST_SRE08_KEYS.v0.1/model-keys/NIST_SRE08_ALL.model.key' 81 | xvector_scp = np.genfromtxt(xvector_scp_file,dtype=str) 82 | xvector_scp_dict = dict(zip(xvector_scp[:,0], xvector_scp[:,1])) 83 | trials = np.genfromtxt(trials_key_file,dtype=str,delimiter=',') 84 | model_key = np.genfromtxt(model_key_file,dtype=str,delimiter=',') 85 | all_utts_dict = {(w.split('_')[-2]+'_'+w.split('_')[-1].lower()):w for w in xvector_scp[:,0]} 86 | model_key_dict = {w[0]:[all_utts_dict[a] for a in w[2].replace(':','_').split() if a in all_utts_dict] for w in model_key} 87 | model_key_dict = {k:v for k,v in model_key_dict.items() if len(v)>0} 88 | enroll_spk2utt = np.sort(["{} {}".format(k,' '.join(v)) for k,v in model_key_dict.items()]) 89 | trials = [[w[0], w[1]+'_'+w[2], w[3]] for w in trials] 90 | trials_key_SRE08 = [[w[0], all_utts_dict[w[1]],str(int(w[2]=='target'))] for w in trials if w[1] in all_utts_dict] 91 | 92 |
np.savetxt('{}/xvectors_sre08/enroll_spk2utt'.format(xvectors_base_path), enroll_spk2utt, fmt='%s', delimiter='\t', comments='none') 93 | subprocess.call(['ivector-mean', 'ark:{}/xvectors_sre08/enroll_spk2utt'.format(xvectors_base_path), 'scp:{}/xvectors_sre08/xvector_fullpaths.scp'.format(xvectors_base_path), 'ark,scp:{}/xvectors_sre08/enroll_xvector.ark,{}/xvectors_sre08/enroll_xvector.scp'.format(xvectors_base_path,xvectors_base_path)]) 94 | np.savetxt('trials_and_keys/sre08_eval_trial_keys.tsv', trials_key_SRE08, fmt='%s', delimiter='\t', comments='none') 95 | 96 | # %% Get SRE 2010 trials in required format 97 | if stage <= 4: 98 | xvector_scp_file = '{}/xvectors_sre10/xvector_fullpaths.scp'.format(xvectors_base_path) 99 | trials_key_file = '/home/data/SRE10/export/corpora5/SRE/SRE2010/eval/keys/NIST_SRE10_ALL.trial.key' 100 | model_key_file = '/home/data/SRE10/export/corpora5/SRE/SRE2010/eval/train/NIST_SRE10_ALL.model.key' 101 | xvector_scp_10 = np.genfromtxt(xvector_scp_file, dtype=str) 102 | xvector_scp_10_dict = dict(zip(xvector_scp_10[:,0], xvector_scp_10[:,1])) 103 | trials_key_10 = np.genfromtxt(trials_key_file, dtype=str, delimiter=',') 104 | trials_key_10_subset = trials_key_10[np.random.rand(len(trials_key_10))<0.12] 105 | model_key_10 = np.asarray([re.split(' m | f ',w.strip()) for w in open(model_key_file,'r').readlines()]) 106 | all_utts_dict_10 = {('_'.join(w.split('_')[-2:])):w for w in xvector_scp_10[:,0]} 107 | model_key_dict_10 = {w[0]:[os.path.basename(x) for x in w[1].replace('.sph','').replace(':','_').split()] for w in model_key_10} 108 | model_key_dict_10 = {k:[all_utts_dict_10[w] for w in v if w in all_utts_dict_10] for k,v in model_key_dict_10.items()} 109 | model_key_dict_10 = {k:v for k,v in model_key_dict_10.items() if len(v)>0} 110 | enroll_spk2utt = np.sort(["{} {}".format(k,' '.join(v)) for k,v in model_key_dict_10.items()]) 111 | 112 | np.savetxt('{}/xvectors_sre10/enroll_spk2utt'.format(xvectors_base_path), enroll_spk2utt, fmt='%s', delimiter='\t', comments='none') 113 | subprocess.call(['ivector-mean', 'ark:{}/xvectors_sre10/enroll_spk2utt'.format(xvectors_base_path), 'scp:{}/xvectors_sre10/xvector_fullpaths.scp'.format(xvectors_base_path), 'ark,scp:{}/xvectors_sre10/enroll_xvector.ark,{}/xvectors_sre10/enroll_xvector.scp'.format(xvectors_base_path,xvectors_base_path)]) 114 | 115 | trials = [[w[0], w[1]+'_'+(w[2]).upper(), w[3]] for w in trials_key_10] 116 | trials_subset = [[w[0], w[1]+'_'+(w[2]).upper(), w[3]] for w in trials_key_10_subset] 117 | trials_key_SRE10 = [[w[0], all_utts_dict_10[w[1]],str(int(w[2]=='target'))] for w in trials if w[1] in all_utts_dict_10] 118 | trials_key_subset_SRE10 = [[w[0], all_utts_dict_10[w[1]],str(int(w[2]=='target'))] for w in trials_subset if w[1] in all_utts_dict_10] 119 | 120 | np.savetxt('trials_and_keys/sre10_eval_trial_keys.tsv', trials_key_SRE10, fmt='%s', delimiter='\t', comments='none') 121 | np.savetxt('trials_and_keys/sre10_eval_trial_keys_subset.tsv', trials_key_subset_SRE10, fmt='%s', delimiter='\t', comments='none') 122 | 123 | # %% Make the mega xvector scp with all the xvectors, averaged enrollment xvectors, etc. 
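# Each line of the scp files gathered below has the usual Kaldi form
# "<utt-id> <path/to/xvector.ark>:<byte-offset>". The loop keys the dictionary on the utt-id and
# hands the rest of the line to kaldi_io.read_vec_flt(), which resolves the ark/offset specifier
# into a float vector. A minimal sketch with a made-up scp line:
#
#   line = 'id00012_utt1 /path/to/xvectors_sre/xvector.ark:1234'
#   key, rxspec = line.rstrip('\n').split(' ', 1)
#   vec = kaldi_io.read_vec_flt(rxspec)   # numpy array of floats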
124 | 125 | if stage <= 5: 126 | xvector_scp_list = np.asarray( 127 | ['{}/xvectors_swbd/xvector_fullpaths.scp'.format(xvectors_base_path), 128 | '{}/xvectors_sre/xvector_fullpaths.scp'.format(xvectors_base_path), 129 | '{}/xvectors_mx6/xvector_fullpaths.scp'.format(xvectors_base_path), 130 | '{}/xvectors_sre16_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 131 | '{}/xvectors_sre16_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 132 | '{}/xvectors_sre16_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path), 133 | '{}/xvectors_sre18_dev_enrollment/spk_xvector.scp'.format(xvectors_base_path), 134 | '{}/xvectors_sre18_dev_test/xvector_fullpaths.scp'.format(xvectors_base_path), 135 | '{}/xvectors_sre18_dev_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 136 | '{}/xvectors_sre18_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 137 | '{}/xvectors_sre18_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 138 | '{}/xvectors_sre18_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path), 139 | '{}/xvectors_sre19_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 140 | '{}/xvectors_sre19_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 141 | '{}/xvectors_sre19_eval_enrollment/spk_xvector.scp'.format(xvectors_base_path)]) 142 | 143 | mega_scp_dict = {} 144 | mega_xvec_dict = {} 145 | for fx in xvector_scp_list: 146 | subprocess.call(['sed','-i', 's| exp/xvector_nnet_1a| {}|g'.format(xvectors_base_path), fx]) 147 | with open(fx) as f: 148 | scp_list = f.readlines() 149 | scp_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list} 150 | xvec_dict = {x.split(' ', 1)[0]: kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1]) for x in scp_list} 151 | mega_scp_dict.update(scp_dict) 152 | mega_xvec_dict.update(xvec_dict) 153 | 154 | mega_scp = np.c_[np.asarray(list(mega_scp_dict.keys()))[:,np.newaxis], np.asarray(list(mega_scp_dict.values()))] 155 | 156 | np.savetxt('xvectors/mega_xvector_voxceleb_8k.scp', mega_scp, fmt='%s', delimiter=' ', comments='') 157 | 158 | pickle.dump(mega_xvec_dict, open('xvectors/mega_xvector_voxceleb_8k.pkl', 'wb')) -------------------------------------------------------------------------------- /dataprep_voices_challenge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Feb 25 18:51:09 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import random 11 | import pickle 12 | import subprocess 13 | import re 14 | import os 15 | import kaldi_io 16 | 17 | from pdb import set_trace as bp 18 | 19 | from utils.sv_trials_loaders import generate_train_trial_keys, save_unique_train_valid_xvector_scps 20 | 21 | 22 | 23 | if __name__=='__main__': 24 | 25 | base_path = '/home/data2/SRE2019/prashantk/voxceleb/v1' 26 | xv_path = 'exp/xvector_nnet_1a' 27 | xvectors_base_path = os.path.join(base_path, xv_path) 28 | 29 | stage = 3 30 | 31 | # %% Generate and save training trial keys using SRE SWBD and MX6 datasets 32 | if stage <= 1: 33 | data_spk2utt_list = np.asarray([['{}/data/train_16k_combined/male/spk2utt'.format(base_path), '1'], 34 | ['{}/data/train_16k_combined/female/spk2utt'.format(base_path), '1']]) 35 | 36 | xvector_scp_list = xvector_scp_list = np.asarray( 37 | ['{}/xvectors_train_16k_combined/xvector_fullpaths.scp'.format(xvectors_base_path), 38 | 
'{}/xvectors_voices_dev_enrollment_copy/xvector_fullpaths.scp'.format(xvectors_base_path), 39 | '{}/xvectors_voices_dev_test_copy/xvector_fullpaths.scp'.format(xvectors_base_path), 40 | '{}/xvectors_voices_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 41 | '{}/xvectors_voices_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path)]) 42 | 43 | 44 | train_trial_keys, val_trial_keys = generate_train_trial_keys(data_spk2utt_list, xvector_scp_list, train_and_valid=True, train_ratio=0.95) 45 | 46 | # Save the training and validation trials and keys for training NPLDA and other discriminative models 47 | np.savetxt('trials_and_keys/voxceleb_16k_aug_train_trial_keys_1_10.tsv', train_trial_keys, fmt='%s', delimiter='\t', comments='none') 48 | np.savetxt('trials_and_keys/voxceleb_16k_aug_validate_trial_keys_1_10.tsv', val_trial_keys, fmt='%s', delimiter='\t', comments='none') 49 | 50 | # Save the train and validation xvectors for training a Kaldi PLDA if required 51 | train_scp_path = '{}/xvectors_voxceleb_aug/train_split/xvector.scp'.format(xvectors_base_path) 52 | valid_scp_path = '{}/xvectors_voxceleb_aug/valid_split/xvector.scp'.format(xvectors_base_path) 53 | save_unique_train_valid_xvector_scps(data_spk2utt_list, xvector_scp_list, train_scp_path, valid_scp_path, train_ratio=0.95) 54 | bp() 55 | 56 | # %% Get the Voices dev trials in required format 57 | if stage <= 2: 58 | voices_dev_trial_key_file_path = '/home/data/VOICES/interspeech2019Challenge/Development_Data/Speaker_Recognition/sid_dev_lists_and_keys/dev-trial-keys.lst' 59 | voices_dev_trial_key = np.genfromtxt(voices_dev_trial_key_file_path, dtype=str, skip_header=0) 60 | voices_dev_trial_key[:,2] = (voices_dev_trial_key[:,2]=='tgt').astype(int).astype(str) 61 | for i, testid in enumerate(voices_dev_trial_key[:,1]): 62 | voices_dev_trial_key[i,1] = os.path.splitext(os.path.basename(testid))[0] 63 | voices_dev_trial_key = voices_dev_trial_key[:,:3] 64 | 65 | np.savetxt('trials_and_keys/voices_dev_keys.tsv', voices_dev_trial_key, fmt='%s', delimiter='\t', comments='none') 66 | 67 | # %% Make the mega xvector scp with all the xvectors, averaged enrollment xvectors, etc. 
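# Unlike the SRE scripts, the block below keys the merged dictionary on the bare segment name
# (basename with the extension stripped) so that entries line up with the test ids written by
# stage 2 above. A minimal sketch of that key normalisation (the path is a placeholder):
#
#   utt = 'some/dir/segment_0001.wav'
#   key = os.path.splitext(os.path.basename(utt))[0]   # -> 'segment_0001'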
68 | 69 | if stage <= 3: 70 | xvector_scp_list = xvector_scp_list = np.asarray( 71 | # ['{}/xvectors_train_16k_combined/xvector_fullpaths.scp'.format(xvectors_base_path), 72 | # '{}/xvectors_voices_dev_enrollment_copy/xvector_fullpaths.scp'.format(xvectors_base_path), 73 | # '{}/xvectors_voices_dev_test_copy/xvector_fullpaths.scp'.format(xvectors_base_path), 74 | # '{}/xvectors_voices_eval_enrollment/xvector_fullpaths.scp'.format(xvectors_base_path), 75 | # '{}/xvectors_voices_eval_test/xvector_fullpaths.scp'.format(xvectors_base_path), 76 | # '{}/xvectors_sitw_combined/xvector.scp'.format(xvectors_base_path), 77 | ['{}/xvectors_sitw_eval_enroll/spk_xvector.scp'.format(xvectors_base_path), 78 | '{}/xvectors_sitw_eval_test/xvector.scp'.format(xvectors_base_path)]) 79 | 80 | # mega_scp_dict = {} 81 | mega_xvec_dict = pickle.load(open('xvectors/mega_xvector_voices_voxceleb_16k.pkl','rb')) 82 | for fx in xvector_scp_list: 83 | subprocess.call(['sed','-i', 's| {}| {}|g'.format(xv_path, xvectors_base_path), fx]) 84 | with open(fx) as f: 85 | scp_list = f.readlines() 86 | scp_dict = {os.path.splitext(os.path.basename(x.split(' ', 1)[0]))[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list} 87 | xvec_dict = {os.path.splitext(os.path.basename(x.split(' ', 1)[0]))[0]: kaldi_io.read_vec_flt(x.rstrip('\n').split(' ', 1)[1]) for x in scp_list} 88 | # mega_scp_dict.update(scp_dict) 89 | mega_xvec_dict.update(xvec_dict) 90 | 91 | # mega_scp = np.c_[np.asarray(list(mega_scp_dict.keys()))[:,np.newaxis], np.asarray(list(mega_scp_dict.values()))] 92 | 93 | # np.savetxt('xvectors/mega_xvector_voices_voxceleb_16k.scp', mega_scp, fmt='%s', delimiter=' ', comments='') 94 | 95 | pickle.dump(mega_xvec_dict, open('xvectors/mega_xvector_voices_voxceleb_16k.pkl', 'wb')) -------------------------------------------------------------------------------- /utils/Kaldi2NumpyUtils/__pycache__/kaldiPlda2numpydict.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/utils/Kaldi2NumpyUtils/__pycache__/kaldiPlda2numpydict.cpython-36.pyc -------------------------------------------------------------------------------- /utils/Kaldi2NumpyUtils/__pycache__/kaldiPlda2numpydict.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/utils/Kaldi2NumpyUtils/__pycache__/kaldiPlda2numpydict.cpython-37.pyc -------------------------------------------------------------------------------- /utils/Kaldi2NumpyUtils/kaldiPlda2numpydict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 5 11:17:57 2019 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import subprocess 13 | import pickle 14 | 15 | def kaldiPlda2numpydict(pldaFile, outpicklefile=''): 16 | #logging.debug('kaldi text file to numpy array: {}'.format(textfile)) 17 | fin = subprocess.check_output(["ivector-copy-plda", "--binary=false", pldaFile ,"-"]) 18 | res = {} 19 | fin = fin.decode("utf-8").split('\n') 20 | while '' in fin: 21 | fin.remove('') 22 | splitted = fin[0].strip().split() 23 | res['plda_mean'] = np.asarray(splitted[2:-1]).astype(float) 24 | tmparr=[] 25 | for i,line in enumerate(fin[2:]): 26 | splitted = line.strip().split() 27 | if 
splitted[-1] == ']': 28 | splitted = splitted[:-1] 29 | tmparr.append(np.asarray(splitted).astype(float)) 30 | break 31 | else: 32 | tmparr.append(np.asarray(splitted).astype(float)) 33 | res['diagonalizing_transform'] = np.asarray(tmparr) 34 | res['Psi_across_covar_diag'] = np.asarray(fin[-2].strip().split()[1:-1]).astype(float) 35 | ac = res['Psi_across_covar_diag'] 36 | tot = 1 + res['Psi_across_covar_diag'] 37 | res['diagP'] = ac/(tot*(tot-ac*ac/tot)) 38 | res['diagQ'] = (1/tot) - 1/(tot - ac*ac/tot) 39 | if outpicklefile: 40 | with open(outpicklefile,'wb') as f: 41 | pickle.dump(res,f) 42 | else: 43 | return res 44 | 45 | 46 | if __name__=='__main__': 47 | if len(sys.argv)==1 or len(sys.argv)>=4: 48 | print("Usage: {} ".format(sys.argv[0])) 49 | elif len(sys.argv)==2: 50 | print(kaldiPlda2numpydict(sys.argv[1])) 51 | else: 52 | kaldiPlda2numpydict(sys.argv[1],sys.argv[2]) 53 | -------------------------------------------------------------------------------- /utils/Kaldi2NumpyUtils/kaldifeats2numpydict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Sep 24 09:52:10 2019 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import subprocess 13 | import pickle 14 | 15 | def kaldifeats2numpydict(inArkOrScpFile, outpicklefile=''): 16 | #logging.debug('kaldi text file to numpy array: {}'.format(textfile)) 17 | if os.path.splitext(inArkOrScpFile)[1] == '.scp': 18 | fin = subprocess.check_output(["copy-feats", "scp:{}".format(inArkOrScpFile),"ark,t:-"]) 19 | else: #Assuming ARK 20 | fin = subprocess.check_output(["copy-feats", "ark:{}".format(inArkOrScpFile),"ark,t:-"]) 21 | res = {} 22 | fin = fin.decode("utf-8").split('\n') 23 | while '' in fin: 24 | fin.remove('') 25 | tmparr=[] 26 | arrname=[] 27 | for line in fin: 28 | splitted = line.strip().split() 29 | if splitted[-1] == '[': 30 | if arrname: 31 | res[arrname] = np.asarray(tmparr) 32 | arrname = splitted[0] 33 | tmparr = [] 34 | else: 35 | if splitted[-1] == ']': 36 | splitted = splitted[:-1] 37 | tmparr.append(np.asarray(splitted).astype(float)) 38 | res[arrname] = np.asarray(tmparr) 39 | if outpicklefile: 40 | with open(outpicklefile,'wb') as f: 41 | pickle.dump(res,f) 42 | else: 43 | return res 44 | 45 | 46 | if __name__=='__main__': 47 | if len(sys.argv)==1 or len(sys.argv)>=4: 48 | print("Usage: {} ".format(sys.argv[0])) 49 | elif len(sys.argv)==2: 50 | print(kaldifeats2numpydict(sys.argv[1])) 51 | else: 52 | kaldifeats2numpydict(sys.argv[1],sys.argv[2]) 53 | -------------------------------------------------------------------------------- /utils/Kaldi2NumpyUtils/kaldivec2numpydict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Sep 23 16:24:18 2019 5 | 6 | @author: shreyasr 7 | """ 8 | import os 9 | import sys 10 | import numpy as np 11 | import subprocess 12 | import pickle 13 | 14 | def kaldivec2numpydict(inArkOrScpFile, outpicklefile=''): 15 | #logging.debug('kaldi text file to numpy array: {}'.format(textfile)) 16 | if os.path.splitext(inArkOrScpFile)[1] == '.scp': 17 | fin = subprocess.check_output(["copy-vector", "scp:{}".format(inArkOrScpFile),"ark,t:-"]) 18 | else: #Assuming ARK 19 | fin = subprocess.check_output(["copy-vector", "ark:{}".format(inArkOrScpFile),"ark,t:-"]) 20 | res = {} 21 | fin = fin.decode("utf-8").split('\n') 22 | while '' in 
fin: 23 | fin.remove('') 24 | for line in fin: 25 | splitted = line.strip().split() 26 | res[splitted[0]] = np.asarray(splitted[2:-1]).astype(float) 27 | if outpicklefile: 28 | with open(outpicklefile,'wb') as f: 29 | pickle.dump(res,f) 30 | else: 31 | return res 32 | 33 | 34 | if __name__=='__main__': 35 | if len(sys.argv)==1 or len(sys.argv)>=4: 36 | print("Usage: {} ".format(sys.argv[0])) 37 | elif len(sys.argv)==2: 38 | print(kaldivec2numpydict(sys.argv[1])) 39 | else: 40 | kaldivec2numpydict(sys.argv[1],sys.argv[2]) 41 | -------------------------------------------------------------------------------- /utils/NpldaConf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Feb 24 12:15:25 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import configparser as cp 10 | from utils.scorefile_generator import generate_voices_scores, generate_sre_scores 11 | 12 | class NpldaConf: 13 | def __init__(self, configfile): 14 | config = cp.ConfigParser(interpolation=cp.ExtendedInterpolation()) 15 | try: 16 | config.read(configfile) 17 | except: 18 | raise IOError('Oh No! :-( Something is wrong with the config file.') 19 | self.training_data_trials_list = config['Paths']['training_data_trials_list'].split(',') 20 | self.validation_trials_list = config['Paths']['validation_trials_list'].split(',') 21 | self.test_trials_list = config['Paths']['test_trials_list'].split(',') 22 | self.mega_xvector_scp = config['Paths']['mega_xvector_scp'] 23 | self.mega_xvector_pkl = config['Paths']['mega_xvector_pkl'] 24 | self.meanvec = config['Paths']['meanvec'] 25 | self.transformmat = config['Paths']['transformmat'] 26 | self.kaldiplda = config['Paths']['kaldiplda'] 27 | self.xvector_dim = int(config['NPLDA']['xvector_dim']) 28 | self.layer1_LDA_dim = int(config['NPLDA']['layer1_LDA_dim']) 29 | self.layer2_PLDA_spkfactor_dim = int(config['NPLDA']['layer2_PLDA_spkfactor_dim']) 30 | self.initialization = config['NPLDA']['initialization'] 31 | self.device = config['NPLDA']['device'] 32 | self.seed = int(config['NPLDA']['seed']) 33 | self.alpha = float(config['NPLDA']['alpha']) 34 | self.loss = config['Training']['loss'] 35 | self.cmiss = float(config['Training']['cmiss']) 36 | self.cfa = float(config['Training']['cfa']) 37 | self.target_probs = config['Training']['target_probs'].split(',') 38 | self.beta = [self.cfa*(1-float(pt))/(self.cmiss*float(pt)) for pt in self.target_probs] 39 | self.batch_size = int(config['Training']['batch_size']) 40 | self.n_epochs = int(config['Training']['n_epochs']) 41 | self.lr = float(config['Training']['lr']) 42 | self.heldout_set_for_lr_decay = config['Training']['heldout_set_for_lr_decay'] 43 | self.heldout_set_for_th_init = config['Training']['heldout_set_for_th_init'] 44 | self.log_interval = int(config['Logging']['log_interval']) 45 | if config['Scoring']['scorefile_format'] == 'sre': 46 | self.generate_scorefile = generate_sre_scores 47 | else: 48 | self.generate_scorefile = generate_voices_scores 49 | if config['Training']['train_subsample_factors'] == 'None': 50 | self.train_subsample_factors = None 51 | else: 52 | self.train_subsample_factors = [float(x) for x in config['Training']['train_subsample_factors'].split(',')] 53 | if config['Training']['valid_subsample_factors'] == 'None': 54 | self.valid_subsample_factors = None 55 | else: 56 | self.valid_subsample_factors = [float(x) for x in config['Training']['valid_subsample_factors'].split(',')] 57 | 58 | class 
E2EConf: 59 | def __init__(self, configfile): 60 | config = cp.ConfigParser(interpolation=cp.ExtendedInterpolation()) 61 | try: 62 | config.read(configfile) 63 | except: 64 | raise IOError('Oh No! :-( Something is wrong with the config file.') 65 | self.base_path = config['Paths']['base_path'] 66 | self.train_spk2utt_list = config['Paths']['train_spk2utt_list'].split(',') 67 | self.training_data_trials_list = config['Paths']['training_data_trials_list'].split(',') 68 | self.validation_trials_list = config['Paths']['validation_trials_list'].split(',') 69 | self.test_trials_list = config['Paths']['test_trials_list'].split(',') 70 | self.mega_mfcc_scp = config['Paths']['mega_mfcc_scp'] 71 | self.mega_mfcc_pkl = config['Paths']['mega_mfcc_pkl'] 72 | self.xvec_model = config['Paths']['xvec_model'] 73 | self.meanvec = config['Paths']['meanvec'] 74 | self.transformmat = config['Paths']['transformmat'] 75 | self.kaldiplda = config['Paths']['kaldiplda'] 76 | self.xvector_dim = int(config['NPLDA']['xvector_dim']) 77 | self.layer1_LDA_dim = int(config['NPLDA']['layer1_LDA_dim']) 78 | self.layer2_PLDA_spkfactor_dim = int(config['NPLDA']['layer2_PLDA_spkfactor_dim']) 79 | self.initialization = config['NPLDA']['initialization'] 80 | self.pooling_function = config['NPLDA']['pooling_function'] 81 | self.device = config['NPLDA']['device'] 82 | self.seed = int(config['NPLDA']['seed']) 83 | self.alpha = float(config['NPLDA']['alpha']) 84 | self.loss = config['Training']['loss'] 85 | self.cmiss = float(config['Training']['cmiss']) 86 | self.cfa = float(config['Training']['cfa']) 87 | self.target_probs = config['Training']['target_probs'].split(',') 88 | self.beta = [self.cfa*(1-float(pt))/(self.cmiss*float(pt)) for pt in self.target_probs] 89 | self.batch_size = int(config['Training']['batch_size']) 90 | self.min_num_spks_per_batch = int(config['Training']['min_num_spks_per_batch']) 91 | self.max_num_spks_per_batch = int(config['Training']['max_num_spks_per_batch']) 92 | self.n_epochs = int(config['Training']['n_epochs']) 93 | self.lr = float(config['Training']['lr']) 94 | self.heldout_set_for_lr_decay = config['Training']['heldout_set_for_lr_decay'] 95 | self.heldout_set_for_th_init = config['Training']['heldout_set_for_th_init'] 96 | self.log_interval = int(config['Logging']['log_interval']) 97 | if config['Scoring']['scorefile_format'] == 'sre': 98 | self.generate_scorefile = generate_sre_scores 99 | else: 100 | self.generate_scorefile = generate_voices_scores 101 | if config['Training']['train_subsample_factors'] == 'None': 102 | self.train_subsample_factors = None 103 | else: 104 | self.train_subsample_factors = [float(x) for x in config['Training']['train_subsample_factors'].split(',')] 105 | if config['Training']['valid_subsample_factors'] == 'None': 106 | self.valid_subsample_factors = None 107 | else: 108 | self.valid_subsample_factors = [float(x) for x in config['Training']['valid_subsample_factors'].split(',')] -------------------------------------------------------------------------------- /utils/__pycache__/calibration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/utils/__pycache__/calibration.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/sv_trials_loaders.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/utils/__pycache__/sv_trials_loaders.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/sv_trials_loaders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iiscleap/NeuralPlda/ac99bb4f19ca598dedff7a9f01ca3c81665d060d/utils/__pycache__/sv_trials_loaders.cpython-37.pyc -------------------------------------------------------------------------------- /utils/adaptive_score_normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 2 15:11:41 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | # from pdb import set_trace as bp 11 | 12 | ASnorm_topN = 500 13 | 14 | # raw_score_filename = '/home/data2/SRE2019/prashantk/voxceleb/v2/exp/scores_18/scores_sre18_dev_kaldiplda_xvectors_swbd_sre_mx6_sre16_before_norm.tsv' 15 | # cohort_score_filename = '/home/data2/SRE2019/prashantk/voxceleb/v2/exp/scores_18/scores_sre18_dev_cohort_kaldiplda_xvectors_swbd_sre_mx6_sre16_before_norm.tsv' 16 | 17 | raw_score_filename = '/home/data2/SRE2019/prashantk/voxceleb/v3/exp/xvector_nnet_2a/scores/scores_sre18_eval_kaldiplda_xvectors_swbd_sre_mx6_before_norm.tsv' 18 | cohort_score_filename = '/home/data2/SRE2019/prashantk/voxceleb/v3/exp/xvector_nnet_2a/scores/scores_sre18_eval_cohort_kaldiplda_xvectors_swbd_sre_mx6_before_norm.tsv' 19 | 20 | raw_scorefile_tab = np.genfromtxt(raw_score_filename, dtype='str') 21 | header = raw_scorefile_tab[0] 22 | raw_scorefile_tab = raw_scorefile_tab[1:] 23 | 24 | trials_enroll, trials_test, raw_scores = raw_scorefile_tab[:,0], raw_scorefile_tab[:,1], (raw_scorefile_tab[:,-1]).astype(float) 25 | trials_test = np.asarray([w.replace('.sph','') for w in trials_test]) 26 | 27 | cohort_scorefile_tab = np.genfromtxt(cohort_score_filename, dtype='str', skip_header=1) 28 | num_unlabelled = len(np.unique(cohort_scorefile_tab[:,1])) 29 | unique_enrolls = np.unique(trials_enroll) 30 | unique_test = np.unique(trials_test) 31 | 32 | cohort_score_matrix = np.sort(cohort_scorefile_tab[:,-1].astype(float).reshape(-1,num_unlabelled),axis=1) 33 | mean_cohort_scores = np.mean(cohort_score_matrix, axis=1) 34 | std_cohort_scores = np.std(cohort_score_matrix, axis=1) 35 | mean_top_cohort_scores = np.mean(cohort_score_matrix[:,:ASnorm_topN], axis=1) 36 | std_top_cohort_scores = np.std(cohort_score_matrix[:,:ASnorm_topN], axis=1) 37 | 38 | enrolls_of_cohort = cohort_scorefile_tab[:,0].reshape(-1,num_unlabelled)[:,0] 39 | 40 | 41 | S = dict(zip(enrolls_of_cohort, cohort_score_matrix)) 42 | mean_dict = dict(zip(enrolls_of_cohort, mean_cohort_scores)) 43 | std_dict = dict(zip(enrolls_of_cohort, std_cohort_scores)) 44 | mean_dict_top = dict(zip(enrolls_of_cohort, mean_top_cohort_scores)) 45 | std_dict_top = dict(zip(enrolls_of_cohort, std_top_cohort_scores)) 46 | 47 | 48 | # Se = {enr:cohort_scorefile_tab[:,-1][cohort_scorefile_tab[:,0]==enr].astype(float) for enr in unique_enrolls} 49 | # St = {tst:cohort_scorefile_tab[:,-1][cohort_scorefile_tab[:,0]==tst].astype(float) for tst in unique_test} 50 | 51 | # mean_e = {enr:np.mean(Se[enr]) for enr in Se} 52 | # std_dict = {enr:np.std(Se[enr]) for enr in Se} 53 | 54 | # mean_dict = {tst:np.mean(St[tst]) for tst in St} 55 | # std_dict = {tst:np.std(St[tst]) for tst in St} 56 | 57 | # 
mean_e_top = {enr:np.mean(np.sort(Se[enr])[:ASnorm_topN]) for enr in Se} 58 | # std_dict_top = {enr:np.std(np.sort(Se[enr])[:ASnorm_topN]) for enr in Se} 59 | 60 | # mean_dict_top = {tst:np.mean(np.sort(St[tst])[:ASnorm_topN]) for tst in St} 61 | # std_dict_top = {tst:np.std(np.sort(St[tst])[:ASnorm_topN]) for tst in St} 62 | 63 | Znorm_scores, Tnorm_scores, Snorm_scores, ASnorm1_scores = [],[],[],[] 64 | 65 | for enr, tst, raw_score in zip(trials_enroll, trials_test, raw_scores): 66 | znorm_scr = (raw_score - mean_dict[enr])/std_dict[enr] 67 | tnorm_scr = (raw_score - mean_dict[tst])/std_dict[tst] 68 | snorm_scr = (znorm_scr + tnorm_scr)/2 69 | asnorm1_scr = ((raw_score - mean_dict_top[enr])/std_dict_top[enr] + (raw_score - mean_dict_top[tst])/std_dict_top[tst])/2 70 | Znorm_scores.append(znorm_scr) 71 | Tnorm_scores.append(tnorm_scr) 72 | Snorm_scores.append(snorm_scr) 73 | ASnorm1_scores.append(asnorm1_scr) 74 | 75 | 76 | Znorm_scores = np.asarray(Znorm_scores).astype(str) 77 | Tnorm_scores = np.asarray(Tnorm_scores).astype(str) 78 | Snorm_scores = np.asarray(Snorm_scores).astype(str) 79 | Asnorm1_scores = np.asarray(ASnorm1_scores).astype(str) 80 | 81 | np.savetxt(raw_score_filename+'_znorm.tsv',np.c_[raw_scorefile_tab[:,:-1],Znorm_scores], header='\t'.join(header), fmt='%s', delimiter='\t') 82 | np.savetxt(raw_score_filename+'_tnorm.tsv',np.c_[raw_scorefile_tab[:,:-1],Tnorm_scores], header='\t'.join(header), fmt='%s', delimiter='\t') 83 | np.savetxt(raw_score_filename+'_snorm.tsv',np.c_[raw_scorefile_tab[:,:-1],Snorm_scores], header='\t'.join(header), fmt='%s', delimiter='\t') 84 | np.savetxt(raw_score_filename+'_asnorm1.tsv',np.c_[raw_scorefile_tab[:,:-1],ASnorm1_scores], header='\t'.join(header), fmt='%s', delimiter='\t') -------------------------------------------------------------------------------- /utils/histograms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 2 23:10:11 2020 5 | 6 | @author: shreyasr 7 | """ 8 | import numpy as np 9 | from matplotlib import pyplot 10 | import os 11 | 12 | 13 | score_key = np.genfromtxt('/run/user/1001/gvfs/sftp:host=10.64.18.30,user=prashantk/home/data/SRE2019/LDC2019E59/dev/docs/sre18_dev_trial_key.tsv', dtype='str', delimiter='\t', skip_header=1) 14 | 15 | cmn2_target_idx = score_key[:,3]=='target' #(score_key[:,-1]=='cmn2') #* () 16 | cmn2_nontarget_idx = score_key[:,3]=='nontarget' #(score_key[:,-1]=='cmn2') #* () 17 | 18 | timestamp=1579168538 19 | for i in range(1,31): 20 | score_tsv = np.genfromtxt('/run/user/1001/gvfs/sftp:host=10.64.18.30,user=prashantk/home/data2/SRE2019/prashantk/NeuralPlda/scores/sre18_dev_kaldipldanet_epoch{}_{}.txt'.format(i,timestamp), dtype='str', delimiter='\t', skip_header=0) 21 | 22 | header = score_tsv[0] 23 | score_tsv = score_tsv[1:] 24 | scores = (score_tsv[:,-1]).astype(float) 25 | 26 | scores_target_cmn2 = scores[cmn2_target_idx] 27 | scores_nontarget_cmn2 = scores[cmn2_nontarget_idx] 28 | 29 | max_scores = max(scores) 30 | min_scores = min(scores) 31 | 32 | bins_cmn2 = np.linspace(min_scores, max_scores, 200) 33 | bins_vast = np.linspace(min_scores, max_scores, 20) 34 | 35 | pyplot.figure() 36 | pyplot.hist(scores_target_cmn2, bins_cmn2, alpha=0.5, label='target_cmn2') 37 | pyplot.hist(scores_nontarget_cmn2, bins_cmn2, alpha=0.5, label='nontarget_cmn2') 38 | pyplot.axis([-2.,2.,0.,2100]) 39 | if not os.path.exists('plots/histograms_{}/'.format(timestamp)): 40 | 
os.makedirs('plots/histograms_{}/'.format(timestamp)) 41 | pyplot.savefig('plots/histograms_{}/hist_epoch{}_{}.png'.format(timestamp,i,timestamp)) -------------------------------------------------------------------------------- /utils/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 23 17:58:13 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | import numpy as np 16 | 17 | import pickle 18 | import subprocess 19 | from utils.Kaldi2NumpyUtils.kaldiPlda2numpydict import kaldiPlda2numpydict 20 | from matplotlib import pyplot as plt 21 | from pdb import set_trace as bp 22 | 23 | def arr2val(x, retidx): 24 | if x.size()[0] > 0: 25 | return x[retidx].cpu().item() 26 | else: 27 | return 1. 28 | 29 | class TDNN(nn.Module): 30 | 31 | def __init__( 32 | self, 33 | input_dim=23, 34 | output_dim=512, 35 | context_size=5, 36 | stride=1, 37 | dilation=1, 38 | batch_norm=True 39 | ): 40 | ''' 41 | TDNN as defined by https://www.danielpovey.com/files/2015_interspeech_multisplice.pdf 42 | 43 | Affine transformation not applied globally to all frames but smaller windows with local context 44 | 45 | batch_norm: True to include batch normalisation after the non linearity 46 | 47 | Context size and dilation determine the frames selected 48 | (although context size is not really defined in the traditional sense) 49 | For example: 50 | context size 5 and dilation 1 is equivalent to [-2,-1,0,1,2] 51 | context size 3 and dilation 2 is equivalent to [-2, 0, 2] 52 | context size 1 and dilation 1 is equivalent to [0] 53 | ''' 54 | super(TDNN, self).__init__() 55 | self.context_size = context_size 56 | self.stride = stride 57 | self.input_dim = input_dim 58 | self.output_dim = output_dim 59 | self.dilation = dilation 60 | self.padlen = int(dilation * (context_size - 1) / 2) 61 | self.kernel = nn.Linear(input_dim * context_size, output_dim) 62 | self.nonlinearity = nn.ReLU() 63 | self.batch_norm = batch_norm 64 | if batch_norm: 65 | self.bn = nn.BatchNorm1d(output_dim, affine=False) 66 | 67 | def forward(self, x): 68 | ''' 69 | input: size (batch, seq_len, input_features) 70 | output: size (batch, new_seq_len, output_features) 71 | ''' 72 | # print("In forward of TDNN") 73 | batch_size, _, d = tuple(x.shape) 74 | # print("X : ",x.shape) 75 | # print("D = ",d) 76 | # print(self.input_dim) 77 | x = x.unsqueeze(1) 78 | 79 | # Unfold input into smaller temporal contexts 80 | # bp() 81 | 82 | x = F.unfold(x, (self.context_size, self.input_dim), stride=(1, self.input_dim), dilation=(self.dilation, 1)) 83 | 84 | # N, output_dim*context_size, new_t = x.shape 85 | x = x.transpose(1, 86 | 2) # .reshape(-1,self.context_size, self.input_dim).flip(0,1).flip(1,2).flip(1,0,2).reshape(batch_size,-1,self.context_size*self.input_dim) 87 | x = self.kernel(x.float()) 88 | x = self.nonlinearity(x) 89 | 90 | if self.batch_norm: 91 | x = x.transpose(1, 2) 92 | x = self.bn(x) 93 | x = x.transpose(1, 2) 94 | 95 | return x 96 | 97 | 98 | class XVectorNet_ETDNN_12Layer(nn.Module): 99 | def __init__(self, noclasses=13539, pooling_function=torch.std): 100 | super(XVectorNet_ETDNN_12Layer, self).__init__() 101 | self.tdnn1 = TDNN(input_dim=30, output_dim=512, context_size=5, dilation=1) 102 | # self.tdnn1.requires_grad = False 103 | self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1) 104 | # 
self.tdnn2.requires_grad = False 105 | self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2) 106 | # self.tdnn3.requires_grad = False 107 | self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1) 108 | # self.tdnn4.requires_grad = False 109 | self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=3) 110 | # self.tdnn5.requires_grad = False 111 | self.tdnn6 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1) 112 | # self.tdnn6.requires_grad = False 113 | self.tdnn7 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=4) 114 | # self.tdnn7.requires_grad = False 115 | self.tdnn8 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1) 116 | # self.tdnn8.requires_grad = False 117 | self.tdnn9 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1) 118 | # self.tdnn9.requires_grad = False 119 | self.tdnn10 = TDNN(input_dim=512, output_dim=1500, context_size=1, dilation=1) 120 | # self.tdnn10.requires_grad = False 121 | self.pooling_function = pooling_function 122 | self.lin11 = nn.Linear(3000, 512) 123 | self.bn11 = nn.BatchNorm1d(num_features=512, affine=False) 124 | self.bn12 = nn.BatchNorm1d(num_features=512, affine=False) 125 | self.lin12 = nn.Linear(512, 512) 126 | self.finlin = nn.Linear(512, noclasses) 127 | self.smax = nn.Softmax(dim=1) 128 | 129 | 130 | 131 | 132 | 133 | def prestatspool(self, x): 134 | # bp() 135 | x = F.dropout(self.tdnn1(x), p=0.5) 136 | x = F.dropout(self.tdnn2(x), p=0.5) 137 | x = F.dropout(self.tdnn3(x), p=0.5) 138 | x = F.dropout(self.tdnn4(x), p=0.5) 139 | x = F.dropout(self.tdnn5(x), p=0.5) 140 | x = F.dropout(self.tdnn6(x), p=0.5) 141 | x = F.dropout(self.tdnn7(x), p=0.5) 142 | x = F.dropout(self.tdnn8(x), p=0.5) 143 | x = F.dropout(self.tdnn9(x), p=0.5) 144 | x = F.dropout(self.tdnn10(x), p=0.5) 145 | return x 146 | 147 | def statspooling(self, x): 148 | average = x.mean(1) 149 | stddev = self.pooling_function(x,1) # x.std(1) 150 | concatd = torch.cat((average, stddev), 1) 151 | return concatd 152 | 153 | def postpooling(self, x): 154 | x = F.dropout(self.bn11(F.relu(self.lin11(x))), p=0.5) 155 | x = F.dropout(self.bn12(F.relu(self.lin12(x))), p=0.5) 156 | x = F.relu(self.finlin(x)) 157 | return x 158 | 159 | def forward(self, x): 160 | x = x.transpose(1, 2) 161 | # bp() 162 | # print('In forward of XvectorNet') 163 | prepoolout = self.prestatspool(x) 164 | pooledout = self.statspooling(prepoolout) 165 | presoftmax = self.postpooling(pooledout) 166 | finaloutput = self.smax(presoftmax) 167 | return finaloutput 168 | 169 | def extract(self, x): 170 | x = x.transpose(1, 2) 171 | # x = self.prestatspool(x) 172 | x = self.tdnn1.forward(x) 173 | x = self.tdnn2.forward(x) 174 | x = self.tdnn3.forward(x) 175 | x = self.tdnn4.forward(x) 176 | x = self.tdnn5.forward(x) 177 | x = self.tdnn6.forward(x) 178 | x = self.tdnn7.forward(x) 179 | x = self.tdnn8.forward(x) 180 | x = self.tdnn9.forward(x) 181 | x = self.tdnn10.forward(x) 182 | pooledout = self.statspooling(x) 183 | xvec = self.lin11.forward(pooledout) 184 | return xvec 185 | 186 | def LoadFromKaldi(self, weightspath): # Credits: Harsha Varshan 187 | with open(weightspath, 'rb') as f: 188 | kaldiweights = pickle.load(f) 189 | 190 | mdsd = self.state_dict() 191 | 192 | for i in range(1, 11): 193 | mdsd['tdnn{}.kernel.weight'.format(i)].data.copy_( 194 | torch.from_numpy(kaldiweights['tdnn{}.affine'.format(i)]['params']).float()) 195 | mdsd['tdnn{}.kernel.bias'.format(i)].data.copy_( 196 | 
torch.from_numpy(kaldiweights['tdnn{}.affine'.format(i)]['bias']).float()) 197 | mdsd['tdnn{}.bn.running_mean'.format(i)].data.copy_( 198 | torch.from_numpy(kaldiweights['tdnn{}.batchnorm'.format(i)]['stats-mean']).float()) 199 | mdsd['tdnn{}.bn.running_var'.format(i)].data.copy_( 200 | torch.from_numpy(kaldiweights['tdnn{}.batchnorm'.format(i)]['stats-var']).float()) 201 | 202 | mdsd['lin11.weight'].data.copy_(torch.from_numpy(kaldiweights['tdnn11.affine']['params']).float()) 203 | mdsd['lin11.bias'].data.copy_(torch.from_numpy(kaldiweights['tdnn11.affine']['bias']).float()) 204 | mdsd['bn11.running_mean'].data.copy_(torch.from_numpy(kaldiweights['tdnn11.batchnorm']['stats-mean']).float()) 205 | mdsd['bn11.running_var'].data.copy_(torch.from_numpy(kaldiweights['tdnn11.batchnorm']['stats-var']).float()) 206 | 207 | mdsd['lin12.weight'].data.copy_(torch.from_numpy(kaldiweights['tdnn12.affine']['params']).float()) 208 | mdsd['lin12.bias'].data.copy_(torch.from_numpy(kaldiweights['tdnn12.affine']['bias']).float()) 209 | mdsd['bn12.running_mean'].data.copy_(torch.from_numpy(kaldiweights['tdnn12.batchnorm']['stats-mean']).float()) 210 | mdsd['bn12.running_var'].data.copy_(torch.from_numpy(kaldiweights['tdnn12.batchnorm']['stats-var']).float()) 211 | 212 | mdsd['finlin.weight'].data.copy_(torch.from_numpy(kaldiweights['output.affine']['params']).float()) 213 | mdsd['finlin.bias'].data.copy_(torch.from_numpy(kaldiweights['output.affine']['bias']).float()) 214 | 215 | 216 | class Etdnn_Xvec_NeuralPlda(nn.Module): 217 | def __init__(self, nc): 218 | super(Etdnn_Xvec_NeuralPlda, self).__init__() 219 | if nc.pooling_function=='var': 220 | self.pooling_function = torch.var 221 | else: 222 | self.pooling_function = torch.std 223 | self.xvector_extractor = XVectorNet_ETDNN_12Layer(pooling_function = self.pooling_function) 224 | self.centering_and_LDA = nn.Linear(nc.xvector_dim, nc.layer1_LDA_dim) # Centering, wccn 225 | self.centering_and_wccn_plda = nn.Linear(nc.layer1_LDA_dim, nc.layer2_PLDA_spkfactor_dim) 226 | self.P_sqrt = nn.Parameter(torch.rand(nc.layer2_PLDA_spkfactor_dim, requires_grad=True)) 227 | self.Q = nn.Parameter(torch.rand(nc.layer2_PLDA_spkfactor_dim, requires_grad=True)) 228 | self.threshold = {} 229 | for beta in nc.beta: 230 | self.threshold[beta] = nn.Parameter(0*torch.rand(1, requires_grad=True)) 231 | self.register_parameter("Th{}".format(int(beta)), self.threshold[beta]) 232 | self.threshold_Xent = nn.Parameter(0*torch.rand(1, requires_grad=True)) 233 | self.alpha = torch.tensor(nc.alpha).to(nc.device) 234 | self.beta = nc.beta 235 | self.dropout = nn.Dropout(p=0.5) 236 | self.lossfn = nc.loss 237 | 238 | def train1(self): 239 | self.train() 240 | self.xvector_extractor.tdnn1.bn.training = False 241 | self.xvector_extractor.tdnn2.bn.training = False 242 | self.xvector_extractor.tdnn3.bn.training = False 243 | self.xvector_extractor.tdnn4.bn.training = False 244 | self.xvector_extractor.tdnn5.bn.training = False 245 | self.xvector_extractor.tdnn6.bn.training = False 246 | self.xvector_extractor.tdnn7.bn.training = False 247 | self.xvector_extractor.tdnn8.bn.training = False 248 | self.xvector_extractor.tdnn9.bn.training = False 249 | self.xvector_extractor.tdnn10.bn.training = False 250 | 251 | def extract_plda_embeddings(self, x): 252 | x = self.xvector_extractor.extract(x) 253 | x = self.centering_and_LDA(x) 254 | x = F.normalize(x) 255 | x = self.centering_and_wccn_plda(x) 256 | return x 257 | 258 | def forward_from_plda_embeddings(self,x1,x2): 259 | P = self.P_sqrt * 
self.P_sqrt 260 | Q = self.Q 261 | S = (x1 * Q * x1).sum(dim=1) + (x2 * Q * x2).sum(dim=1) + 2 * (x1 * P * x2).sum(dim=1) 262 | return S 263 | 264 | def forward(self, x1, x2): 265 | x1 = self.extract_plda_embeddings(x1) 266 | x2 = self.extract_plda_embeddings(x2) 267 | S = self.forward_from_plda_embeddings(x1,x2) 268 | return S 269 | 270 | def softcdet(self, output, target): 271 | sigmoid = nn.Sigmoid() 272 | losses = [((sigmoid(self.alpha * (self.threshold[beta] - output)) * target).sum() / (target.sum()) + beta * (sigmoid(self.alpha * (output - self.threshold[beta])) * (1 - target)).sum() / ((1 - target).sum())) for beta in self.beta] 273 | loss = sum(losses)/len(losses) 274 | return loss 275 | 276 | def crossentropy(self, output, target): 277 | sigmoid = nn.Sigmoid() 278 | loss = F.binary_cross_entropy(sigmoid(output - self.threshold_Xent), target) 279 | return loss 280 | 281 | def loss(self, output, target): 282 | if self.lossfn == 'SoftCdet': 283 | return self.softcdet(output, target) 284 | elif self.lossfn == 'crossentropy': 285 | return self.crossentropy(output, target) 286 | 287 | def cdet(self, output, target): 288 | losses = [((output < self.threshold[beta]).float() * target).sum() / (target.sum()) + beta * ((output > self.threshold[beta]).float() * (1 - target)).sum() / ((1 - target).sum()) for beta in self.beta] 289 | loss = sum(losses)/len(losses) 290 | return loss 291 | 292 | def minc(self, output, target, update_thresholds=False, showplots=False): 293 | scores_target, _ = torch.sort(output[target>0.5]) 294 | scores_nontarget, _ = torch.sort(-output[target<0.5]) 295 | scores_nontarget = -scores_nontarget 296 | pmiss_arr = [arr2val(torch.where(scores_target < i)[0], -1) for i in scores_target] 297 | pmiss = torch.tensor(pmiss_arr).float() / (target.cpu().sum()) 298 | pfa_arr = [arr2val(torch.where(scores_nontarget >= i)[0], -1) for i in scores_target] 299 | pfa = torch.tensor(pfa_arr).float() / ((1-target.cpu()).sum()) 300 | cdet_arr, minc_dict, minc_threshold = {}, {}, {} 301 | for beta in self.beta: 302 | cdet_arr[beta] = pmiss + beta*pfa 303 | minc_dict[beta], thidx = torch.min(cdet_arr[beta], 0) 304 | minc_threshold[beta] = scores_target[thidx] 305 | if update_thresholds: 306 | self.state_dict()["Th{}".format(int(beta))].data.copy_(minc_threshold[beta]) 307 | mincs = list(minc_dict.values()) 308 | minc_avg = sum(mincs)/len(mincs) 309 | if showplots: 310 | plt.figure() 311 | minsc = output.min() 312 | maxsc = output.max() 313 | plt.hist(np.asarray(scores_nontarget), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 314 | plt.hist(np.asarray(scores_target), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 315 | plt.plot(scores_target, pmiss) 316 | plt.plot(scores_target, pfa) 317 | plt.plot(scores_target, cdet_arr[99]) 318 | plt.plot(scores_target, cdet_arr[199]) 319 | # plt.ylim([0,3]) 320 | # plt.xlim([0,1.4]) 321 | plt.show() 322 | return minc_avg, minc_threshold 323 | 324 | def LoadParamsFromKaldi(self, xvec_etdnn_pickle_file, mean_vec_file, transform_mat_file, PldaFile): 325 | self.xvector_extractor.LoadFromKaldi(xvec_etdnn_pickle_file) 326 | plda = kaldiPlda2numpydict(PldaFile) 327 | transform_mat = np.asarray([w.split() for w in np.asarray( 328 | subprocess.check_output(["copy-matrix", "--binary=false", transform_mat_file, "-"]).decode('utf-8').strip()[ 329 | 2:-2].split('\n'))]).astype(float) 330 | mean_vec = np.asarray( 331 | subprocess.check_output(["copy-vector", "--binary=false", mean_vec_file, "-"]).decode('utf-8').strip()[ 332 | 
1:-2].split()).astype(float) 333 | mdsd = self.state_dict() 334 | mdsd['centering_and_LDA.weight'].data.copy_(torch.from_numpy(transform_mat[:, :-1]).float()) 335 | mdsd['centering_and_LDA.bias'].data.copy_( 336 | torch.from_numpy(transform_mat[:, -1] - transform_mat[:, :-1].dot(mean_vec)).float()) 337 | mdsd['centering_and_wccn_plda.weight'].data.copy_(torch.from_numpy(plda['diagonalizing_transform']).float()) 338 | mdsd['centering_and_wccn_plda.bias'].data.copy_( 339 | torch.from_numpy(-plda['diagonalizing_transform'].dot(plda['plda_mean'])).float()) 340 | mdsd['P_sqrt'].data.copy_(torch.from_numpy(np.sqrt(plda['diagP'])).float()) 341 | mdsd['Q'].data.copy_(torch.from_numpy(plda['diagQ']).float()) 342 | 343 | def SaveModel(self, filename): 344 | with open(filename, 'wb') as f: 345 | pickle.dump(self, f) 346 | 347 | 348 | class NeuralPlda(nn.Module): 349 | def __init__(self, nc): 350 | super(NeuralPlda, self).__init__() 351 | self.centering_and_LDA = nn.Linear(nc.xvector_dim, nc.layer1_LDA_dim) # Centering, wccn 352 | self.centering_and_wccn_plda = nn.Linear(nc.layer1_LDA_dim, nc.layer2_PLDA_spkfactor_dim) 353 | self.P_sqrt = nn.Parameter(torch.rand(nc.layer2_PLDA_spkfactor_dim, requires_grad=True)) 354 | self.Q = nn.Parameter(torch.rand(nc.layer2_PLDA_spkfactor_dim, requires_grad=True)) 355 | self.threshold = {} 356 | for beta in nc.beta: 357 | self.threshold[beta] = nn.Parameter(0*torch.rand(1, requires_grad=True)) 358 | self.register_parameter("Th{}".format(int(beta)), self.threshold[beta]) 359 | self.threshold_Xent = nn.Parameter(0*torch.rand(1, requires_grad=True)) 360 | self.alpha = torch.tensor(nc.alpha).to(nc.device) 361 | self.beta = nc.beta 362 | self.dropout = nn.Dropout(p=0.5) 363 | self.lossfn = nc.loss 364 | 365 | 366 | def extract_plda_embeddings(self, x): 367 | x = self.centering_and_LDA(x) 368 | x = F.normalize(x) 369 | x = self.centering_and_wccn_plda(x) 370 | return x 371 | 372 | def forward_from_plda_embeddings(self,x1,x2): 373 | P = self.P_sqrt * self.P_sqrt 374 | Q = self.Q 375 | S = (x1 * Q * x1).sum(dim=1) + (x2 * Q * x2).sum(dim=1) + 2 * (x1 * P * x2).sum(dim=1) 376 | return S 377 | 378 | def forward(self, x1, x2): 379 | x1 = self.extract_plda_embeddings(x1) 380 | x2 = self.extract_plda_embeddings(x2) 381 | S = self.forward_from_plda_embeddings(x1,x2) 382 | return S 383 | 384 | def softcdet(self, output, target): 385 | sigmoid = nn.Sigmoid() 386 | losses = [((sigmoid(self.alpha * (self.threshold[beta] - output)) * target).sum() / (target.sum()) + beta * (sigmoid(self.alpha * (output - self.threshold[beta])) * (1 - target)).sum() / ((1 - target).sum())) for beta in self.beta] 387 | loss = sum(losses)/len(losses) 388 | return loss 389 | 390 | def crossentropy(self, output, target): 391 | sigmoid = nn.Sigmoid() 392 | loss = F.binary_cross_entropy(sigmoid(output - self.threshold_Xent), target) 393 | return loss 394 | 395 | def loss(self, output, target): 396 | if self.lossfn == 'SoftCdet': 397 | return self.softcdet(output, target) 398 | elif self.lossfn == 'crossentropy': 399 | return self.crossentropy(output, target) 400 | 401 | def cdet(self, output, target): 402 | losses = [((output < self.threshold[beta]).float() * target).sum() / (target.sum()) + beta * ((output > self.threshold[beta]).float() * (1 - target)).sum() / ((1 - target).sum()) for beta in self.beta] 403 | loss = sum(losses)/len(losses) 404 | return loss 405 | 406 | def minc(self, output, target, update_thresholds=False, showplots=False): 407 | scores_target, _ = torch.sort(output[target>0.5]) 408 | 
scores_nontarget, _ = torch.sort(-output[target<0.5]) 409 | scores_nontarget = -scores_nontarget 410 | pmiss_arr = [arr2val(torch.where(scores_target < i)[0], -1) for i in scores_target] 411 | pmiss = torch.tensor(pmiss_arr).float() / (target.cpu().sum()) 412 | pfa_arr = [arr2val(torch.where(scores_nontarget >= i)[0], -1) for i in scores_target] 413 | pfa = torch.tensor(pfa_arr).float() / ((1-target.cpu()).sum()) 414 | cdet_arr, minc_dict, minc_threshold = {}, {}, {} 415 | for beta in self.beta: 416 | cdet_arr[beta] = pmiss + beta*pfa 417 | minc_dict[beta], thidx = torch.min(cdet_arr[beta], 0) 418 | minc_threshold[beta] = scores_target[thidx] 419 | if update_thresholds: 420 | self.state_dict()["Th{}".format(int(beta))].data.copy_(minc_threshold[beta]) 421 | mincs = list(minc_dict.values()) 422 | minc_avg = sum(mincs)/len(mincs) 423 | if showplots: 424 | plt.figure() 425 | minsc = output.min() 426 | maxsc = output.max() 427 | plt.hist(np.asarray(scores_nontarget), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 428 | plt.hist(np.asarray(scores_target), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 429 | plt.plot(scores_target, pmiss) 430 | plt.plot(scores_target, pfa) 431 | plt.plot(scores_target, cdet_arr[99]) 432 | plt.plot(scores_target, cdet_arr[199]) 433 | # plt.ylim([0,3]) 434 | # plt.xlim([0,1.4]) 435 | plt.show() 436 | return minc_avg, minc_threshold 437 | 438 | 439 | 440 | 441 | def LoadPldaParamsFromKaldi(self, mean_vec_file, transform_mat_file, PldaFile): 442 | plda = kaldiPlda2numpydict(PldaFile) 443 | transform_mat = np.asarray([w.split() for w in np.asarray( 444 | subprocess.check_output(["copy-matrix", "--binary=false", transform_mat_file, "-"]).decode('utf-8').strip()[ 445 | 2:-2].split('\n'))]).astype(float) 446 | mean_vec = np.asarray( 447 | subprocess.check_output(["copy-vector", "--binary=false", mean_vec_file, "-"]).decode('utf-8').strip()[ 448 | 1:-2].split()).astype(float) 449 | mdsd = self.state_dict() 450 | mdsd['centering_and_LDA.weight'].data.copy_(torch.from_numpy(transform_mat[:, :-1]).float()) 451 | mdsd['centering_and_LDA.bias'].data.copy_( 452 | torch.from_numpy(transform_mat[:, -1] - transform_mat[:, :-1].dot(mean_vec)).float()) 453 | mdsd['centering_and_wccn_plda.weight'].data.copy_(torch.from_numpy(plda['diagonalizing_transform']).float()) 454 | mdsd['centering_and_wccn_plda.bias'].data.copy_( 455 | torch.from_numpy(-plda['diagonalizing_transform'].dot(plda['plda_mean'])).float()) 456 | mdsd['P_sqrt'].data.copy_(torch.from_numpy(np.sqrt(plda['diagP'])).float()) 457 | mdsd['Q'].data.copy_(torch.from_numpy(plda['diagQ']).float()) 458 | 459 | def SaveModel(self, filename): 460 | with open(filename, 'wb') as f: 461 | pickle.dump(self, f) 462 | 463 | class DPlda(nn.Module): 464 | def __init__(self, nc): 465 | super(DPlda, self).__init__() 466 | self.centering_and_LDA = nn.Linear(nc.xvector_dim, nc.layer1_LDA_dim) # Centering, wccn 467 | self.logistic_regres = nn.Linear(nc.layer1_LDA_dim*nc.layer1_LDA_dim*2+nc.layer1_LDA_dim,1) 468 | self.threshold = {} 469 | for beta in nc.beta: 470 | self.threshold[beta] = nn.Parameter(0*torch.rand(1, requires_grad=True)) 471 | self.register_parameter("Th{}".format(int(beta)), self.threshold[beta]) 472 | self.alpha = torch.tensor(nc.alpha).to(nc.device) 473 | self.beta = nc.beta 474 | self.dropout = nn.Dropout(p=0.5) 475 | self.lossfn = nc.loss 476 | 477 | 478 | def extract_plda_embeddings(self, x): 479 | x = self.centering_and_LDA(x) 480 | x = F.normalize(x) 481 | return x 482 | 483 | def 
forward_from_plda_embeddings(self,x1,x2): 484 | x_between = torch.bmm(x1.unsqueeze(2), x2.unsqueeze(1)).reshape(x1.shape[0],-1) + torch.bmm(x2.unsqueeze(2), x1.unsqueeze(1)).reshape(x1.shape[0],-1) 485 | x_within = torch.bmm(x1.unsqueeze(2), x1.unsqueeze(1)).reshape(x1.shape[0],-1) + torch.bmm(x2.unsqueeze(2), x2.unsqueeze(1)).reshape(x1.shape[0],-1) 486 | x_sum = x1+x2 487 | x = torch.cat((x_between,x_within,x_sum),dim=1) 488 | S = self.logistic_regres(x)[:,0] 489 | return S 490 | 491 | def forward(self, x1, x2): 492 | x1 = self.extract_plda_embeddings(x1) 493 | x2 = self.extract_plda_embeddings(x2) 494 | S = self.forward_from_plda_embeddings(x1,x2) 495 | return S 496 | 497 | def softcdet(self, output, target): 498 | sigmoid = nn.Sigmoid() 499 | losses = [((sigmoid(self.alpha * (self.threshold[beta] - output)) * target).sum() / (target.sum()) + beta * (sigmoid(self.alpha * (output - self.threshold[beta])) * (1 - target)).sum() / ((1 - target).sum())) for beta in self.beta] 500 | loss = sum(losses)/len(losses) 501 | return loss 502 | 503 | def crossentropy(self, output, target): 504 | sigmoid = nn.Sigmoid() 505 | loss = F.binary_cross_entropy(sigmoid(output), target) 506 | return loss 507 | 508 | def loss(self, output, target): 509 | if self.lossfn == 'SoftCdet': 510 | return self.softcdet(output, target) 511 | elif self.lossfn == 'crossentropy': 512 | return self.crossentropy(output, target) 513 | 514 | def cdet(self, output, target): 515 | losses = [((output < self.threshold[beta]).float() * target).sum() / (target.sum()) + beta * ((output > self.threshold[beta]).float() * (1 - target)).sum() / ((1 - target).sum()) for beta in self.beta] 516 | loss = sum(losses)/len(losses) 517 | return loss 518 | 519 | def minc(self, output, target, update_thresholds=False, showplots=False): 520 | scores_target, _ = torch.sort(output[target>0.5]) 521 | scores_nontarget, _ = torch.sort(-output[target<0.5]) 522 | scores_nontarget = -scores_nontarget 523 | pmiss_arr = [arr2val(torch.where(scores_target < i)[0], -1) for i in scores_target] 524 | pmiss = torch.tensor(pmiss_arr).float() / (target.cpu().sum()) 525 | pfa_arr = [arr2val(torch.where(scores_nontarget >= i)[0], -1) for i in scores_target] 526 | pfa = torch.tensor(pfa_arr).float() / ((1-target.cpu()).sum()) 527 | cdet_arr, minc_dict, minc_threshold = {}, {}, {} 528 | for beta in self.beta: 529 | cdet_arr[beta] = pmiss + beta*pfa 530 | minc_dict[beta], thidx = torch.min(cdet_arr[beta], 0) 531 | minc_threshold[beta] = scores_target[thidx] 532 | if update_thresholds: 533 | self.state_dict()["Th{}".format(int(beta))].data.copy_(minc_threshold[beta]) 534 | mincs = list(minc_dict.values()) 535 | minc_avg = sum(mincs)/len(mincs) 536 | if showplots: 537 | plt.figure() 538 | minsc = output.min() 539 | maxsc = output.max() 540 | plt.hist(np.asarray(scores_nontarget), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 541 | plt.hist(np.asarray(scores_target), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 542 | plt.plot(scores_target, pmiss) 543 | plt.plot(scores_target, pfa) 544 | plt.plot(scores_target, cdet_arr[99]) 545 | plt.plot(scores_target, cdet_arr[199]) 546 | # plt.ylim([0,3]) 547 | # plt.xlim([0,1.4]) 548 | plt.show() 549 | return minc_avg, minc_threshold 550 | 551 | 552 | 553 | 554 | def LoadParamsFromKaldi(self, mean_vec_file, transform_mat_file): 555 | transform_mat = np.asarray([w.split() for w in np.asarray( 556 | subprocess.check_output(["copy-matrix", "--binary=false", transform_mat_file, "-"]).decode('utf-8').strip()[ 557 | 
2:-2].split('\n'))]).astype(float) 558 | mean_vec = np.asarray( 559 | subprocess.check_output(["copy-vector", "--binary=false", mean_vec_file, "-"]).decode('utf-8').strip()[ 560 | 1:-2].split()).astype(float) 561 | mdsd = self.state_dict() 562 | mdsd['centering_and_LDA.weight'].data.copy_(torch.from_numpy(transform_mat[:, :-1]).float()) 563 | mdsd['centering_and_LDA.bias'].data.copy_( 564 | torch.from_numpy(transform_mat[:, -1] - transform_mat[:, :-1].dot(mean_vec)).float()) 565 | 566 | 567 | def SaveModel(self, filename): 568 | with open(filename, 'wb') as f: 569 | pickle.dump(self, f) 570 | 571 | class GaussianBackend(nn.Module): 572 | def __init__(self, nc): 573 | super(GaussianBackend, self).__init__() 574 | self.centering_and_LDA = nn.Linear(nc.xvector_dim, nc.layer1_LDA_dim) 575 | self.centering_and_LDA.weight.requires_grad = False 576 | self.centering_and_LDA.bias.requires_grad = False 577 | self.paired_mean_target = torch.rand(2*nc.layer1_LDA_dim) 578 | self.paired_cov_inv_target = torch.rand(2*nc.layer1_LDA_dim,2*nc.layer1_LDA_dim) 579 | self.paired_mean_nontarget = torch.rand(2*nc.layer1_LDA_dim) 580 | self.paired_cov_inv_nontarget = torch.rand(2*nc.layer1_LDA_dim,2*nc.layer1_LDA_dim) 581 | 582 | 583 | 584 | def forward(self, x1, x2): 585 | x1 = self.centering_and_LDA(x1) #(x1-self.mu)/self.stddev 586 | x2 = self.centering_and_LDA(x2) #(x2-self.mu)/self.stddev 587 | x1 = F.normalize(x1) 588 | x2 = F.normalize(x2) 589 | x = torch.cat((x1,x2),dim=1) 590 | St = (-((x-self.paired_mean_target).mm(self.paired_cov_inv_target))*(x-self.paired_mean_target)).sum(dim=1) 591 | Snt = (-((x-self.paired_mean_nontarget).mm(self.paired_cov_inv_nontarget))*(x-self.paired_mean_nontarget)).sum(dim=1) 592 | S = St - Snt 593 | return S 594 | 595 | def forward_getpaired(self, x1, x2): 596 | x1 = self.centering_and_LDA(x1) #(x1-self.mu)/self.stddev 597 | x2 = self.centering_and_LDA(x2) #(x2-self.mu)/self.stddev 598 | x1 = F.normalize(x1) 599 | x2 = F.normalize(x2) 600 | x = torch.cat((x1,x2),dim=1) 601 | return x 602 | 603 | def softcdet(self, output, target): 604 | sigmoid = nn.Sigmoid() 605 | losses = [((sigmoid(self.alpha * (self.threshold[beta] - output)) * target).sum() / (target.sum()) + beta * (sigmoid(self.alpha * (output - self.threshold[beta])) * (1 - target)).sum() / ((1 - target).sum())) for beta in self.beta] 606 | loss = sum(losses)/len(losses) 607 | return loss 608 | 609 | def crossentropy(self, output, target): 610 | sigmoid = nn.Sigmoid() 611 | loss = F.binary_cross_entropy(sigmoid(output), target) 612 | return loss 613 | 614 | def loss(self, output, target): 615 | if self.lossfn == 'SoftCdet': 616 | return self.softcdet(output, target) 617 | elif self.lossfn == 'crossentropy': 618 | return self.crossentropy(output, target) 619 | 620 | def cdet(self, output, target): 621 | losses = [((output < self.threshold[beta]).float() * target).sum() / (target.sum()) + beta * ((output > self.threshold[beta]).float() * (1 - target)).sum() / ((1 - target).sum()) for beta in self.beta] 622 | loss = sum(losses)/len(losses) 623 | return loss 624 | 625 | def minc(self, output, target, update_thresholds=False, showplots=False): 626 | scores_target, _ = torch.sort(output[target>0.5]) 627 | scores_nontarget, _ = torch.sort(-output[target<0.5]) 628 | scores_nontarget = -scores_nontarget 629 | pmiss_arr = [arr2val(torch.where(scores_target < i)[0], -1) for i in scores_target] 630 | pmiss = torch.tensor(pmiss_arr).float() / (target.cpu().sum()) 631 | pfa_arr = [arr2val(torch.where(scores_nontarget >= i)[0], 
-1) for i in scores_target] 632 | pfa = torch.tensor(pfa_arr).float() / ((1-target.cpu()).sum()) 633 | cdet_arr, minc_dict, minc_threshold = {}, {}, {} 634 | for beta in self.beta: 635 | cdet_arr[beta] = pmiss + beta*pfa 636 | minc_dict[beta], thidx = torch.min(cdet_arr[beta], 0) 637 | minc_threshold[beta] = scores_target[thidx] 638 | if update_thresholds: 639 | self.state_dict()["Th{}".format(int(beta))].data.copy_(minc_threshold[beta]) 640 | mincs = list(minc_dict.values()) 641 | minc_avg = sum(mincs)/len(mincs) 642 | if showplots: 643 | plt.figure() 644 | minsc = output.min() 645 | maxsc = output.max() 646 | plt.hist(np.asarray(scores_nontarget), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 647 | plt.hist(np.asarray(scores_target), bins=np.linspace(minsc,maxsc,50), alpha=0.5, normed=True) 648 | plt.plot(scores_target, pmiss) 649 | plt.plot(scores_target, pfa) 650 | plt.plot(scores_target, cdet_arr[99]) 651 | plt.plot(scores_target, cdet_arr[199]) 652 | # plt.ylim([0,3]) 653 | # plt.xlim([0,1.4]) 654 | plt.show() 655 | return minc_avg, minc_threshold 656 | 657 | def LoadPldaParamsFromKaldi(self, mean_vec_file, transform_mat_file): 658 | transform_mat = np.asarray([w.split() for w in np.asarray(subprocess.check_output(["copy-matrix","--binary=false", transform_mat_file, "-"]).decode('utf-8').strip()[2:-2].split('\n'))]).astype(float) 659 | mean_vec = np.asarray(subprocess.check_output(["copy-vector", "--binary=false", mean_vec_file, "-"]).decode('utf-8').strip()[1:-2].split()).astype(float) 660 | mdsd = self.state_dict() 661 | mdsd['centering_and_LDA.weight'].data.copy_(torch.from_numpy(transform_mat[:,:-1]).float()) 662 | mdsd['centering_and_LDA.bias'].data.copy_(torch.from_numpy(transform_mat[:,-1]-transform_mat[:,:-1].dot(mean_vec)).float()) 663 | 664 | def SaveModel(self, filename): 665 | with open(filename,'wb') as f: 666 | pickle.dump(self,f) -------------------------------------------------------------------------------- /utils/plotting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 10 15:53:41 2019 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | from textwrap import wrap 12 | 13 | 14 | def grep(l,s): 15 | return [i for i in l if s in i] 16 | 17 | def plot_valid_mincs(logfile, savefile='', nepochs=20): 18 | a = np.genfromtxt(logfile,dtype='str',delimiter=',,,,') 19 | b = grep(a,"Test set: C_min(149):") 20 | losses = [float(w.split()[-1]) for w in b] 21 | train_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==0]) 22 | val_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==1]) 23 | sre18_dev_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==2]) 24 | plt.figure(figsize=(5,3.5)) 25 | plt.plot(train_losses[:nepochs]) 26 | plt.plot(val_losses[:nepochs]) 27 | plt.plot(sre18_dev_losses[:nepochs]) 28 | x1,x2,y1,y2 = plt.axis() 29 | # plt.axis((x1,x2,0,1)) 30 | plt.legend(['Vox Train','Vox unseen val. set','VOiCES Dev']) 31 | plt.xlabel("Epoch #") 32 | plt.ylabel("minDCF") 33 | plt.gcf().subplots_adjust(bottom=0.15) 34 | # plt.legend(['Train data Cmin','5% Unseen Validation Cmin','VOICES_Dev Cmin']) 35 | # title = '\n'.join(wrap("Plot of C_{min}. 
"+a[1], 60)) 36 | # plt.title(title) 37 | if savefile: 38 | plt.savefig("{}_minc.pdf".format(savefile)) 39 | 40 | def plot_valid_softcdets(logfile, savefile=''): 41 | a = np.genfromtxt(logfile,dtype='str',delimiter=',,,,') 42 | b = grep(a,"Test set: C_mdl(149):") 43 | losses = [float(w.split()[-1]) for w in b] 44 | train_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==0]) 45 | val_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==1]) 46 | sre18_dev_losses = np.array([l for i,l in enumerate(losses[1:]) if i%3==2]) 47 | plt.figure(figsize=(8,8)) 48 | plt.plot(train_losses) 49 | plt.plot(val_losses) 50 | plt.plot(sre18_dev_losses) 51 | x1,x2,y1,y2 = plt.axis() 52 | plt.axis((x1,x2,0,1)) 53 | plt.legend(['Train data Cdet loss','5% Unseen Validation Cdet loss','SRE 2018 Cdet loss']) 54 | title = '\n'.join(wrap("Plot of C_{det} computed at model threshold (Used for backprop). "+a[1],60)) 55 | plt.title(title) 56 | if savefile: 57 | plt.savefig("{}_cdet.png".format(savefile)) 58 | 59 | def plot_thresholds(logfile,threshold_file, savefile=''): 60 | a = np.genfromtxt(threshold_file) 61 | b = np.genfromtxt(logfile,dtype='str',delimiter=',,,,') 62 | x = np.linspace(0,30,len(a)) 63 | # try: 64 | plt.figure(figsize=(8,8)) 65 | plt.plot(x,a[:,2:4]) 66 | plt.plot(x,a[:,5]) 67 | plt.plot(x,a[:,8]) 68 | plt.legend(["Model Threshold1","Model Threshold2","MinC Threshold1","MinC Threshold2"]) 69 | title = '\n'.join(wrap("Plot of MinC Thresholds for training data. "+b[1],60)) 70 | plt.title(title) 71 | if savefile: 72 | plt.savefig("{}_th_train.png".format(savefile)) 73 | plt.figure(figsize=(8,8)) 74 | plt.plot(x,a[:,2:4]) 75 | plt.plot(x,a[:,6]) 76 | plt.plot(x,a[:,9]) 77 | plt.legend(["Model Threshold1","Model Threshold2","MinC Threshold1","MinC Threshold2"]) 78 | title = '\n'.join(wrap("Plot of MinC Thresholds for 5% unseen data. "+b[1],60)) 79 | plt.title(title) 80 | if savefile: 81 | plt.savefig("{}_th_unseen.png".format(savefile)) 82 | plt.figure(figsize=(8,8)) 83 | plt.plot(x,a[:,2:4]) 84 | plt.plot(x,a[:,7]) 85 | plt.plot(x,a[:,10]) 86 | plt.legend(["Model Threshold1","Model Threshold2","MinC Threshold1","MinC Threshold2"]) 87 | title = '\n'.join(wrap("Plot of MinC Thresholds for SRE18 dev data. 
"+b[1],60)) 88 | plt.title(title) 89 | if savefile: 90 | plt.savefig("{}_th_sre18dev.png".format(savefile)) 91 | # except: 92 | # print("Sh*t happened") 93 | 94 | def generate_plots(): 95 | logfiles = np.genfromtxt('logs/logs_27122019',dtype='str',delimiter=',,,,') 96 | thresholds_files = np.genfromtxt('logs/thresholds_27122019',dtype='str',delimiter=',,,,') 97 | for l,t in zip(logfiles,thresholds_files): 98 | logfile = 'logs/'+l 99 | threshold_file = 'logs/'+t 100 | savefilename = "plots/plt_{}".format(threshold_file.split('_')[-1]) 101 | try: 102 | plot_valid_softcdets(logfile, savefile=savefilename) 103 | except: 104 | print("plot_valid_softcdets failed for {}, {}.".format(logfile,threshold_file)) 105 | try: 106 | plot_valid_mincs(logfile, savefile=savefilename) 107 | except: 108 | print("plot_valid_mincs failed for {}, {}.".format(logfile,threshold_file)) 109 | try: 110 | plot_thresholds(logfile,threshold_file, savefile=savefilename) 111 | except: 112 | print("plot_thresholds failed for {}, {}.".format(logfile,threshold_file)) -------------------------------------------------------------------------------- /utils/score_calibration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Feb 1 15:17:04 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import numpy as np 10 | import scipy.stats as P 11 | import os 12 | from pdb import set_trace as bp 13 | 14 | def calibrate_train(train_scores, train_labels): 15 | scores_target = train_scores[(train_labels=='target') + (train_labels=='tgt')] 16 | scores_non_target = train_scores[(train_labels=='nontarget') + (train_labels=='imp')] 17 | t_mean = np.mean(scores_target) 18 | nt_mean = np.mean(scores_non_target) 19 | t_std = np.std(scores_target) 20 | nt_std = np.std(scores_non_target) 21 | calib_mdl = dict() 22 | calib_mdl['tgt'] = P.norm(t_mean, t_std) 23 | calib_mdl['imp'] = P.norm(nt_mean, nt_std) 24 | return calib_mdl 25 | 26 | def calibrate_apply(scores, calib_mdl): 27 | scores_calibrated = calib_mdl['tgt'].logpdf(scores) - calib_mdl['imp'].logpdf(scores) 28 | return scores_calibrated 29 | 30 | dev_score_file = '/home/data2/SRE2019/prashantk/voxceleb/v3/exp/xvector_nnet_2a/scores/scores_sre18_dev_kaldiplda_xvectors_swbd_sre_mx6_before_norm.tsv' 31 | dev_key_file = '/home/data/SRE2019/LDC2019E59/dev/docs/sre18_dev_trial_key.tsv' 32 | 33 | dev_score_files = ['/home/data1/prachis/SRE_19/Focal/fusionsre19_BG/Fusedfinal_sre18_eval_test_score_asnorm1.tsv'] 34 | score_files_list = ['/home/data1/prachis/SRE_19/Focal/fusionsre19_BG/Fusedfinal_sre19_eval_test_score_asnorm1.tsv'] #, #dev #Eval score files, cohort score files, etc. 
35 | 36 | float_formatter = "{:.5f}".format 37 | np.set_printoptions(formatter={'float_kind':float_formatter}) 38 | 39 | for f in range(len(score_files_list)): 40 | scores = np.genfromtxt(dev_score_files[f], dtype='str', 41 | skip_header=1)[:,-1].astype(float) 42 | dev_key = np.genfromtxt(dev_key_file, dtype='str', skip_header=1)[:,3] 43 | 44 | calib_mdl = calibrate_train(scores, dev_key) 45 | score_tsv = np.genfromtxt(score_files_list[f], dtype='str',skip_header=0) 46 | scores = score_tsv[1:,-1].astype(float) #or score_tsv[:,-1].astype(float) ## (if there is no header) 47 | scores_calibrated = calibrate_apply(scores, calib_mdl) 48 | scores_calibrated_1 = ['{:f}'.format(item) for item in scores_calibrated] 49 | score_tsv[1:,-1] = scores_calibrated_1#.astype("%.5f") 50 | save_filename = '_calibrated'.join(os.path.splitext(score_files_list[f])) 51 | np.savetxt(save_filename, score_tsv, fmt='%s', delimiter='\t',comments='') 52 | -------------------------------------------------------------------------------- /utils/scorefile_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Feb 26 16:26:30 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import re 10 | import numpy as np 11 | import random 12 | import sys 13 | import subprocess 14 | import pickle 15 | import os 16 | import torch 17 | from torch.utils.data import TensorDataset, DataLoader, ConcatDataset, Subset 18 | import kaldi_io 19 | from pdb import set_trace as bp 20 | from utils.sv_trials_loaders import load_xvec_trials_from_idbatch 21 | 22 | def generate_sre_scores(score_filename, trials_file, mega_dict, model, device, batch_size = 102400): 23 | # To reduce memory usage on CPU, scores are generated in batches and then concatenated 24 | 25 | model = model.to(torch.device('cpu')) 26 | trials = np.genfromtxt(trials_file, dtype='str') 27 | header = '\t'.join(trials[0]) + '\tLLR' 28 | trials = trials[1:] 29 | iters = len(trials) // batch_size 30 | S = torch.tensor([]) 31 | model = model.eval() 32 | with torch.no_grad(): 33 | for i in range(iters+1): 34 | x1_b, x2_b = load_xvec_trials_from_idbatch(mega_dict, trials[i * batch_size:i * batch_size + batch_size], device=torch.device('cpu')) 35 | S_b = model.forward(x1_b, x2_b) 36 | S = torch.cat((S, S_b)) 37 | scores = np.asarray(S.detach()).astype(str) 38 | np.savetxt(score_filename, np.c_[trials, scores], header=header, fmt='%s', delimiter='\t', comments='') 39 | model = model.to(device) 40 | 41 | def generate_voices_scores(score_filename, trials_file, mega_dict, model, device, batch_size = 102400): 42 | # To reduce memory usage on CPU, scores are generated in batches and then concatenated 43 | 44 | model = model.to(torch.device('cpu')) 45 | trials = np.genfromtxt(trials_file, dtype='str')[:,:2] 46 | iters = len(trials) // batch_size 47 | S = torch.tensor([]) 48 | model = model.eval() 49 | with torch.no_grad(): 50 | for i in range(iters+1): 51 | x1_b, x2_b = load_xvec_trials_from_idbatch(mega_dict, trials[i * batch_size:i * batch_size + batch_size], device=torch.device('cpu')) 52 | S_b = model.forward(x1_b, x2_b) 53 | S = torch.cat((S, S_b)) 54 | scores = np.asarray(S.detach()).astype(str) 55 | np.savetxt(score_filename, np.c_[trials, scores], fmt='%s', delimiter='\t', comments='') 56 | model = model.to(device) -------------------------------------------------------------------------------- /utils/sv_trials_loaders.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Feb 17 12:15:48 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | import re 10 | import numpy as np 11 | import random 12 | import sys 13 | import subprocess 14 | import pickle 15 | import os 16 | import torch 17 | from torch.utils.data import TensorDataset, DataLoader, ConcatDataset, Subset 18 | import kaldi_io 19 | from pdb import set_trace as bp 20 | 21 | 22 | class TrialSampler: 23 | def __init__(self, spk2utt_file, batch_size, num_spks_per_batch, mega_scp_dict, id_to_num_dict): 24 | with open(spk2utt_file) as f: 25 | self.spk2utt_list = f.readlines() 26 | random.shuffle(self.spk2utt_list) 27 | self.batch_size = batch_size 28 | self.num_spks_per_batch = num_spks_per_batch 29 | self.num_utts_per_spk = batch_size/num_spks_per_batch 30 | self.n_repeats_tgt = int(0.7*(self.num_utts_per_spk - 1)) 31 | self. n_repeats_imp = int(0.7 * self.num_utts_per_spk * (self.num_spks_per_batch - 1)) 32 | self.mega_scp_dict = mega_scp_dict 33 | self.id_to_num_dict = id_to_num_dict 34 | self.spk2utt_dict = {} 35 | 36 | def spk2utt_dict_from_list(self): 37 | for x in self.spk2utt_list: 38 | a = np.asarray(x.rstrip('\n').split(' ', 1)[1].split(' ')) 39 | random.shuffle(a) 40 | self.spk2utt_dict[x.split(' ', 1)[0]] = np.array_split(a, np.ceil(a.shape[0] / self.num_utts_per_spk)) 41 | 42 | def check_spk2utt_dict(self): 43 | keys_to_remove=[] 44 | for k, v in self.spk2utt_dict.items(): 45 | if len(self.spk2utt_dict[k]) == 0: 46 | keys_to_remove.append(k) 47 | if len(keys_to_remove) > 0: 48 | for k in keys_to_remove: 49 | del self.spk2utt_dict[k] 50 | return len(self.spk2utt_dict.keys()) > self.num_spks_per_batch 51 | 52 | def get_batch(self): 53 | spk2utt_keys = list(self.spk2utt_dict.keys()) 54 | random.shuffle(spk2utt_keys) 55 | keys_to_sample = spk2utt_keys[:self.num_spks_per_batch] 56 | sampled_keys_utts_per_spk = [] 57 | diff_speaker_spk2utt_dict = {} 58 | for x in keys_to_sample: 59 | sampled_keys_utts_per_spk.append(self.spk2utt_dict[x][0]) 60 | diff_speaker_spk2utt_dict[x] = self.spk2utt_dict[x][0] 61 | del self.spk2utt_dict[x][0] 62 | t1, t2 = same_speaker_list(sampled_keys_utts_per_spk, self.mega_scp_dict, self.id_to_num_dict, n_repeats=self.n_repeats_tgt) 63 | nt1, nt2 = diff_speaker_list(diff_speaker_spk2utt_dict, self.mega_scp_dict, self.id_to_num_dict, n_repeats=self.n_repeats_imp) 64 | targets = torch.ones(t1.size()) 65 | non_targets = torch.zeros(nt1.size()) 66 | d1, d2, labels = torch.cat((t1,nt1)).float(), torch.cat((t2,nt2)).float(), torch.cat((targets,non_targets)).float() 67 | return d1, d2, labels 68 | 69 | 70 | def load_epoch(self): 71 | self.spk2utt_dict_from_list() 72 | epoch_data = [] 73 | while self.check_spk2utt_dict(): 74 | epoch_data.append(self.get_batch()) 75 | return epoch_data 76 | 77 | 78 | def custom_loader_e2e(data_spk2utt_list, mega_scp_dict, id_to_num_dict, batch_size=64, num_spks_per_batch=4): 79 | mega_loader = [] 80 | if type(data_spk2utt_list) == str: 81 | data_spk2utt_list = np.genfromtxt(data_spk2utt_list, dtype='str') 82 | if data_spk2utt_list.ndim == 2: 83 | data_spk2utt_list = data_spk2utt_list[:, 0] 84 | else: 85 | raise("Something wrong here.") 86 | for spk2utt_file in data_spk2utt_list: 87 | ts = TrialSampler(spk2utt_file, batch_size, num_spks_per_batch, mega_scp_dict, id_to_num_dict) 88 | mega_loader.extend(ts.load_epoch()) 89 | random.shuffle(mega_loader) 90 | return mega_loader 91 | 92 | def 
custom_loader_e2e_v2(nc, mega_scp_dict, id_to_num_dict): 93 | mega_loader = [] 94 | for spk2utt_file in nc.train_spk2utt_list: 95 | for num_spks_per_batch in range(nc.min_num_spks_per_batch, nc.max_num_spks_per_batch+1): 96 | ts = TrialSampler(spk2utt_file, nc.batch_size, num_spks_per_batch, mega_scp_dict, id_to_num_dict) 97 | mega_loader.extend(ts.load_epoch()) 98 | random.shuffle(mega_loader) 99 | return mega_loader 100 | 101 | def same_speaker_list(utts_per_spk, combined_scp_dict, id_to_num_dict, n_repeats=1): 102 | d1,d2 = [], [] 103 | for repeats in range(n_repeats): 104 | for utts in utts_per_spk: 105 | utts_shuffled = list(utts.copy()) 106 | random.shuffle(utts_shuffled) 107 | while len(utts_shuffled) >= 2: 108 | tmp1 = utts_shuffled.pop() 109 | if tmp1 not in combined_scp_dict: 110 | continue 111 | tmp2 = utts_shuffled.pop() 112 | if tmp2 not in combined_scp_dict: 113 | continue 114 | d1.append(id_to_num_dict[tmp1]) 115 | d2.append(id_to_num_dict[tmp2]) 116 | d1, d2 = torch.tensor(d1), torch.tensor(d2) 117 | return d1, d2 118 | 119 | 120 | def diff_speaker_list(spk2utt_dict, combined_scp_dict, id_to_num_dict, n_repeats=1): 121 | spk2utt_keys = list(spk2utt_dict.keys()) 122 | utt2spk = [] 123 | for i in spk2utt_keys: 124 | for j in spk2utt_dict[i]: 125 | utt2spk.append([j, i]) 126 | d1, d2 = [], [] 127 | for repeats in range(n_repeats): 128 | utt2spk_list = list(utt2spk) 129 | random.shuffle(utt2spk_list) 130 | i = 0 131 | while len(utt2spk_list) >= 2: 132 | if utt2spk_list[-1][1] != utt2spk_list[-2][1]: 133 | tmp1 = utt2spk_list.pop() 134 | if list(tmp1)[0] not in combined_scp_dict: 135 | continue 136 | tmp2 = utt2spk_list.pop() 137 | if list(tmp2)[0] not in combined_scp_dict: 138 | continue 139 | d1.append(id_to_num_dict[list(tmp1)[0]]) 140 | d2.append(id_to_num_dict[list(tmp2)[0]]) 141 | i = 0 142 | else: 143 | i = i + 1 144 | random.shuffle(utt2spk_list) 145 | if i == 50: 146 | break 147 | d1, d2 = torch.tensor(d1), torch.tensor(d2) 148 | return d1, d2 149 | 150 | 151 | def make_same_speaker_list(spk2utt_file, combined_scp_dict, n_repeats=1, train_and_valid=False,train_ratio=0.95): 152 | # print("In same speaker list") 153 | assert train_ratio < 1, "train_ratio should be less than 1." 
154 | with open(spk2utt_file) as f: 155 | spk2utt_list = f.readlines() 156 | random.seed(2) 157 | random.shuffle(spk2utt_list) 158 | uttsperspk = [(a.rstrip('\n').split(' ', 1)[1]).split(' ') for a in spk2utt_list] 159 | 160 | train_uttsperspk = uttsperspk[:int(train_ratio * len(uttsperspk))] 161 | train_same_speaker_list = [] 162 | for repeats in range(n_repeats): 163 | for utts in train_uttsperspk: 164 | utts_shuffled = utts.copy() 165 | random.shuffle(utts_shuffled) 166 | while len(utts_shuffled) >= 2: 167 | tmp1 = utts_shuffled.pop() 168 | if tmp1 not in combined_scp_dict: 169 | continue 170 | tmp2 = utts_shuffled.pop() 171 | if tmp2 not in combined_scp_dict: 172 | continue 173 | train_same_speaker_list.append([tmp1, tmp2]) 174 | train_same_speaker_list = np.asarray(train_same_speaker_list) 175 | 176 | valid_uttsperspk = uttsperspk[int((train_ratio) * len(uttsperspk)):] 177 | valid_same_speaker_list = [] 178 | for repeats in range(n_repeats): 179 | for utts in valid_uttsperspk: 180 | utts_shuffled = utts.copy() 181 | random.shuffle(utts_shuffled) 182 | while len(utts_shuffled) >= 2: 183 | tmp1 = utts_shuffled.pop() 184 | if tmp1 not in combined_scp_dict: 185 | continue 186 | tmp2 = utts_shuffled.pop() 187 | if tmp2 not in combined_scp_dict: 188 | continue 189 | valid_same_speaker_list.append([tmp1, tmp2]) 190 | valid_same_speaker_list = np.asarray(valid_same_speaker_list) 191 | 192 | return train_same_speaker_list, valid_same_speaker_list 193 | 194 | if train_and_valid: # Returns two lists for training and validation 195 | return train_same_speaker_list, valid_same_speaker_list 196 | else: 197 | return train_same_speaker_list + valid_same_speaker_list 198 | 199 | 200 | def make_diff_speaker_list(spk2utt_file, combined_scp_dict, n_repeats=1, train_and_valid=True, train_ratio=0.95): 201 | # print("In diff speaker list") 202 | assert train_ratio < 1, "train_ratio should be less than 1." 
203 | with open(spk2utt_file) as f: 204 | spk2utt_list = f.readlines() 205 | random.seed(2) 206 | random.shuffle(spk2utt_list) 207 | spk2utt_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1].split(' ') for x in spk2utt_list} 208 | spk2utt_keys = list(spk2utt_dict.keys()) 209 | train_keys = spk2utt_keys[:int(train_ratio * len(spk2utt_keys))] 210 | valid_keys = spk2utt_keys[int(train_ratio * len(spk2utt_keys)):] 211 | utt2spk_train = [] 212 | utt2spk_valid = [] 213 | for i in train_keys: 214 | for j in spk2utt_dict[i]: 215 | utt2spk_train.append([j, i]) 216 | for i in valid_keys: 217 | for j in spk2utt_dict[i]: 218 | utt2spk_valid.append([j, i]) 219 | 220 | train_diff_speaker_list = [] 221 | for repeats in range(n_repeats): 222 | utt2spk_list = list(utt2spk_train) 223 | random.shuffle(utt2spk_list) 224 | i = 0 225 | while len(utt2spk_list) >= 2: 226 | if utt2spk_list[-1][1] != utt2spk_list[-2][1]: 227 | tmp1 = utt2spk_list.pop() 228 | if list(tmp1)[0] not in combined_scp_dict: 229 | continue 230 | tmp2 = utt2spk_list.pop() 231 | if list(tmp2)[0] not in combined_scp_dict: 232 | continue 233 | train_diff_speaker_list.append([list(tmp1)[0], list(tmp2)[0]]) 234 | i = 0 235 | else: 236 | i = i + 1 237 | random.shuffle(utt2spk_list) 238 | if i == 50: 239 | # bp() 240 | break 241 | 242 | valid_diff_speaker_list = [] 243 | for repeats in range(n_repeats): 244 | utt2spk_list = list(utt2spk_valid) 245 | random.shuffle(utt2spk_list) 246 | i = 0 247 | while len(utt2spk_list) >= 2: 248 | if utt2spk_list[-1][1] != utt2spk_list[-2][1]: 249 | tmp1 = utt2spk_list.pop() 250 | if list(tmp1)[0] not in combined_scp_dict: 251 | continue 252 | tmp2 = utt2spk_list.pop() 253 | if list(tmp2)[0] not in combined_scp_dict: 254 | continue 255 | valid_diff_speaker_list.append([list(tmp1)[0], list(tmp2)[0]]) 256 | i = 0 257 | else: 258 | i = i + 1 259 | random.shuffle(utt2spk_list) 260 | if i == 50: 261 | # bp() 262 | break 263 | train_diff_speaker_list = np.asarray(train_diff_speaker_list) 264 | valid_diff_speaker_list = np.asarray(valid_diff_speaker_list) 265 | 266 | if train_and_valid: # Returns two lists for training and validation 267 | return train_diff_speaker_list, valid_diff_speaker_list 268 | else: 269 | return train_diff_speaker_list + valid_diff_speaker_list 270 | 271 | 272 | def generate_train_trial_keys(data_spk2utt_list, xvector_scp_list, train_and_valid=True, train_ratio=0.95): 273 | 274 | # Make sure that each spk2utt in data_spk2utt_list is of same gender, same source, same language, etc. More Matching Metadata --> Better the model training. 275 | 276 | # Can also specify the num_repeats after the dir name followed with space/tab separation in 2 column format. If not specified, default num_repeats is set to 1. 
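# Illustrative example (added; not in the original source, paths are hypothetical):
# a two-column data_spk2utt_list file would look like
#     /path/to/voxceleb_train/spk2utt    2
#     /path/to/sre_swbd_train/spk2utt    1
# where the second column is the per-list repeat count read into num_repeats_list below.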
277 | 278 | xvector_scp_combined = {} 279 | 280 | for fx in xvector_scp_list: 281 | with open(fx) as f: 282 | scp_list = f.readlines() 283 | scp_dict = {os.path.splitext(os.path.basename(x.split(' ', 1)[0]))[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list} 284 | xvector_scp_combined.update(scp_dict) 285 | 286 | if type(data_spk2utt_list) == str: 287 | data_spk2utt_list = np.genfromtxt(data_spk2utt_list, dtype='str') 288 | 289 | if data_spk2utt_list.ndim == 2: 290 | num_repeats_list = data_spk2utt_list[:, 1].astype(int) 291 | data_spk2utt_list = data_spk2utt_list[:, 0] 292 | elif data_spk2utt_list.ndim == 1: 293 | num_repeats_list = np.ones(len(data_spk2utt_list)).astype(int) 294 | else: 295 | raise("Something wrong here.") 296 | 297 | 298 | sampled_list_train = [] 299 | sampled_list_valid = [] 300 | 301 | for i, d in enumerate(data_spk2utt_list): 302 | # print("In for loop get train dataset") 303 | same_train_list, same_valid_list = make_same_speaker_list(d, xvector_scp_combined, xvector_scp_list, n_repeats = num_repeats_list[i], train_and_valid=True, train_ratio=0.95) 304 | diff_train_list, diff_valid_list = make_diff_speaker_list(d, xvector_scp_combined, n_repeats = num_repeats_list[i], train_and_valid=True, train_ratio=0.95) 305 | # bp() 306 | zeros = np.zeros((diff_train_list.shape[0], 1)).astype(int) 307 | ones = np.ones((same_train_list.shape[0], 1)).astype(int) 308 | same_list_with_label_train = np.concatenate((same_train_list, ones), axis=1) 309 | diff_list_with_label_train = np.concatenate((diff_train_list, zeros), axis=1) 310 | zeros = np.zeros((diff_valid_list.shape[0], 1)).astype(int) 311 | ones = np.ones((same_valid_list.shape[0], 1)).astype(int) 312 | same_list_with_label_valid = np.concatenate((same_valid_list, ones), axis=1) 313 | diff_list_with_label_valid = np.concatenate((diff_valid_list, zeros), axis=1) 314 | concat_pair_list_train = np.concatenate((same_list_with_label_train, diff_list_with_label_train)) 315 | concat_pair_list_valid = np.concatenate((same_list_with_label_valid, diff_list_with_label_valid)) 316 | 317 | np.random.shuffle(concat_pair_list_train) 318 | sampled_list_train.extend(concat_pair_list_train) 319 | 320 | np.random.shuffle(concat_pair_list_valid) 321 | sampled_list_valid.extend(concat_pair_list_valid) 322 | 323 | if train_and_valid: 324 | return sampled_list_train, sampled_list_valid 325 | else: 326 | return sampled_list_train + sampled_list_valid 327 | 328 | def save_unique_train_valid_xvector_scps(data_spk2utt_list, xvector_scp_list, train_scp_path, valid_scp_path, train_ratio=0.95): 329 | if type(data_spk2utt_list) == str: 330 | data_spk2utt_list = np.genfromtxt(data_spk2utt_list, dtype='str') 331 | 332 | if data_spk2utt_list.ndim == 2: 333 | data_spk2utt_list = data_spk2utt_list[:, 0] 334 | 335 | xvector_scp_combined = {} 336 | 337 | for fx in xvector_scp_list: 338 | with open(fx) as f: 339 | scp_list = f.readlines() 340 | scp_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1] for x in scp_list} 341 | xvector_scp_combined.update(scp_dict) 342 | 343 | train_scp = [] 344 | valid_scp = [] 345 | # bp() 346 | for i, d in enumerate(data_spk2utt_list): 347 | with open(d) as f: 348 | spk2utt_list = f.readlines() 349 | random.seed(2) 350 | random.shuffle(spk2utt_list) 351 | spk2utt_dict = {x.split(' ', 1)[0]: x.rstrip('\n').split(' ', 1)[1].split(' ') for x in spk2utt_list} 352 | spks = list(spk2utt_dict.keys()) 353 | train_keys = spks[:int(train_ratio * len(spks))] 354 | valid_keys = spks[int(train_ratio * len(spks)):] 355 | # bp() 356 | 
for i in train_keys: 357 | for j in spk2utt_dict[i]: 358 | if j in xvector_scp_combined: 359 | train_scp.append([j, xvector_scp_combined[j]]) 360 | for i in valid_keys: 361 | for j in spk2utt_dict[i]: 362 | if j in xvector_scp_combined: 363 | valid_scp.append([j, xvector_scp_combined[j]]) 364 | train_scp = np.asarray(train_scp) 365 | valid_scp = np.asarray(valid_scp) 366 | subprocess.call(['mkdir','-p',os.path.dirname(train_scp_path)]) 367 | subprocess.call(['mkdir','-p',os.path.dirname(valid_scp_path)]) 368 | np.savetxt(train_scp_path, train_scp, fmt='%s', delimiter=' ', comments='') 369 | np.savetxt(valid_scp_path, valid_scp, fmt='%s', delimiter=' ', comments='') 370 | 371 | def combine_trials_and_get_loader(trials_key_files_list, id_to_num_dict, subsample_factors=None, batch_size=2048, subset=0): 372 | if subsample_factors is None: 373 | subsample_factors = [1 for w in trials_key_files_list] 374 | datasets = [] 375 | for f, sf in zip(trials_key_files_list, subsample_factors): 376 | t = np.genfromtxt(f, dtype = 'str') 377 | x1, x2, l = [], [], [] 378 | for tr in t: 379 | try: 380 | a, b, c = id_to_num_dict[tr[0]], id_to_num_dict[tr[1]], float(tr[2]) 381 | x1.append(a); x2.append(b); l.append(c) 382 | except: 383 | pass 384 | tdset = TensorDataset(torch.tensor(x1),torch.tensor(x2),torch.tensor(l)) 385 | inds = np.arange(len(tdset))[np.random.rand(len(tdset)) 0: 389 | inds = np.arange(len(combined_dataset))[np.random.rand(len(combined_dataset)) 0: 411 | inds = np.arange(len(dataset))[np.random.rand(len(dataset)) all_losses[-2]) and (all_losses[-2] > all_losses[-3]): 185 | nc.lr = nc.lr / 2 186 | print("REDUCING LEARNING RATE to {} since loss trend looks like {}".format(nc.lr, all_losses[-3:])) 187 | logging.info("REDUCING LEARNING RATE to {} since loss trend looks like {}".format(nc.lr, all_losses[-3:])) 188 | optimizer = optim.Adam(model.parameters(), lr=nc.lr, weight_decay=1e-5) 189 | except: 190 | pass 191 | 192 | # %% __main__ 193 | 194 | if __name__ == '__main__': 195 | main_kaldiplda() -------------------------------------------------------------------------------- /xvector_GaussianBackend_pytorch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 3 21:15:54 2019 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | import numpy as np 15 | import os 16 | import sys 17 | import random 18 | import pickle 19 | import subprocess 20 | from utils.NpldaConf import NpldaConf 21 | from pdb import set_trace as bp 22 | from utils.sv_trials_loaders import combine_trials_and_get_loader, get_trials_loaders_dict, load_xvec_from_numbatch, load_xvec_from_idbatch 23 | 24 | from datetime import datetime 25 | import logging 26 | 27 | from utils.models import GaussianBackend 28 | 29 | 30 | def train(nc, model, train_loader, mega_xvec_dict, num_to_id_dict): 31 | model.eval() 32 | target_sum = torch.zeros(model.paired_mean_target.shape) 33 | non_target_sum = torch.zeros(model.paired_mean_target.shape) 34 | target_sq_sum = torch.zeros(model.paired_cov_inv_target.shape) 35 | non_target_sq_sum = torch.zeros(model.paired_cov_inv_target.shape) 36 | target_count = 0 37 | non_target_count = 0 38 | with torch.no_grad(): 39 | for data1, data2, target in train_loader: 40 | data1_xvec, data2_xvec = load_xvec_from_numbatch(mega_xvec_dict, num_to_id_dict, data1, data2, 
device=torch.device('cpu')) 41 | x = model.forward_getpaired(data1,data2) 42 | 43 | target_count += target.sum().item() 44 | if target.sum().item() >= 0.5: 45 | target_sum += x[target>0.5].sum(dim=0) 46 | target_sq_sum += x[target>0.5].t() @ x[target>0.5] 47 | 48 | non_target_count += (1-target).sum().item() 49 | if (1-target).sum().item() >= 0.5: 50 | non_target_sum += x[target<0.5].sum(dim=0) 51 | non_target_sq_sum += x[target<0.5].t() @ x[target<0.5] 52 | model.paired_mean_target = target_sum/target_count 53 | model.paired_cov_inv_target = torch.inverse(target_sq_sum/target_count - (model.paired_mean_target[:,np.newaxis] @ model.paired_mean_target[np.newaxis,:])) 54 | model.paired_mean_nontarget = non_target_sum/(non_target_count-1) 55 | model.paired_cov_inv_nontarget = torch.inverse(non_target_sq_sum/(non_target_count-1)- (model.paired_mean_nontarget[:,np.newaxis] @ model.paired_mean_nontarget[np.newaxis,:])) 56 | return model 57 | 58 | 59 | def validate(nc, model, data_loader, mega_xvec_dict, num_to_id_dict, device=torch.device('cpu')): 60 | model.eval() 61 | with torch.no_grad(): 62 | targets, scores = torch.tensor([]).to(device), torch.tensor([]).to(device) 63 | for data1, data2, target in data_loader: 64 | data1, data2, target = data1.to(device), data2.to(device), target.to(device) 65 | data1_xvec, data2_xvec = load_xvec_from_numbatch(mega_xvec_dict, num_to_id_dict, data1, data2, 66 | device) 67 | targets = torch.cat((targets, target)) 68 | scores_batch = model.forward(data1_xvec, data2_xvec) 69 | scores = torch.cat((scores, scores_batch)) 70 | soft_cdet_loss = model.softcdet(scores, targets) 71 | cdet_mdl = model.cdet(scores, targets) 72 | minc, minc_threshold = model.minc(scores, targets) 73 | 74 | logging.info('\n\nTest set: C_det (mdl): {:.4f}\n'.format(cdet_mdl)) 75 | logging.info('Test set: soft C_det (mdl): {:.4f}\n'.format(soft_cdet_loss)) 76 | logging.info('Test set: C_min: {:.4f}\n'.format(minc)) 77 | for beta in nc.beta: 78 | logging.info('Test set: argmin threshold [{}]: {:.4f}\n'.format(beta, minc_threshold[beta])) 79 | 80 | print('\n\nTest set: C_det (mdl): {:.4f}\n'.format(cdet_mdl)) 81 | print('Test set: soft C_det (mdl): {:.4f}\n'.format(soft_cdet_loss)) 82 | print('Test set: C_min: {:.4f}\n'.format(minc)) 83 | for beta in nc.beta: 84 | print('Test set: argmin threshold [{}]: {:.4f}\n'.format(beta, minc_threshold[beta])) 85 | 86 | return minc, minc_threshold 87 | 88 | 89 | def main_GB(): 90 | 91 | timestamp = int(datetime.timestamp(datetime.now())) 92 | print(timestamp) 93 | logging.basicConfig(filename='logs/kaldiplda_{}.log'.format(timestamp), 94 | filemode='a', 95 | format='%(levelname)s: %(message)s', 96 | datefmt='%H:%M:%S', 97 | level=logging.DEBUG) 98 | # %% Configure Training 99 | configfile = 'conf/voices_config.cfg' 100 | 101 | nc = NpldaConf(configfile) 102 | 103 | torch.manual_seed(nc.seed) 104 | np.random.seed(nc.seed) 105 | random.seed(nc.seed) 106 | 107 | logging.info("Started at {}.\n\n GAUSSIAN BACKEND \n\n".format(datetime.now())) 108 | 109 | nc.device='cpu' #CPU enough for GB 110 | 111 | print("Running on {}...".format(nc.device)) 112 | logging.info("\nConfiguration:\n\n{}\n\n".format(''.join(open(configfile,'r').readlines()))) 113 | logging.info("Running on {} ...\n".format(nc.device)) 114 | 115 | # %%Load the generated training data trials and make loaders here 116 | 117 | mega_xvec_dict = pickle.load(open(nc.mega_xvector_pkl, 'rb')) 118 | num_to_id_dict = {i: j for i, j in enumerate(list(mega_xvec_dict))} 119 | id_to_num_dict = {v: k for k, v 
in num_to_id_dict.items()} 120 | 121 | train_loader = combine_trials_and_get_loader(nc.training_data_trials_list, id_to_num_dict, batch_size=nc.batch_size) 122 | 123 | # train_loader_sampled = combine_trials_and_get_loader(nc.training_data_trials_list, id_to_num_dict, batch_size=nc.batch_size, subset=0.05) 124 | 125 | valid_loaders_dict = get_trials_loaders_dict(nc.validation_trials_list, id_to_num_dict, batch_size=5*nc.batch_size) 126 | 127 | model = GaussianBackend() 128 | 129 | train(nc, model, train_loader, mega_xvec_dict, num_to_id_dict) 130 | 131 | for val_set, valid_loader in valid_loaders_dict.items(): 132 | print("Validating {}".format(val_set)) 133 | minc, minc_threshold = validate(nc, model, valid_loader, mega_xvec_dict, num_to_id_dict) 134 | 135 | model.SaveModel("models/GaussianBackend_swbd_sre_mx6.{}.pt".format(timestamp)) 136 | for trial_file in nc.test_trials_list: 137 | print("Generating scores for Gaussian Backend for trial file {}".format(trial_file)) 138 | 139 | nc.generate_scorefile("scores/GaussianBackend_{}_{}.txt".format(os.path.splitext(os.path.basename(trial_file))[0], timestamp), trial_file, mega_xvec_dict, model, nc.device) 140 | 141 | 142 | 143 | if __name__ == '__main__': 144 | main_GB() 145 | -------------------------------------------------------------------------------- /xvector_NeuralPlda_pytorch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 3 21:15:54 2019 5 | 6 | @author: shreyasr, prashantk 7 | """ 8 | 9 | # %% imports and definitions 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.optim as optim 15 | import numpy as np 16 | import os 17 | import sys 18 | import random 19 | import pickle 20 | import subprocess 21 | from utils.NpldaConf import NpldaConf 22 | from pdb import set_trace as bp 23 | from utils.sv_trials_loaders import combine_trials_and_get_loader, get_trials_loaders_dict, load_xvec_trials_from_numbatch, load_xvec_trials_from_idbatch 24 | 25 | from datetime import datetime 26 | import logging 27 | 28 | from utils.models import NeuralPlda 29 | 30 | def train(nc, model, device, train_loader, mega_xvec_dict, num_to_id_dict, optimizer, epoch, valid_loaders=None): 31 | 32 | model.train() 33 | losses = [] 34 | 35 | for batch_idx, (data1, data2, target) in enumerate(train_loader): 36 | optimizer.zero_grad() 37 | data1, data2, target = data1.to(device), data2.to(device), target.to(device) 38 | data1_xvec, data2_xvec = load_xvec_trials_from_numbatch(mega_xvec_dict, num_to_id_dict, data1, data2, device) 39 | output = model(data1_xvec, data2_xvec) 40 | loss = model.loss(output, target) 41 | losses.append(loss.item()) 42 | loss.backward() 43 | optimizer.step() 44 | 45 | if batch_idx % nc.log_interval == 0: 46 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\t {}: {:.6f}'.format( 47 | epoch, batch_idx * len(data1), len(train_loader.dataset), 48 | 100. * batch_idx / len(train_loader), nc.loss, sum(losses)/len(losses))) 49 | logging.info('Train Epoch: {} [{}/{} ({:.0f}%)]\t {}: {:.6f}'.format( 50 | epoch, batch_idx * len(data1), len(train_loader.dataset), 51 | 100. 
* batch_idx / len(train_loader), nc.loss, sum(losses)/len(losses))) 52 | losses = [] 53 | 54 | 55 | 56 | def validate(nc, model, device, mega_xvec_dict, num_to_id_dict, data_loader, update_thresholds=False): 57 | model.eval() 58 | with torch.no_grad(): 59 | targets, scores = torch.tensor([]).to(device), torch.tensor([]).to(device) 60 | for data1, data2, target in data_loader: 61 | data1, data2, target = data1.to(device), data2.to(device), target.to(device) 62 | data1_xvec, data2_xvec = load_xvec_trials_from_numbatch(mega_xvec_dict, num_to_id_dict, data1, data2, 63 | device) 64 | targets = torch.cat((targets, target)) 65 | scores_batch = model.forward(data1_xvec, data2_xvec) 66 | scores = torch.cat((scores, scores_batch)) 67 | soft_cdet_loss = model.softcdet(scores, targets) 68 | cdet_mdl = model.cdet(scores, targets) 69 | minc, minc_threshold = model.minc(scores, targets, update_thresholds) 70 | 71 | logging.info('\n\nTest set: C_det (mdl): {:.4f}\n'.format(cdet_mdl)) 72 | logging.info('Test set: soft C_det (mdl): {:.4f}\n'.format(soft_cdet_loss)) 73 | logging.info('Test set: C_min: {:.4f}\n'.format(minc)) 74 | for beta in nc.beta: 75 | logging.info('Test set: argmin threshold [{}]: {:.4f}\n'.format(beta, minc_threshold[beta])) 76 | 77 | print('\n\nTest set: C_det (mdl): {:.4f}\n'.format(cdet_mdl)) 78 | print('Test set: soft C_det (mdl): {:.4f}\n'.format(soft_cdet_loss)) 79 | print('Test set: C_min: {:.4f}\n'.format(minc)) 80 | for beta in nc.beta: 81 | print('Test set: argmin threshold [{}]: {:.4f}\n'.format(beta, minc_threshold[beta])) 82 | 83 | return minc, minc_threshold 84 | 85 | 86 | # %% main_kaldiplda 87 | 88 | def main_kaldiplda(): 89 | timestamp = int(datetime.timestamp(datetime.now())) 90 | print(timestamp) 91 | logging.basicConfig(filename='logs/kaldiplda_{}.log'.format(timestamp), 92 | filemode='a', 93 | format='%(levelname)s: %(message)s', 94 | datefmt='%H:%M:%S', 95 | level=logging.DEBUG) 96 | # %% Configure Training 97 | configfile = 'conf/voices_config.cfg' 98 | 99 | nc = NpldaConf(configfile) 100 | 101 | torch.manual_seed(nc.seed) 102 | np.random.seed(nc.seed) 103 | random.seed(nc.seed) 104 | 105 | logging.info(" Running file {}\n\nStarted at {}.\n".format(sys.argv[0], datetime.now())) 106 | 107 | if not torch.cuda.is_available(): 108 | nc.device='cpu' 109 | device = torch.device(nc.device) 110 | 111 | print("Running on {}...".format(nc.device)) 112 | logging.info("Running on {} ...\n".format(nc.device)) 113 | logging.info("\nConfiguration:\n\n{}\n\n".format(''.join(open(configfile,'r').readlines()))) 114 | 115 | # %%Load the generated training data trials and make loaders here 116 | 117 | mega_xvec_dict = pickle.load(open(nc.mega_xvector_pkl, 'rb')) 118 | num_to_id_dict = {i: j for i, j in enumerate(list(mega_xvec_dict))} 119 | id_to_num_dict = {v: k for k, v in num_to_id_dict.items()} 120 | 121 | train_loader = combine_trials_and_get_loader(nc.training_data_trials_list, id_to_num_dict, subsample_factors=nc.train_subsample_factors ,batch_size=nc.batch_size) 122 | 123 | # train_loader_sampled = combine_trials_and_get_loader(nc.training_data_trials_list, id_to_num_dict, batch_size=nc.batch_size, subset=0.05) 124 | 125 | valid_loaders_dict = get_trials_loaders_dict(nc.validation_trials_list, id_to_num_dict, subsample_factors=nc.valid_subsample_factors, batch_size=5*nc.batch_size) 126 | 127 | # %% Initialize model and stuff 128 | 129 | model = NeuralPlda(nc).to(device) 130 | 131 | ## To load a Kaldi trained PLDA model, Specify the paths of 'mean.vec', 'transform.mat' and 
'plda' generated from stage 8 of https://github.com/kaldi-asr/kaldi/blob/master/egs/sre16/v2/run.sh 132 | 133 | if nc.initialization == 'kaldi': 134 | model.LoadPldaParamsFromKaldi(nc.meanvec, nc.transformmat, nc.kaldiplda) 135 | 136 | ## Uncomment to initialize with a pickled pretrained model 137 | # model = pickle.load(open('/home/data2/SRE2019/shreyasr/X/models/kaldi_pldaNet_sre0410_swbd_16_16.swbdsremx6epoch.1571651491.pt','rb')) 138 | 139 | optimizer = optim.Adam(model.parameters(), lr=nc.lr, weight_decay=1e-5) 140 | 141 | print("Initializing the thresholds... Whatever numbers that get printed here are junk.\n") 142 | valloss, minC_threshold = validate(nc, model, device, mega_xvec_dict, num_to_id_dict, valid_loaders_dict[nc.heldout_set_for_th_init], update_thresholds=True) 143 | 144 | 145 | 146 | # %% Train and Validate model 147 | print("\n\nEpoch 0: After Initialization\n") 148 | all_losses = [] 149 | for val_set, valid_loader in valid_loaders_dict.items(): 150 | print("Validating {}".format(val_set)) 151 | logging.info("Validating {}".format(val_set)) 152 | valloss, minC_threshold = validate(nc, model, device, mega_xvec_dict, num_to_id_dict, valid_loader) 153 | if val_set==nc.heldout_set_for_lr_decay: 154 | all_losses.append(valloss) 155 | 156 | 157 | for epoch in range(1, nc.n_epochs + 1): 158 | train(nc, model, device, train_loader , mega_xvec_dict, num_to_id_dict, optimizer, epoch) 159 | 160 | for val_set, valid_loader in valid_loaders_dict.items(): 161 | print("Validating {}".format(val_set)) 162 | logging.info("Validating {}".format(val_set)) 163 | valloss, minC_threshold = validate(nc, model, device, mega_xvec_dict, num_to_id_dict, valid_loader) 164 | if val_set==nc.heldout_set_for_lr_decay: 165 | all_losses.append(valloss) 166 | 167 | 168 | model.SaveModel("models/NPLDA_{}_{}.pt".format(epoch, timestamp)) 169 | for trial_file in nc.test_trials_list: 170 | print("Generating scores for Epoch {} with trial file {}".format(epoch, trial_file)) 171 | 172 | nc.generate_scorefile("scores/kaldipldanet_epoch{}_{}_{}.txt".format(epoch, os.path.splitext(os.path.basename(trial_file))[0], timestamp), trial_file, mega_xvec_dict, model, device, 5*nc.batch_size) 173 | 174 | try: 175 | if (all_losses[-1] > all_losses[-2]) and (all_losses[-2] > all_losses[-3]): 176 | nc.lr = nc.lr / 2 177 | print("REDUCING LEARNING RATE to {} since loss trend looks like {}".format(nc.lr, all_losses[-3:])) 178 | logging.info("REDUCING LEARNING RATE to {} since loss trend looks like {}".format(nc.lr, all_losses[-3:])) 179 | optimizer = optim.Adam(model.parameters(), lr=nc.lr, weight_decay=1e-5) 180 | except: 181 | pass 182 | 183 | # %% __main__ 184 | 185 | if __name__ == '__main__': 186 | main_kaldiplda() -------------------------------------------------------------------------------- /xvector_generate_scores.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Apr 18 16:11:05 2020 5 | 6 | @author: shreyasr 7 | """ 8 | 9 | 10 | import torch 11 | import numpy as np 12 | import os 13 | import random 14 | import pickle 15 | from utils.NpldaConf import NpldaConf 16 | 17 | # %% Set config and stuff here 18 | 19 | configfile = 'conf/voices_config.cfg' 20 | timestamp = '1586347612' 21 | epoch = '13' 22 | 23 | # %% Main 24 | 25 | nc = NpldaConf(configfile) 26 | 27 | torch.manual_seed(nc.seed) 28 | np.random.seed(nc.seed) 29 | random.seed(nc.seed) 30 | 31 | mega_xvec_dict = 
pickle.load(open(nc.mega_xvector_pkl, 'rb')) 32 | num_to_id_dict = {i: j for i, j in enumerate(list(mega_xvec_dict))} 33 | id_to_num_dict = {v: k for k, v in num_to_id_dict.items()} 34 | 35 | if not torch.cuda.is_available(): 36 | nc.device='cpu' 37 | device = torch.device(nc.device) 38 | 39 | model = pickle.load(open("models/NPLDA_{}_{}.pt".format(epoch, timestamp),'rb')) 40 | 41 | for trial_file in nc.test_trials_list: 42 | print("Generating scores for Epoch {} with trial file {}".format(epoch, trial_file)) 43 | 44 | nc.generate_scorefile("scores/kaldipldanet_epoch{}_{}_{}_scores.txt".format(epoch, os.path.splitext(os.path.basename(trial_file))[0], timestamp), trial_file, mega_xvec_dict, model, device, 5*nc.batch_size) 45 | 46 | --------------------------------------------------------------------------------
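A note on the backend scoring that utils/models.py implements: NeuralPlda scores a trial as a quadratic form in the two processed embeddings, S = x1' diag(Q) x1 + x2' diag(Q) x2 + 2 * x1' diag(P) x2 with P = P_sqrt ** 2, and the model classes train or evaluate with a detection cost smoothed by a sigmoid so that it can be back-propagated. The sketch below is a minimal NumPy restatement of those two pieces, not code from this repository; the dimension, alpha, thresholds, betas and labels are placeholder values chosen only to make it runnable (the betas 99 and 199 mirror the operating points referenced in the minc/plotting code above).

import numpy as np

def pairwise_plda_score(x1, x2, p_sqrt, q):
    # Mirrors forward_from_plda_embeddings in utils/models.py:
    # S = x1' diag(q) x1 + x2' diag(q) x2 + 2 * x1' diag(p) x2, with p = p_sqrt ** 2.
    p = p_sqrt ** 2
    return (x1 * q * x1).sum() + (x2 * q * x2).sum() + 2.0 * (x1 * p * x2).sum()

def soft_cdet(scores, labels, thresholds, betas, alpha=15.0):
    # Sigmoid-smoothed miss / false-alarm trade-off averaged over the operating points,
    # in the same form as the softcdet methods; alpha is a placeholder constant here.
    sig = lambda z: 1.0 / (1.0 + np.exp(-z))
    costs = []
    for beta in betas:
        th = thresholds[beta]
        p_miss = (sig(alpha * (th - scores)) * labels).sum() / labels.sum()
        p_fa = (sig(alpha * (scores - th)) * (1.0 - labels)).sum() / (1.0 - labels).sum()
        costs.append(p_miss + beta * p_fa)
    return sum(costs) / len(costs)

# Toy usage with random embeddings; 170 is an arbitrary speaker-factor dimension.
rng = np.random.default_rng(0)
dim = 170
p_sqrt, q = rng.random(dim), rng.random(dim)
pairs = rng.standard_normal((8, 2, dim))
scores = np.array([pairwise_plda_score(a, b, p_sqrt, q) for a, b in pairs])
labels = np.array([1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0])
print(soft_cdet(scores, labels, thresholds={99: 0.0, 199: 0.0}, betas=[99, 199]))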