├── .gitignore
├── README.md
├── asvtorch
│   ├── backend
│   │   ├── plda.py
│   │   └── vector_processing.py
│   ├── evaluation
│   │   ├── eval_metrics.py
│   │   ├── parameters.py
│   │   └── trials.py
│   ├── global_setup.py
│   ├── ivector
│   │   ├── featureloader.py
│   │   ├── gmm.py
│   │   ├── ivector_extractor.py
│   │   ├── posteriors.py
│   │   ├── settings.py
│   │   └── statloader.py
│   ├── kaldidata
│   │   ├── kaldifeatloaders.py
│   │   ├── posterior_io.py
│   │   └── utils.py
│   └── misc
│       └── misc.py
├── config.py
├── environment.yml
├── kaldi
│   └── egs
│       └── voxceleb
│           └── v1
│               └── extract_feats_and_train_ubm.sh
└── run_voxceleb_ivector.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | __pycache__/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### GPU-accelerated PyTorch implementation of frame posterior computation and i-vector extractor training.
2 | Kaldi is required for MFCC extraction and UBM training.
3 | 
4 | #### Steps to run the example script with VoxCeleb data:
5 | - Move **kaldi/egs/voxceleb/v1/extract_feats_and_train_ubm.sh** to the corresponding folder in your Kaldi installation.
6 | - In **extract_feats_and_train_ubm.sh**, update **output_dir**, **voxceleb1_root**, and **voxceleb2_root**.
7 | - If you are using a newer version of VoxCeleb1 (1.1), you might have to modify **kaldi/egs/voxceleb/v1/local/make_voxceleb1.pl**, as the data organization differs from the original VoxCeleb release.
8 | - Run **extract_feats_and_train_ubm.sh**.
9 | - Update **DATA_FOLDER** in **run_voxceleb_ivector.py**.
10 | - Install and activate a compatible conda environment:
11 |     - **environment.yml** has all the needed packages
12 |     - Main requirements: Python (>3.6), PyTorch (>1.1), NumPy, SciPy, PyKaldi
13 | - Run **run_voxceleb_ivector.py**.
14 | 
15 | 
16 | For more details:
17 | http://dx.doi.org/10.21437/Interspeech.2019-1955
18 | 
19 | ```
20 | @inproceedings{Vestman2019,
21 |   author={Ville Vestman and Kong Aik Lee and Tomi H. Kinnunen and Takafumi Koshinaka},
22 |   title={{Unleashing the Unused Potential of i-Vectors Enabled by GPU Acceleration}},
23 |   year=2019,
24 |   booktitle={Proc.
Interspeech 2019}, 25 | pages={351--355}, 26 | doi={10.21437/Interspeech.2019-1955}, 27 | url={http://dx.doi.org/10.21437/Interspeech.2019-1955} 28 | } 29 | ``` 30 | 31 | -------------------------------------------------------------------------------- /asvtorch/backend/plda.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import time 3 | 4 | import numpy as np 5 | import torch 6 | from scipy.linalg import inv, svd 7 | 8 | # Based on the PLDA in LRE 2017 baseline 9 | 10 | class Plda: 11 | def __init__(self, St, Sb): 12 | self.St = St 13 | self.Sb = Sb 14 | self.plda_dim = 0 15 | self.l = None 16 | self.uk = None 17 | self.qhat = None 18 | 19 | @classmethod 20 | def train_closed_form(cls, data, speaker_labels, device): 21 | print('Training PLDA...') 22 | data = data.to(device) 23 | data, class_boundaries = _rearrange_data(data, speaker_labels) 24 | print('Computing within class covariance...') 25 | Sw = _compute_within_cov(data, class_boundaries) 26 | print('Computing data covariance...') 27 | St = _compute_cov(data) 28 | Sb = St - Sw 29 | print('PLDA trained!...') 30 | return Plda(St, Sb) 31 | 32 | @classmethod 33 | def train_em(cls, data, speaker_labels, plda_dim, iterations, device): 34 | print('Initializing simplified PLDA...') 35 | data = data.to(device) 36 | n_total_sessions, data_dim = data.size() 37 | F = torch.randn(data_dim, plda_dim, device=device) 38 | F = _orthogonalize_columns(F) 39 | S = 1000 * torch.randn(data_dim, data_dim, device=device) 40 | data_covariance = torch.matmul(data.t(), data) 41 | data_list, count_list = _arrange_data_by_counts(data, speaker_labels) 42 | eye_matrix = torch.eye(plda_dim, device=device) 43 | 44 | for iteration in range(1, iterations+1): 45 | print('Iteration {}...'.format(iteration), end='') 46 | iter_start_time = time.time() 47 | 48 | FS = torch.solve(F, S.t())[0].t() 49 | FSF = torch.matmul(FS, F) 50 | 51 | dataEh = torch.zeros(data_dim, plda_dim, device=device) 52 | Ehh = torch.zeros(plda_dim, plda_dim, device=device) 53 | #print(count_list) 54 | for count_data, count in zip(data_list, count_list): 55 | Sigma = torch.inverse(eye_matrix + count * FSF) 56 | my = torch.chain_matmul(Sigma, FS.repeat(1, count), count_data.view(-1, data_dim * count).t()) 57 | #print(torch.norm(my[:, 0])) 58 | dataEh += torch.matmul(count_data.t(), my.repeat(count, 1).t().reshape(count_data.size()[0], -1)) 59 | Ehh += count * (my.size()[1] * Sigma + torch.matmul(my, my.t())) 60 | 61 | F = torch.solve(dataEh.t(), Ehh.t())[0].t() 62 | S = (data_covariance - torch.chain_matmul(F, Ehh, F.t())) / n_total_sessions 63 | 64 | Sb = torch.matmul(F, F.t()) 65 | St = Sb + S 66 | 67 | print(' [elapsed time = {:0.1f} s]'.format(time.time() - iter_start_time)) 68 | yield Plda(St, Sb) 69 | 70 | def _compute_scoring_matrices(self, plda_dim): 71 | if self.plda_dim != plda_dim: 72 | self.plda_dim = plda_dim 73 | iSt = torch.inverse(self.St) 74 | iS = torch.inverse(self.St - torch.chain_matmul(self.Sb, iSt, self.Sb)) 75 | Q = iSt - iS 76 | P = torch.chain_matmul(iSt, self.Sb, iS) 77 | U, s = torch.svd(P)[:2] 78 | self.l = s[:plda_dim] 79 | self.uk = U[:, :plda_dim] 80 | self.qhat = torch.chain_matmul(self.uk.t(), Q, self.uk) 81 | 82 | def score_trials(self, model_iv, test_iv, plda_dim): 83 | self._compute_scoring_matrices(plda_dim) 84 | model_iv = model_iv.to(self.uk.device) 85 | test_iv = test_iv.to(self.uk.device) 86 | model_iv = torch.matmul(model_iv, self.uk) 87 | test_iv = torch.matmul(test_iv, self.uk) 
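        # The verification score below is the PLDA log-likelihood ratio in the reduced
        # space: one quadratic term per side (through self.qhat) plus a cross term
        # weighted by the diagonal eigenvalue matrix self.l: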
88 |         score_h1 = torch.sum(torch.matmul(model_iv, self.qhat) * model_iv, 1)
89 |         score_h2 = torch.sum(torch.matmul(test_iv, self.qhat) * test_iv, 1)
90 |         score_h1h2 = 2 * torch.sum(model_iv * self.l * test_iv, 1)
91 |         scores = score_h1h2 + score_h1 + score_h2
92 |         return scores.cpu().numpy()
93 | 
94 |     def compress(self, vectors, plda_dim):
95 |         self._compute_scoring_matrices(plda_dim)
96 |         return torch.matmul(vectors, self.uk.to(vectors.device))
97 | 
98 |     def save(self, filename):
99 |         print('Saving PLDA to file {}'.format(filename))
100 |         np.savez(filename, St=self.St.cpu().numpy(), Sb=self.Sb.cpu().numpy())
101 | 
102 |     @classmethod
103 |     def load(cls, filename, device):
104 |         print('Loading PLDA from file {}'.format(filename))
105 |         holder = np.load(filename)
106 |         St, Sb = holder['St'], holder['Sb']
107 |         return Plda(torch.from_numpy(St).to(device), torch.from_numpy(Sb).to(device))
108 | 
109 | 
110 | def _compute_cov(data):
111 |     data -= torch.mean(data, dim=0)
112 |     cov = torch.matmul(data.t(), data) / (data.size()[0] - 1)
113 |     return cov
114 | 
115 | def _compute_within_cov(data, class_boundaries):
116 |     data = data.clone()
117 |     for start, end in zip(class_boundaries[:-1], class_boundaries[1:]):
118 |         data[start:end, :] -= data[start:end, :].mean(dim=0)
119 |     return _compute_cov(data)
120 | 
121 | def _rearrange_data(data, speaker_labels):
122 |     print('Rearranging data for PLDA training...')
123 |     index_dict = defaultdict(list)
124 |     for index, label in enumerate(speaker_labels):
125 |         index_dict[label].append(index)
126 |     new_data = torch.zeros(*data.size())
127 |     class_boundaries = [0]
128 |     counter = 0
129 |     for key in index_dict:
130 |         indices = index_dict[key]
131 |         new_data[counter:counter + len(indices), :] = data[indices, :]
132 |         counter += len(indices)
133 |         class_boundaries.append(counter)
134 |     return new_data, class_boundaries
135 | 
136 | def _orthogonalize_columns(matrix):
137 |     matrix -= torch.mean(matrix, 1).unsqueeze(1)
138 |     D, V = torch.svd(matrix)[1:]
139 |     W = torch.matmul(V, torch.diag((1./(torch.sqrt(D) + 1e-10))))
140 |     return torch.matmul(matrix, W)
141 | 
142 | def _arrange_data_by_counts(data, labels):
143 |     spk2indices = defaultdict(list)
144 |     for index, label in enumerate(labels):
145 |         spk2indices[label].append(index)
146 | 
147 |     count2spks = defaultdict(list)
148 |     for spk in spk2indices:
149 |         count2spks[len(spk2indices[spk])].append(spk)
150 | 
151 |     data_list = []
152 |     count_list = []
153 |     for count in count2spks:
154 |         count_list.append(count)
155 |         count_indices = []
156 |         for spk in count2spks[count]:
157 |             count_indices.extend(spk2indices[spk])
158 |         data_list.append(data[count_indices, :])
159 | 
160 |     return data_list, count_list
--------------------------------------------------------------------------------
/asvtorch/backend/vector_processing.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | import torch
4 | 
5 | class VectorProcessor:
6 | 
7 |     def __init__(self, centering_vectors, whitening_matrices, processing_instruction):
8 |         self.centering_vectors = centering_vectors
9 |         self.whitening_matrices = whitening_matrices
10 |         self.processing_instruction = processing_instruction
11 | 
12 |     @classmethod
13 |     def train(cls, vectors, processing_instruction, device):
14 |         """Trains centering and whitening transforms for vector post-processing.
15 | 
16 |         Arguments:
17 |             vectors {Tensor} -- Training vectors (one vector per row).
18 |             processing_instruction {String} -- [Contains characters 'c', 'w', 'l'. 
For example 'cwlc' performs centering, whitening, length normalization, and centering (2nd time) in this order.] 19 | """ 20 | print('Training vector processor ...') 21 | 22 | c_count = processing_instruction.count('c') 23 | w_count = processing_instruction.count('w') 24 | 25 | vec_size = vectors.size()[1] 26 | 27 | whitening_matrices = torch.zeros(w_count, vec_size, vec_size, device=device) 28 | centering_vectors = torch.zeros(c_count, vec_size, device=device) 29 | 30 | vectors = vectors.to(device) 31 | 32 | c_count = 0 33 | w_count = 0 34 | for c in processing_instruction: 35 | if c == 'c': 36 | print('Centering...') 37 | centering_vectors[c_count, :] = torch.mean(vectors, dim=0) 38 | vectors = vectors - centering_vectors[c_count, :] 39 | c_count += 1 40 | elif c == 'w': 41 | print('Whitening...') 42 | l, U = torch.symeig(torch.matmul(vectors.t(), vectors) / vectors.size()[0], eigenvectors=True) 43 | l = torch.clamp(l, min=1e-10) 44 | whitening_matrices[w_count, :, :] = torch.rsqrt(l) * U # transposed 45 | vectors = torch.matmul(vectors, whitening_matrices[w_count, :, :]) 46 | w_count += 1 47 | elif c == 'l': 48 | print('Normalizing length...') 49 | vectors = unit_len_norm(vectors) 50 | 51 | return VectorProcessor(centering_vectors, whitening_matrices, processing_instruction) 52 | 53 | def process(self, vectors): 54 | print('Processing {} vectors ...'.format(vectors.size()[0])) 55 | vectors = vectors.to(self.centering_vectors.device) 56 | c_count = 0 57 | w_count = 0 58 | for c in self.processing_instruction: 59 | if c == 'c': 60 | print('Centering...') 61 | vectors = vectors - self.centering_vectors[c_count, :] 62 | c_count += 1 63 | elif c == 'w': 64 | print('Whitening...') 65 | vectors = torch.matmul(vectors, self.whitening_matrices[w_count, :, :]) 66 | w_count += 1 67 | elif c == 'l': 68 | print('Normalizing length...') 69 | vectors = unit_len_norm(vectors) 70 | return vectors 71 | 72 | def save(self, output_file): 73 | data = {'c': self.centering_vectors.cpu(), 'w': self.whitening_matrices.cpu(), 'i': self.processing_instruction} 74 | with open(output_file, 'wb') as outfile: 75 | pickle.dump(data, outfile, protocol=pickle.HIGHEST_PROTOCOL) 76 | print('VectorProcessor saved to {}'.format(output_file)) 77 | 78 | @classmethod 79 | def load(cls, input_file, device): 80 | with open(input_file, 'rb') as infile: 81 | data = pickle.load(infile) 82 | centering_vectors = data['c'].to(device) 83 | whitening_matrices = data['w'].to(device) 84 | processing_instruction = data['i'] 85 | print('VectorProcessor loaded from {}'.format(input_file)) 86 | return VectorProcessor(centering_vectors, whitening_matrices, processing_instruction) 87 | 88 | def unit_len_norm(data): 89 | data_norm = torch.sqrt(torch.sum(data ** 2, 1)) 90 | data_norm[data_norm == 0] = 1 91 | return data / data_norm.unsqueeze(1) 92 | -------------------------------------------------------------------------------- /asvtorch/evaluation/eval_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def compute_det_curve(target_scores, nontarget_scores): 4 | n_scores = target_scores.size + nontarget_scores.size 5 | all_scores = np.concatenate((target_scores, nontarget_scores)) 6 | labels = np.concatenate((np.ones(target_scores.size), np.zeros(nontarget_scores.size))) 7 | 8 | # Sort labels based on scores 9 | indices = np.argsort(all_scores, kind='mergesort') 10 | labels = labels[indices] 11 | 12 | # Compute false rejection and false acceptance rates 13 | tar_trial_sums 
= np.cumsum(labels) 14 | nontarget_trial_sums = nontarget_scores.size - (np.arange(1, n_scores + 1) - tar_trial_sums) 15 | 16 | frr = np.concatenate((np.atleast_1d(0), tar_trial_sums / target_scores.size)) # false rejection rates 17 | far = np.concatenate((np.atleast_1d(1), nontarget_trial_sums / nontarget_scores.size)) # false acceptance rates 18 | thresholds = np.concatenate((np.atleast_1d(all_scores[indices[0]] - 0.001), all_scores[indices])) # Thresholds are the sorted scores 19 | 20 | return frr, far, thresholds 21 | 22 | 23 | def compute_eer(target_scores, nontarget_scores): 24 | """ Returns equal error rate (EER) and the corresponding threshold. """ 25 | frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores) 26 | abs_diffs = np.abs(frr - far) 27 | min_index = np.argmin(abs_diffs) 28 | eer = np.mean((frr[min_index], far[min_index])) 29 | return eer, thresholds[min_index] 30 | 31 | def compute_min_dcf(target_scores, nontarget_scores, p_target, c_miss, c_fa): 32 | frr, far, thresholds = compute_det_curve(target_scores, nontarget_scores) 33 | return _compute_min_dcf(frr, far, thresholds, p_target, c_miss, c_fa) 34 | 35 | # Obtained from KALDI Toolkit: 36 | def _compute_min_dcf(fnrs, fprs, thresholds, p_target, c_miss, c_fa): 37 | min_c_det = float("inf") 38 | min_c_det_threshold = thresholds[0] 39 | for i in range(0, len(fnrs)): 40 | c_det = c_miss * fnrs[i] * p_target + c_fa * fprs[i] * (1 - p_target) 41 | if c_det < min_c_det: 42 | min_c_det = c_det 43 | min_c_det_threshold = thresholds[i] 44 | c_def = min(c_miss * p_target, c_fa * (1 - p_target)) 45 | min_dcf = min_c_det / c_def 46 | return min_dcf, min_c_det_threshold -------------------------------------------------------------------------------- /asvtorch/evaluation/parameters.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | class ParameterChanger(): 5 | def __init__(self, config_file, obj_dict): 6 | print('Initializing parameter changer...') 7 | 8 | self.configs = [] 9 | self._index = 0 10 | self._old_config = None 11 | self.obj2id = {v: k for k, v in obj_dict.items()} 12 | 13 | with open(config_file) as f: 14 | settings = [] 15 | for line in f: 16 | line = line.strip() 17 | if line.startswith('#'): 18 | continue 19 | if line: 20 | settings.append(line) 21 | elif settings: 22 | self._process_config_set(settings, obj_dict) 23 | settings = [] 24 | if settings: 25 | self._process_config_set(settings, obj_dict) 26 | 27 | def _process_config_set(self, settings, obj_dict): 28 | line_objects = [] 29 | line_attributes = [] 30 | value_lists = [] 31 | for setting in settings: 32 | setting = setting.strip() 33 | name, values = setting.split('=') 34 | name = name.strip() 35 | obj_id, attr = name.split('.') 36 | obj = obj_dict[obj_id] 37 | if _exists(obj, attr): 38 | line_objects.append(obj) 39 | line_attributes.append(attr) 40 | values = values.strip() 41 | ldict = {} 42 | exec('value_list = {}'.format(values), globals(), ldict) 43 | value_list = ldict['value_list'] 44 | if not isinstance(value_list, (list, tuple)): 45 | value_list = [value_list] 46 | value_lists.append(value_list) 47 | for value_combination in itertools.product(*value_lists): 48 | self.configs.append((line_objects, line_attributes, value_combination)) 49 | 50 | def next(self): 51 | # Reverting to the initial config: 52 | if self._old_config is not None: 53 | _set_attributes(self._old_config) 54 | 55 | # Getting new config: 56 | try: 57 | config = self.configs[self._index] 58 | 
except IndexError: 59 | self._index = 0 60 | self._old_config = None 61 | return False 62 | self._index += 1 63 | 64 | # Saving current config: 65 | self._old_config = _get_current_config(config[0], config[1]) 66 | # Setting new config: 67 | _set_attributes(config) 68 | 69 | print('Parameter values (non-default ones):\n{}'.format(self.get_current_string(compact=False))) 70 | return True 71 | 72 | def get_current_string(self, compact=True): 73 | config = _get_current_config(*self._old_config[0:2]) 74 | string = '' 75 | for obj, attr, value in zip(*config): 76 | if compact: 77 | string += '{}.{} = {}; '.format(self.obj2id[obj], attr, value) 78 | else: 79 | string += '{}.{} = {}\n'.format(self.obj2id[obj], attr, value) 80 | if compact: 81 | string = string[:-2] 82 | return string 83 | 84 | def get_value_string(self): 85 | config = _get_current_config(*self._old_config[0:2]) 86 | return ';'.join(str(value) for value in config[2]) 87 | 88 | def _get_current_config(objs, attrs): 89 | values = [] 90 | for obj, attr in zip(objs, attrs): 91 | values.append(getattr(obj, attr)) 92 | return (objs, attrs, values) 93 | 94 | def _set_attributes(config): 95 | for obj, attr, value in zip(*config): 96 | setattr(obj, attr, value) 97 | 98 | def _exists(obj, attr): 99 | if not hasattr(obj, attr): 100 | sys.exit('Config attribute does not exist: {}'.format(attr)) 101 | return True -------------------------------------------------------------------------------- /asvtorch/evaluation/trials.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def organize_trials(vectors, utt_ids, trial_file): 5 | trial_vector_dict = {} 6 | for index, segment in enumerate(utt_ids): 7 | trial_vector_dict[segment] = vectors[index, :] 8 | 9 | trials = [] 10 | with open(trial_file) as f: 11 | for line in f: 12 | parts = line.split() 13 | if parts[2].strip() == 'target': 14 | label = 1 15 | else: 16 | label = 0 17 | trials.append((parts[0], parts[1], label)) 18 | 19 | left_vectors = torch.zeros(len(trials), vectors.shape[1], device=vectors.device) 20 | right_vectors = torch.zeros(len(trials), vectors.shape[1], device=vectors.device) 21 | 22 | labels = [] 23 | for index, trial in enumerate(trials): 24 | left_vectors[index, :] = trial_vector_dict[trial[0]] 25 | right_vectors[index, :] = trial_vector_dict[trial[1]] 26 | labels.append(trial[2]) 27 | 28 | labels = np.asarray(labels, dtype=bool) 29 | 30 | return left_vectors, right_vectors, labels 31 | 32 | 33 | def organize_trials_in_chunks(vectors, utt_ids, trial_file, chunk_size): 34 | 35 | print('Preparing to iterate over trials...') 36 | 37 | trial_vector_dict = {} 38 | for index, segment in enumerate(utt_ids): 39 | trial_vector_dict[segment] = vectors[index, :] 40 | 41 | trials = [] 42 | with open(trial_file) as f: 43 | for line in f: 44 | parts = line.split() 45 | if parts[2].strip() == 'target': 46 | label = 1 47 | else: 48 | label = 0 49 | trials.append((parts[0], parts[1], label)) 50 | 51 | i = 0 52 | 53 | while i < len(trials): 54 | print('Iterated over {} trials'.format(i)) 55 | chunk_trials = trials[i:i+chunk_size] 56 | i += chunk_size 57 | 58 | left_vectors = torch.zeros(len(chunk_trials), vectors.shape[1], device=vectors.device) 59 | right_vectors = torch.zeros(len(chunk_trials), vectors.shape[1], device=vectors.device) 60 | 61 | labels = [] 62 | for index, trial in enumerate(chunk_trials): 63 | left_vectors[index, :] = trial_vector_dict[trial[0]] 64 | right_vectors[index, :] = 
trial_vector_dict[trial[1]] 65 | labels.append(trial[2]) 66 | 67 | labels = np.asarray(labels, dtype=bool) 68 | 69 | yield left_vectors, right_vectors, labels 70 | -------------------------------------------------------------------------------- /asvtorch/global_setup.py: -------------------------------------------------------------------------------- 1 | # Do not touch this file directly. Change the device from run_voxceleb_ivector.py. 2 | 3 | import torch 4 | 5 | device = torch.device("cpu") 6 | 7 | def set_gpu(device_id): 8 | if torch.cuda.is_available(): 9 | global device 10 | device = torch.device('cuda:{}'.format(device_id)) 11 | torch.backends.cudnn.benchmark = False 12 | print('Using GPU!') 13 | else: 14 | print('Cuda is not available!') 15 | -------------------------------------------------------------------------------- /asvtorch/ivector/featureloader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.utils import data 4 | 5 | from asvtorch.kaldidata.utils import count_total_number_of_active_frames 6 | 7 | 8 | def _get_clip_indices(utt_start, utt_end, batch_start, batch_end): 9 | """ Cuts the parts of the utterance that do not fit into the batch window. 10 | 11 | Arguments: 12 | utt_start {int} -- start point of the utterance 13 | utt_end {int} -- end point of the utterance 14 | batch_start {int} -- start point of the batch window 15 | batch_end {int} -- end point of the batch window 16 | 17 | Returns: 18 | (int, int), bool -- a tuple containing clipped start and end point of an utterance, the boolean flag is True if the end of the utterance is inside the batch window. 19 | """ 20 | if utt_end <= batch_start: 21 | return None 22 | if utt_start >= batch_end: 23 | return None 24 | start = 0 25 | end = utt_end - utt_start 26 | if utt_start < batch_start: 27 | start = batch_start - utt_start 28 | if utt_end > batch_end: 29 | end = batch_end - utt_start 30 | if utt_end <= batch_end: 31 | ends = True 32 | else: 33 | ends = False 34 | return (start, end), ends 35 | 36 | class _Kaldi_Dataset(data.Dataset): 37 | def __init__(self, rxspecifiers, feature_loader, frames_per_batch): 38 | self.feat_rxspecifiers = rxspecifiers[0] 39 | self.vad_rxspecifiers = rxspecifiers[1] 40 | self.feature_loader = feature_loader 41 | 42 | n_active_frames, break_points = count_total_number_of_active_frames(self.vad_rxspecifiers) 43 | n_batches = int(np.ceil(n_active_frames / frames_per_batch)) 44 | 45 | utt_index = 0 46 | self.batches = [] 47 | 48 | for i in range(n_batches): 49 | batch_indices = [] 50 | batch_endpoints = [] 51 | window_start = i * frames_per_batch 52 | window_end = (i + 1) * frames_per_batch 53 | while utt_index < len(self.feat_rxspecifiers): 54 | clip_indices = _get_clip_indices(break_points[utt_index], break_points[utt_index + 1], window_start, window_end) 55 | utt_index += 1 56 | if clip_indices is not None: 57 | batch_indices.append((utt_index - 1, clip_indices[0])) 58 | if clip_indices[1]: 59 | batch_endpoints.append(break_points[utt_index]) 60 | else: 61 | break 62 | else: 63 | if batch_indices: 64 | break 65 | self.batches.append((batch_indices, np.asarray(batch_endpoints))) 66 | batch_indices = [] 67 | batch_endpoints = [] 68 | utt_index -= 1 69 | 70 | def __len__(self): 71 | return len(self.batches) 72 | 73 | def __getitem__(self, index): 74 | batch_indices, batch_endpoints = self.batches[index] 75 | frames = [] 76 | for utt_indices in batch_indices: 77 | utt_index, selection_indices = 
utt_indices
78 |             feats = self.feature_loader.load_features(self.feat_rxspecifiers[utt_index], self.vad_rxspecifiers[utt_index])
79 |             frames.append(feats[selection_indices[0]:selection_indices[1], :])
80 |         frames = torch.from_numpy(np.vstack(frames))
81 |         return frames, batch_endpoints
82 | 
83 | 
84 | def _collater(batch):
85 |     """ In this "hack", batches are already formed in the Dataset object (a batch consists of a single element, which is actually the whole batch).
86 |     """
87 |     return batch[0]
88 | 
89 | def get_feature_loader(rxspecifiers, feature_loader, batch_size, num_workers):
90 |     """Returns a DataLoader that is used to load features from multiple utterances using a fixed batch size given in (active speech) frames.
91 | 
92 |     Arguments:
93 |         rxspecifiers {(list, list)} -- Two lists in a tuple containing scp lines without utterance IDs for features and VAD labels, respectively.
94 |         feature_loader {KaldiFeatureLoader} -- Feature loader.
95 |         batch_size {int} -- Batch size in frames.
96 |         num_workers {int} -- Number of processes used for data loading.
97 | 
98 |     Returns:
99 |         DataLoader -- DataLoader for reading features.
100 |     """
101 |     dataset = _Kaldi_Dataset(rxspecifiers, feature_loader, batch_size)
102 |     return data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers, collate_fn=_collater)
103 | 
--------------------------------------------------------------------------------
/asvtorch/ivector/gmm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import kaldi.util.io as kio
4 | from kaldi.gmm import FullGmm as KaldiFullGmm
5 | from kaldi.matrix import Matrix as KaldiMatrix
6 | 
7 | import asvtorch.global_setup as gs
8 | from asvtorch.misc.misc import ensure_npz
9 | from asvtorch.misc.misc import test_finiteness
10 | 
11 | class Gmm():
12 |     def __init__(self, means, covariances, weights, device=torch.device("cpu")):
13 |         self.means = means.to(device)
14 |         self.covariances = covariances.to(device)
15 |         self.weights = weights.to(device)
16 |         # Preparation for posterior computation:
17 |         const = torch.Tensor([-0.5 * self.means.size()[1] * np.log(2 * np.pi)]).to(self.means.device)
18 |         self._inv_covariances = torch.inverse(self.covariances)
19 |         self._component_constants = torch.zeros(self.weights.numel(), device=self.means.device)
20 |         for i in range(self.weights.numel()):
21 |             self._component_constants[i] = -0.5 * torch.logdet(self.covariances[i, :, :]) + const + torch.log(self.weights[i])
22 | 
23 |     def to_device(self, device):
24 |         return Gmm(self.means, self.covariances, self.weights, device=device)
25 | 
26 |     def compute_posteriors_top_select(self, frames, top_indices):
27 |         logprob = torch.zeros(top_indices.size(), device=self.means.device)
28 |         for i in range(self.weights.numel()):
29 |             indices_of_component = (top_indices == i)
30 |             frame_selection = torch.any(indices_of_component, 0)
31 |             post_index = torch.argmax(indices_of_component, 0)[frame_selection]
32 |             centered_frames = frames[frame_selection, :] - self.means[i, :]
33 |             logprob[post_index, frame_selection] = self._component_constants[i] - 0.5 * torch.sum(torch.mm(centered_frames, self._inv_covariances[i, :, :]) * centered_frames, 1)
34 |         llk = torch.logsumexp(logprob, dim=0)
35 |         return torch.exp(logprob - llk)
36 | 
37 |     def compute_posteriors(self, frames):
38 |         logprob = torch.zeros(self.weights.numel(), frames.size()[0], device=self.means.device)
39 |         for i in range(self.weights.numel()):
40 |             centered_frames = frames - self.means[i, :]
41 |             logprob[i, :] = self._component_constants[i] - 0.5 * 
torch.sum(torch.mm(centered_frames, self._inv_covariances[i, :, :]) * centered_frames, 1) 42 | llk = torch.logsumexp(logprob, dim=0) 43 | return torch.exp(logprob - llk) 44 | 45 | def save_npz(self, filename): 46 | np.savez(filename, weights=self.weights.cpu().numpy(), means=self.means.cpu().numpy(), covariances=self.covariances.cpu().numpy()) 47 | print('GMM saved to {}'.format(ensure_npz(filename))) 48 | 49 | @classmethod 50 | def from_npz(cls, filename, device): 51 | data = np.load(ensure_npz(filename)) 52 | weights = torch.from_numpy(data['weights']) 53 | means = torch.from_numpy(data['means']) 54 | covariances = torch.from_numpy(data['covariances']) 55 | return Gmm(means, covariances, weights, device) 56 | 57 | @classmethod 58 | def from_kaldi(cls, filename, device): 59 | ubm = KaldiFullGmm() 60 | with kio.xopen(filename) as f: 61 | ubm.read(f.stream(), f.binary) 62 | means = torch.from_numpy(ubm.get_means().numpy()) 63 | weights = torch.from_numpy(ubm.weights().numpy()) 64 | n_components = weights.numel() 65 | feat_dim = means.size()[1] 66 | covariances = torch.zeros([n_components, feat_dim, feat_dim], device='cpu', dtype=torch.float32) 67 | for index, kaldicovar in enumerate(ubm.get_covars()): 68 | covariances[index, :, :] = torch.from_numpy(KaldiMatrix(kaldicovar).numpy()) 69 | return Gmm(means, covariances, weights, device=device) 70 | 71 | class DiagGmm(): 72 | def __init__(self, means, covariances, weights, device=torch.device("cpu")): 73 | self.means = means.to(device) 74 | self.covariances = covariances.to(device) 75 | self.weights = weights.to(device) 76 | # Preparation for posterior computation: 77 | const = torch.Tensor([self.means.size()[1] * np.log(2 * np.pi)]).to(self.means.device) 78 | self.posterior_constant = torch.sum(self.means * self.means / self.covariances, 1) + torch.sum(torch.log(self.covariances), 1) + const 79 | self.posterior_constant = self.posterior_constant.unsqueeze(1) 80 | self.precisions = (1 / self.covariances) 81 | self.mean_pres = (self.means / self.covariances) 82 | 83 | def compute_posteriors(self, frames): 84 | logprob = torch.mm(self.precisions, (frames * frames).t()) - 2 * torch.mm(self.mean_pres, frames.t()) 85 | logprob = -0.5 * (logprob + self.posterior_constant) 86 | logprob = logprob + torch.log(self.weights.unsqueeze(1)) 87 | llk = torch.logsumexp(logprob, 0) 88 | return torch.exp(logprob - llk) 89 | 90 | @classmethod 91 | def from_full_gmm(cls, full_gmm, device): 92 | means = full_gmm.means.clone() 93 | weights = full_gmm.weights.clone() 94 | covariances = torch.zeros(means.size(), device=full_gmm.covariances.device, dtype=full_gmm.covariances.dtype) 95 | for index in range(weights.numel()): 96 | covariances[index, :] = full_gmm.covariances[index, :, :].diag() 97 | return DiagGmm(means, covariances, weights, device=device) 98 | 99 | -------------------------------------------------------------------------------- /asvtorch/ivector/ivector_extractor.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | 4 | import torch 5 | import numpy as np 6 | 7 | import asvtorch.ivector.statloader 8 | from asvtorch.misc.misc import ensure_npz 9 | from asvtorch.ivector.gmm import Gmm 10 | 11 | class IVectorExtractor(): 12 | def __init__(self, t_matrix, means, inv_covariances, prior_offset, device): 13 | # When prior offset is zero, standard (non-augmented) i-vector formulation is used. 
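        # With a non-zero prior offset (Kaldi-style augmented formulation), the latent
        # prior is a Gaussian with mean prior_offset on the first coordinate, and the
        # first row of each component's T submatrix absorbs the UBM means, so sufficient
        # statistics need no centering (see random_init(); extract() subtracts the
        # offset from the first i-vector coordinate at the end).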
14 | self.t_matrix = t_matrix.to(device) 15 | self.prior_offset = prior_offset.to(device) 16 | self.means = means.to(device) 17 | self.inv_covariances = inv_covariances.to(device) 18 | self.n_components, self.ivec_dim, self.feat_dim = self.t_matrix.size() 19 | self.identity = torch.eye(self.ivec_dim, device=device).unsqueeze(0) 20 | self.bias_offset = None 21 | 22 | def _compute_posterior_means_and_covariances(self, n_all, f_all, batch_size, component_batches): 23 | covariances = torch.zeros(self.ivec_dim, batch_size, self.ivec_dim, device=self.t_matrix.device) 24 | means = torch.zeros(self.ivec_dim, batch_size, device=self.t_matrix.device) 25 | for bstart, bend in component_batches: 26 | n = n_all[:, bstart:bend] 27 | f = f_all[bstart:bend, :, :] 28 | sub_t = self.t_matrix[bstart:bend, :, :] 29 | sub_inv_covars = self.inv_covariances[bstart:bend, :, :] 30 | sub_tc = torch.bmm(sub_t, sub_inv_covars) 31 | tt = torch.bmm(sub_tc, torch.transpose(sub_t, 1, 2)) 32 | tt.transpose_(0, 1) 33 | covariances += torch.matmul(n, tt) 34 | means = torch.addbmm(means, sub_tc, f) 35 | covariances.transpose_(0, 1) 36 | covariances += self.identity 37 | covariances = torch.inverse(covariances) 38 | means.t_() 39 | means[:, 0] += self.prior_offset 40 | means.unsqueeze_(2) 41 | means = torch.bmm(covariances, means) 42 | means = means.view((means.size()[:2])) 43 | return means, covariances 44 | 45 | def _get_component_batches(self, n_component_batches): 46 | cbatch_size = self.n_components // n_component_batches 47 | component_batches = [] 48 | for cbatch_index in range(n_component_batches): 49 | bstart = cbatch_index * cbatch_size 50 | bend = (cbatch_index + 1) * cbatch_size 51 | component_batches.append((bstart, bend)) 52 | return component_batches 53 | 54 | def _get_stat_loader(self, rxspecifiers, feature_loader, second_order, batch_size, n_workers): 55 | data_dims = (self.n_components, self.feat_dim) 56 | if self.prior_offset == 0: 57 | stat_loader = asvtorch.ivector.statloader.get_stat_loader(rxspecifiers, feature_loader, data_dims, batch_size, second_order, self.means, n_workers) 58 | else: # Kaldi style i-vector (augmented form) --> No centering required 59 | stat_loader = asvtorch.ivector.statloader.get_stat_loader(rxspecifiers, feature_loader, data_dims, batch_size, second_order, None, n_workers) 60 | return stat_loader 61 | 62 | def get_updated_ubm(self, ubm, device): 63 | if self.prior_offset == 0: 64 | means = self.means.clone() 65 | else: 66 | means = self.t_matrix[:, 0, :] * self.prior_offset 67 | covariances = ubm.covariances.clone() 68 | weights = ubm.weights.clone() 69 | return Gmm(means, covariances, weights, device) 70 | 71 | def extract(self, rxspecifiers, feature_loader, settings): 72 | stat_loader = self._get_stat_loader(rxspecifiers, feature_loader, False, settings.batch_size_in_utts, settings.dataloader_workers) 73 | component_batches = self._get_component_batches(settings.n_component_batches) 74 | print('Extracting i-vectors for {} utterances...'.format(len(rxspecifiers[0]))) 75 | start_time = time.time() 76 | ivectors = torch.zeros(len(rxspecifiers[0]), self.ivec_dim, device=self.t_matrix.device) 77 | counter = 0 78 | for batch_index, batch in enumerate(stat_loader): 79 | n_all, f_all = batch 80 | batch_size = n_all.size()[0] 81 | print('{:.0f} seconds elapsed, Batch {}/{}: utterance count = {}'.format(time.time() - start_time, batch_index+1, stat_loader.__len__(), batch_size)) 82 | n_all = n_all.to(self.t_matrix.device) 83 | f_all = f_all.to(self.t_matrix.device) 84 | means = 
self._compute_posterior_means_and_covariances(n_all, f_all, batch_size, component_batches)[0] 85 | ivectors[counter:counter+batch_size, :] = means 86 | counter += batch_size 87 | ivectors[:, 0] -= self.prior_offset 88 | print('I-vector extraction completed in {:.0f} seconds.'.format(time.time() - start_time)) 89 | return ivectors 90 | 91 | def train(self, rxspecifiers, feature_loader, output_filename, settings, resume=0): 92 | if resume < 0: 93 | resume = 0 94 | elif resume > 0: 95 | print('Resuming i-vector extractor training from iteration {}...'.format(resume)) 96 | extractor = IVectorExtractor.from_npz('{}.{}'.format(ensure_npz(output_filename, inverse=True), resume), self.t_matrix.device) 97 | self.t_matrix = extractor.t_matrix 98 | self.means = extractor.means 99 | self.inv_covariances = extractor.inv_covariances 100 | self.prior_offset = extractor.prior_offset 101 | 102 | print('Training i-vector extractor ({} iterations)...'.format(settings.n_iterations)) 103 | 104 | n_utts = len(rxspecifiers[0]) 105 | component_batches = self._get_component_batches(settings.n_component_batches) 106 | 107 | print('Allocating memory for accumulators...') 108 | z = torch.zeros(self.n_components, device=self.t_matrix.device) 109 | S = torch.zeros(self.n_components, self.feat_dim, self.feat_dim, device=self.t_matrix.device) 110 | Y = torch.zeros(self.n_components, self.feat_dim, self.ivec_dim, device=self.t_matrix.device) 111 | R = torch.zeros(self.n_components, self.ivec_dim, self.ivec_dim, device=self.t_matrix.device) # The biggest memory consumer! 112 | h = torch.zeros(self.ivec_dim, device=self.t_matrix.device) 113 | H = torch.zeros(self.ivec_dim, self.ivec_dim, device=self.t_matrix.device) 114 | 115 | iteration_times = [] 116 | start_time = time.time() 117 | for iteration in range(1, settings.n_iterations + 1): 118 | iter_start_time = time.time() 119 | 120 | print('Initializing statistics loader...') 121 | accumulate_2nd_stats = settings.update_covariances and iteration == 1 # 2nd order stats need to be accumulated only once 122 | stat_loader = self._get_stat_loader(rxspecifiers, feature_loader, accumulate_2nd_stats, settings.batch_size_in_utts, settings.dataloader_workers) 123 | 124 | print('Iterating over batches of utterances...') 125 | for batch_index, batch in enumerate(stat_loader): 126 | 127 | if accumulate_2nd_stats: 128 | n_all, f_all, s_batch_sum = batch 129 | S += s_batch_sum.to(self.t_matrix.device) 130 | else: 131 | n_all, f_all = batch 132 | 133 | batch_size = n_all.size()[0] 134 | print('Iteration {} ({:.0f} seconds), Batch {}/{}: utterance count = {}'.format(iteration + resume, time.time() - iter_start_time, batch_index+1, stat_loader.__len__(), batch_size)) 135 | 136 | n_all = n_all.to(self.t_matrix.device) 137 | f_all = f_all.to(self.t_matrix.device) 138 | if iteration == 1: # Need to be accumulated only once 139 | z += torch.sum(n_all, dim=0) 140 | 141 | means, covariances = self._compute_posterior_means_and_covariances(n_all, f_all, batch_size, component_batches) 142 | 143 | # Accumulating... 
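                # h and H: global 1st and 2nd moments of the posterior means, used for
                # minimum divergence re-estimation; Y: cross moments between 1st order
                # statistics and the posterior means; R: posterior-count-weighted
                # second moments E[y y^T], needed to solve for the updated T matrix.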
144 | h += torch.sum(means, dim=0) 145 | yy = torch.baddbmm(covariances, means.unsqueeze(2), means.unsqueeze(1)) 146 | H += torch.sum(yy, dim=0) 147 | yy = yy.permute((1, 2, 0)) 148 | for bstart, bend in component_batches: # Batching over components saves GPU memory 149 | n = n_all[:, bstart:bend] 150 | f = f_all[bstart:bend, :, :] 151 | Y[bstart:bend, :, :] += torch.matmul(f, means) 152 | R[bstart:bend, :, :] += torch.matmul(yy, n).permute((2, 0, 1)) 153 | 154 | self.weights = z / torch.sum(z) * n_utts 155 | h = h / n_utts 156 | H = H / n_utts 157 | H = H - torch.ger(h, h) 158 | 159 | # Updating: 160 | if settings.update_projections: self._update_projections(Y, R, component_batches) 161 | if settings.update_covariances: self._update_covariances(Y, R, z, S, component_batches) 162 | if settings.minimum_divergence: self._minimum_divergence_whitening(h, H, component_batches) 163 | if settings.update_means: self._minimum_divergence_centering(h, component_batches) 164 | 165 | print('Zeroing accumulators...') 166 | Y.zero_() 167 | R.zero_() 168 | h.zero_() 169 | H.zero_() 170 | 171 | if settings.save_every_iteration: 172 | self.save_npz('{}.{}'.format(ensure_npz(output_filename, inverse=True), iteration + resume)) 173 | 174 | iteration_times.append(time.time() - iter_start_time) 175 | 176 | self.save_npz(output_filename) 177 | print('Training completed in {:.0f} seconds.'.format(time.time() - start_time)) 178 | return iteration_times 179 | 180 | def _update_projections(self, Y, R, component_batches): 181 | print('Updating projections...') 182 | for bstart, bend in component_batches: 183 | self.t_matrix[bstart:bend, :, :] = torch.cholesky_solve(Y[bstart:bend, :, :].transpose(1, 2), torch.cholesky(R[bstart:bend, :, :], upper=True), upper=True) 184 | 185 | def _update_covariances(self, Y, R, z, S, component_batches): 186 | print('Updating covariances...') 187 | for bstart, bend in component_batches: 188 | crossterm = torch.matmul(Y[bstart:bend, :, :], self.t_matrix[bstart:bend, :, :]) 189 | crossterm = crossterm + crossterm.transpose(1, 2) 190 | self.inv_covariances[bstart:bend, :, :] = S[bstart:bend, :, :] - 0.5 * crossterm 191 | 192 | var_floor = torch.sum(self.inv_covariances, dim=0) 193 | var_floor *= 0.1 / torch.sum(z) 194 | self.inv_covariances = self.inv_covariances / z.unsqueeze(1).unsqueeze(1) 195 | self._covariances = (self.inv_covariances).clone() 196 | self._apply_floor_(self.inv_covariances, var_floor, component_batches) 197 | self.inv_covariances = torch.inverse(self.inv_covariances) 198 | 199 | def _apply_floor_(self, A, B, component_batches): 200 | #B = self._apply_floor_scalar(B, self._max_abs_eig(B) * 1e-4)[0] # To prevent Cholesky from failing 201 | L = torch.cholesky(B) 202 | L_inv = torch.inverse(L) 203 | num_floored = 0 204 | batch_size = component_batches[0][1] - component_batches[0][0] 205 | l = torch.zeros(batch_size, self.feat_dim, device=self.t_matrix.device) 206 | U = torch.zeros(batch_size, self.feat_dim, self.feat_dim, device=self.t_matrix.device) 207 | for bstart, bend in component_batches: 208 | D = L_inv.matmul(A[bstart:bend, :, :]).matmul(L_inv.t()) 209 | for c in range(batch_size): 210 | l[c, :], U[c, :, :] = torch.symeig(D[c, :, :], eigenvectors=True) 211 | num_floored += torch.sum(l < 1).item() 212 | l = torch.clamp(l, min=1) 213 | D = U.matmul(l.unsqueeze(2) * U.transpose(1,2)) 214 | A[bstart:bend, :, :] = L.matmul(D.transpose(1, 2)).matmul(L.t()) 215 | print('Floored {:.1%} of the eigenvalues...'.format(num_floored / (self.n_components * self.feat_dim))) 216 | 
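    # The two helpers below support the eigenvalue-flooring safeguard that is currently
    # commented out in _apply_floor_(): _max_abs_eig() returns the largest absolute
    # eigenvalue of a symmetric matrix, and _apply_floor_scalar() floors the
    # eigenvalues of A at the scalar b.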
217 | def _max_abs_eig(self, A): 218 | l = torch.symeig(A)[0] 219 | return torch.max(torch.abs(l)) 220 | 221 | def _apply_floor_scalar(self, A, b): 222 | l, U = torch.symeig(A, eigenvectors=True) 223 | num_floored = torch.sum(l < b).item() 224 | l = torch.clamp(l, min=b) 225 | A = torch.matmul(U, l.unsqueeze(1) * U.t()) 226 | return A, num_floored 227 | 228 | def _minimum_divergence_whitening(self, h, H, component_batches): 229 | print('Minimum divergence re-estimation...') 230 | l, U = torch.symeig(H, eigenvectors=True) 231 | l = torch.clamp(l, min=1e-7) 232 | P1 = torch.rsqrt(l) * U # transposed 233 | torch.matmul(h, P1, out=h) # In place operation, so that the result is available for update_means() 234 | if self.prior_offset != 0: # Augmented formulation 235 | self.prior_offset = h[0] 236 | print('Prior offset: {}'.format(self.prior_offset)) 237 | P1 = torch.inverse(P1) 238 | for bstart, bend in component_batches: 239 | self.t_matrix[bstart:bend, :, :] = P1.matmul(self.t_matrix[bstart:bend, :, :]) 240 | 241 | def _minimum_divergence_centering(self, h, component_batches): 242 | if self.prior_offset == 0: 243 | self.means += torch.sum(self.t_matrix * h.unsqueeze(0).unsqueeze(2), dim=1) 244 | else: # Augmented formulation uses the Householder transformation 245 | x = h / h.norm() 246 | alpha = torch.rsqrt(2 * (1 - x[0])) 247 | print('Alpha: {}'.format(alpha)) 248 | a = x * alpha 249 | a[0] -= alpha 250 | P2 = self.identity - 2 * torch.ger(a, a) 251 | self.prior_offset = torch.dot(h, P2[:, 0].squeeze()) 252 | print('Prior offset: {}'.format(self.prior_offset)) 253 | P2 = torch.inverse(P2) 254 | for bstart, bend in component_batches: 255 | self.t_matrix[bstart:bend, :, :] = P2.matmul(self.t_matrix[bstart:bend, :, :]) 256 | 257 | def save_npz(self, filename): 258 | np.savez(filename, t_matrix=self.t_matrix.cpu().numpy(), means=self.means.cpu().numpy(), inv_covariances=self.inv_covariances.cpu().numpy(), prior_offset=self.prior_offset.cpu().numpy()) 259 | print('I-vector extractor saved to {}'.format(ensure_npz(filename))) 260 | 261 | @classmethod 262 | def random_init(cls, ubm, settings, device, seed=0): 263 | torch.manual_seed(seed) 264 | t_matrix = torch.randn(ubm.covariances.size()[0], settings.ivec_dim, ubm.covariances.size()[1]) 265 | means = ubm.means.cpu().clone() 266 | inv_covariances = ubm._inv_covariances.clone() 267 | if settings.type == 'augmented': 268 | prior_offset = torch.tensor([float(settings.initial_prior_offset)]) 269 | t_matrix[:, 0, :] = means / prior_offset 270 | else: 271 | prior_offset = torch.tensor([float(0)]) 272 | return IVectorExtractor(t_matrix, means, inv_covariances, prior_offset, device) 273 | 274 | @classmethod 275 | def from_npz(cls, filename, device, iteration=None): 276 | if iteration is not None: 277 | filename = '{}.{}'.format(ensure_npz(filename, inverse=True), iteration) 278 | data = np.load(ensure_npz(filename)) 279 | t_matrix = torch.from_numpy(data['t_matrix']) 280 | means = torch.from_numpy(data['means']) 281 | inv_covariances = torch.from_numpy(data['inv_covariances']) 282 | prior_offset = torch.from_numpy(data['prior_offset']) 283 | return IVectorExtractor(t_matrix, means, inv_covariances, prior_offset, device) 284 | -------------------------------------------------------------------------------- /asvtorch/ivector/posteriors.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import torch 5 | from kaldi.util.table import VectorWriter 6 | from kaldi.matrix import 
Vector 7 | 8 | import asvtorch.ivector.featureloader 9 | import asvtorch.global_setup as gs 10 | from asvtorch.ivector.gmm import DiagGmm 11 | from asvtorch.kaldidata.posterior_io import PosteriorWriter 12 | from asvtorch.misc.misc import ensure_npz 13 | 14 | 15 | def batch_extract_posteriors(rx_specifiers, utt_ids, feature_loader, ubm, output_filename, settings): 16 | """Extracts posteriors using full covariance matrices. Computational requirements and disk space requirements are reduced by performing Gaussian selection using diagonal covariances and by thresholding posteriors. 17 | 18 | Arguments: 19 | rx_specifiers {(list, list)} -- Two lists in a tuple containing scp lines without utterance IDs for features and VAD labels, respectively. 20 | utt_ids {list} -- Utterance IDs. 21 | feature_loader {KaldiFeatureLoader} -- Feature loader. 22 | ubm {Gmm} -- A GMM (UBM). 23 | output_filename {string} -- Output filename for posteriors (without extension). 24 | settings {PosteriorExtractionSettings} - Settings. 25 | """ 26 | 27 | print('Extracting posteriors for {} utterances...'.format(len(rx_specifiers[0]))) 28 | 29 | dataloader = asvtorch.ivector.featureloader.get_feature_loader(rx_specifiers, feature_loader, settings.batch_size_in_frames, settings.dataloader_workers) 30 | 31 | diag_ubm = DiagGmm.from_full_gmm(ubm, gs.device) 32 | 33 | sub_batch_count = int(np.ceil(ubm.means.size()[0] / ubm.means.size()[1])) 34 | 35 | wspecifier_top_posterior = "ark,scp:{0}.ark,{0}.scp".format(output_filename) 36 | posterior_writer = PosteriorWriter(wspecifier_top_posterior) 37 | 38 | posterior_buffer = torch.Tensor() 39 | top_buffer = torch.LongTensor() 40 | count_buffer = torch.LongTensor() 41 | 42 | start_time = time.time() 43 | frame_counter = 0 44 | utterance_counter = 0 45 | 46 | start_time = time.time() 47 | for batch_index, batch in enumerate(dataloader): 48 | 49 | frames, end_points = batch 50 | frames = frames.to(gs.device) 51 | frames_in_batch = frames.size()[0] 52 | 53 | chunks = torch.chunk(frames, sub_batch_count, dim=0) 54 | top_gaussians = [] 55 | for chunk in chunks: 56 | posteriors = diag_ubm.compute_posteriors(chunk) 57 | top_gaussians.append(torch.topk(posteriors, settings.n_top_gaussians, dim=0, largest=True, sorted=False)[1]) 58 | 59 | top_gaussians = torch.cat(top_gaussians, dim=1) 60 | 61 | posteriors = ubm.compute_posteriors_top_select(frames, top_gaussians) 62 | 63 | # Posterior thresholding: 64 | max_indices = torch.argmax(posteriors, dim=0) 65 | mask = posteriors.ge(settings.posterior_threshold) 66 | top_counts = torch.sum(mask, dim=0) 67 | posteriors[~mask] = 0 68 | divider = torch.sum(posteriors, dim=0) 69 | mask2 = divider.eq(0) # For detecting special cases 70 | posteriors[:, ~mask2] = posteriors[:, ~mask2] / divider[~mask2] 71 | # Special case that all the posteriors are discarded (force to use 1): 72 | posteriors[max_indices[mask2], mask2] = 1 73 | mask[max_indices[mask2], mask2] = 1 74 | top_counts[mask2] = 1 75 | 76 | # Vectorize the data & move to cpu memory 77 | posteriors = posteriors.t().masked_select(mask.t()) 78 | top_gaussians = top_gaussians.t().masked_select(mask.t()) 79 | posteriors = posteriors.cpu() 80 | top_gaussians = top_gaussians.cpu() 81 | top_counts = top_counts.cpu() 82 | 83 | end_points = end_points - frame_counter # relative end_points in a batch 84 | 85 | if end_points.size != 0: 86 | # Save utterance data that continues from the previous batch: 87 | psave = torch.cat([posterior_buffer, posteriors[:torch.sum(top_counts[:end_points[0]])]]) 88 | tsave = 
torch.cat([top_buffer, top_gaussians[:torch.sum(top_counts[:end_points[0]])]]) 89 | csave = torch.cat([count_buffer, top_counts[:end_points[0]]]) 90 | posterior_writer.write(utt_ids[utterance_counter], csave, psave, tsave) 91 | utterance_counter += 1 92 | 93 | # Save utterance data that is fully included in this batch: 94 | for start_point, end_point in zip(end_points[:-1], end_points[1:]): 95 | psave = posteriors[torch.sum(top_counts[:start_point]):torch.sum(top_counts[:end_point])] 96 | tsave = top_gaussians[torch.sum(top_counts[:start_point]):torch.sum(top_counts[:end_point])] 97 | csave = top_counts[start_point:end_point] 98 | posterior_writer.write(utt_ids[utterance_counter], csave, psave, tsave) 99 | utterance_counter += 1 100 | 101 | # Buffer partial data to be used in the next batch: 102 | posterior_buffer = posteriors[torch.sum(top_counts[:end_points[-1]]):] 103 | top_buffer = top_gaussians[torch.sum(top_counts[:end_points[-1]]):] 104 | count_buffer = top_counts[end_points[-1]:] 105 | else: 106 | # Buffer the whole data for the next batch (if the utterance is longer than the current batch (special case)): 107 | posterior_buffer = torch.cat([posterior_buffer, posteriors]) 108 | top_buffer = torch.cat([top_buffer, top_gaussians]) 109 | count_buffer = torch.cat([count_buffer, top_counts]) 110 | 111 | frame_counter += frames_in_batch 112 | 113 | print('{:.0f} seconds elapsed, batch {}/{}: {}, utterance count (roughly) = {}'.format(time.time() - start_time, batch_index+1, dataloader.__len__(), frames.size(), len(end_points))) 114 | 115 | posterior_writer.close() 116 | print('Posterior computation completed in {:.3f} seconds'.format(time.time() - start_time)) 117 | 118 | return time.time() - start_time 119 | -------------------------------------------------------------------------------- /asvtorch/ivector/settings.py: -------------------------------------------------------------------------------- 1 | 2 | class PosteriorExtractionSettings(): 3 | def __init__(self): 4 | # general settings 5 | self.n_top_gaussians = 20 6 | self.posterior_threshold = 0.025 7 | 8 | # data loading & batching settings 9 | self.batch_size_in_frames = 500000 10 | self.dataloader_workers = 4 11 | 12 | def print_settings(self): 13 | print('POSTERIOR EXTRACTION SETTINGS') 14 | print('- Number of top Gaussians to select for each frame: {}'.format(self.n_top_gaussians)) 15 | print('- Select Gaussians only if frame posterior is higher than: {}'.format(self.posterior_threshold)) 16 | 17 | print('Data loading & batching settings') 18 | print('- Number of data loader workers: {}'.format(self.dataloader_workers)) 19 | print('- Number of frames in a batch: {}'.format(self.batch_size_in_frames)) 20 | print('') 21 | 22 | 23 | 24 | class IVectorSettings(): 25 | 26 | def __init__(self): 27 | 28 | # general settings 29 | self.ivec_dim = 400 30 | self.type = 'kaldi' # 'standard' 31 | 32 | # training settings 33 | self.n_iterations = 5 34 | self.initial_prior_offset = 100 # Only useful in the augmented formulation ('kaldi') 35 | self.update_projections = True 36 | self.update_covariances = True 37 | self.minimum_divergence = True 38 | self.update_means = True 39 | 40 | # data loading & batching settings 41 | self.dataloader_workers = 6 42 | self.batch_size_in_utts = 200 # Higher batch size will have higher GPU memory usage. 43 | self.n_component_batches = 16 # must be a power of two! The higher the value, the less GPU memory will be used. 
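        # For example, with a 2048-component UBM, n_component_batches = 16 processes
        # 2048 / 16 = 128 components at a time.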
44 | 45 | # model saving settings 46 | self.save_every_iteration = True 47 | 48 | 49 | def print_settings(self): 50 | print('I-VECTOR EXTRACTOR SETTINGS') 51 | print('- I-vector type: {}'.format(self.type)) 52 | print('- I-vector dimensionality: {}'.format(self.ivec_dim)) 53 | 54 | print('Training settings') 55 | print('- Number of iterations: {}'.format(self.n_iterations)) 56 | if self.type == 'kaldi': 57 | print('- Initial prior offset: {}'.format(self.initial_prior_offset)) 58 | print('- Update projections (T matrix): {}'.format(self.update_projections)) 59 | print('- Update residual covariances: {}'.format(self.update_covariances)) 60 | print('- Minimum divergence re-estimation: {}'.format(self.minimum_divergence)) 61 | print('- Update means (bias term): {}'.format(self.update_means)) 62 | 63 | print('Data loading & batching settings') 64 | print('- Number of data loader workers: {}'.format(self.dataloader_workers)) 65 | print('- Number of utterances in a batch: {}'.format(self.batch_size_in_utts)) 66 | print('- Number of batches for components (has to be power of 2): {}'.format(self.n_component_batches)) 67 | 68 | print('Saving settings') 69 | print('- Save model after every iteration: {}'.format(self.save_every_iteration)) 70 | print('') 71 | -------------------------------------------------------------------------------- /asvtorch/ivector/statloader.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils import data 6 | import kaldi.util.io as kio 7 | 8 | import asvtorch.kaldidata.posterior_io 9 | 10 | class _StatDataset(data.Dataset): 11 | def __init__(self, rxspecifiers, feature_loader, data_dims, second_order, centering_means=None): 12 | self.feat_rxspecifiers = rxspecifiers[0] 13 | self.vad_rxspecifiers = rxspecifiers[1] 14 | self.posterior_rxspecifiers = rxspecifiers[2] 15 | self.feature_loader = feature_loader 16 | if centering_means is not None: 17 | self.centering_means = centering_means.cpu().numpy() 18 | else: 19 | self.centering_means = None 20 | self.second_order = second_order 21 | self.data_dims = data_dims 22 | if(second_order): 23 | self.second_order_sum = np.zeros((data_dims[0], data_dims[1], data_dims[1]), dtype=np.float32) 24 | 25 | def __len__(self): 26 | return len(self.feat_rxspecifiers) 27 | 28 | def accumulate_stats(self, feats, counts, posteriors, indices): 29 | """Computes 0th and 1st order statistics from the selected posteriors. 30 | 31 | Arguments: 32 | feats {ndarray} -- Feature array (feature vectors as rows). 33 | counts {ndarray} -- Array containing the numbers of selected posteriors for each frame. 34 | posteriors {ndarray} -- Array containing posteriors (flattened). 35 | indices {ndarray} -- Array containing Gaussian indices (flattened). 36 | 37 | Returns: 38 | ndarray -- 0th order statistics (row vector). 39 | ndarray -- 1st order statistics (row index = component index). 
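            (When second_order is True, 2nd order statistics are not returned; they are accumulated into self.second_order_sum and collected by collater().)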
40 | """ 41 | 42 | n = np.zeros(self.data_dims[0], dtype=np.float32) 43 | f = np.zeros(self.data_dims, dtype=np.float32) 44 | posterior_count = 0 45 | for frame_index in range(counts.size): 46 | end = posterior_count+counts[frame_index] 47 | gaussian_indices = indices[posterior_count:end] 48 | frame_posteriors = posteriors[posterior_count:end] 49 | n[gaussian_indices] += frame_posteriors 50 | f[gaussian_indices, :] += np.outer(frame_posteriors, feats[frame_index, :]) 51 | if self.second_order: 52 | if self.centering_means is not None: 53 | feats_centered = np.atleast_3d(np.atleast_2d(feats[frame_index, :]) - self.centering_means[gaussian_indices, :]) # Ok: (atleast_2d and atleast_3d prepend and append dimensions, respectively) 54 | feat_outer = np.matmul(feats_centered, np.transpose(feats_centered, (0, 2, 1))) 55 | else: 56 | feat_outer = np.outer(feats[frame_index, :], feats[frame_index, :]) 57 | self.second_order_sum[gaussian_indices, :, :] += frame_posteriors[:, np.newaxis, np.newaxis] * feat_outer 58 | posterior_count += counts[frame_index] 59 | return n, f 60 | 61 | def __getitem__(self, index): 62 | feats = self.feature_loader.load_features(self.feat_rxspecifiers[index], self.vad_rxspecifiers[index]) 63 | counts, posteriors, indices = asvtorch.kaldidata.posterior_io.load_posteriors(self.posterior_rxspecifiers[index]) 64 | n, f = self.accumulate_stats(feats, counts, posteriors, indices) 65 | if self.centering_means is not None: 66 | f -= n[:, None] * self.centering_means 67 | return n, f 68 | 69 | def collater(self, batch): 70 | """Collates sufficient statistics from many utterances to form a batch. 71 | 72 | Returns: 73 | Tensor -- 0th order statistics (number of utterances x number of components) 74 | Tensor -- 1st order statistics (#components x feat_dim x #utterances) 75 | Tensor -- Sum of 2nd order statistics (#components x feat_dim x feat_dim) 76 | """ 77 | n, f = zip(*batch) 78 | n = np.stack(n, axis=0) 79 | f = np.stack(f, axis=2) 80 | 81 | if self.second_order: 82 | s = self.second_order_sum 83 | self.second_order_sum = np.zeros(self.second_order_sum.shape, dtype=np.float32) # Zero the accumulator 84 | return torch.from_numpy(n), torch.from_numpy(f), torch.from_numpy(s) 85 | else: 86 | return torch.from_numpy(n), torch.from_numpy(f) 87 | 88 | 89 | def get_stat_loader(rxspecifiers, feature_loader, data_dims, batch_size, second_order, centering_means, num_workers): 90 | """Loads Baum-Welch statistics in batches. 91 | 92 | Arguments: 93 | rxspecifiers {(list, list, list)} -- Three lists in a tuple containing scp lines without utterance IDs for features, VADs, and posteriors, respectively. 94 | feature_loader {KaldiFeatureLoader} -- Feature loader. 95 | data_dims -- {tuple} (#components, feat_dim). 96 | batch_size {int} -- Batch size in utterances. 97 | second_order {boolean} -- Whether or not to compute 2nd order stats. 98 | centering_means {Tensor} -- Which means to use for centering statistics. 99 | num_workers {int} -- Number of processes used for data loading. 100 | 101 | Returns: 102 | DataLoader -- A dataloader for loading Baum-Welch statistics. 
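        (Each batch is a tuple (n, f), or (n, f, s) when second_order is True, where s is the sum of the 2nd order statistics over the batch.)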
--------------------------------------------------------------------------------
/asvtorch/kaldidata/kaldifeatloaders.py:
--------------------------------------------------------------------------------
1 | import abc
2 | 
3 | import numpy as np
4 | 
5 | import kaldi.feat.functions as featfuncs
6 | import kaldi.util.io as kio
7 | 
8 | class KaldiFeatureLoader(abc.ABC):
9 |     @abc.abstractmethod
10 |     def load_features(self, feat_rxspecifier, vad_rxspecifier):
11 |         """Loads and processes KALDI features.
12 | 
13 |         Arguments:
14 |             feat_rxspecifier {string} -- A line from feats.scp file excluding the utterance ID
15 |             vad_rxspecifier {string} -- A line from vad.scp file excluding the utterance ID
16 |         """
17 |         pass
18 | 
19 | 
20 | class VoxcelebFeatureLoader(KaldiFeatureLoader):
21 |     """Reads features extracted by the KALDI recipe "egs/voxceleb/v1/". After loading, the features are processed in the same manner as in the KALDI recipe.
22 |     """
23 |     def __init__(self):
24 |         self.delta_opts = featfuncs.DeltaFeaturesOptions(order=2, window=3)
25 |         self.cmn_opts = featfuncs.SlidingWindowCmnOptions()
26 |         self.cmn_opts.center = True
27 |         self.cmn_opts.cmn_window = 300
28 |         self.cmn_opts.normalize_variance = False
29 | 
30 |     def load_features(self, feat_rxspecifier, vad_rxspecifier):
31 |         feats = kio.read_matrix(feat_rxspecifier)
32 |         vad_labels = kio.read_vector(vad_rxspecifier)
33 |         feats = featfuncs.compute_deltas(self.delta_opts, feats)
34 |         featfuncs.sliding_window_cmn(self.cmn_opts, feats, feats)
35 |         feats = feats.numpy()[vad_labels.numpy().astype(bool), :]  # Keep only the frames that the VAD marked as speech
36 |         return feats
37 | 
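The abstract base class above makes it easy to plug in other preprocessing chains. As a sketch, a hypothetical loader that applies sliding-window CMN and VAD masking but skips the delta computation could look like this (`PlainCmnFeatureLoader` is not part of the repository; only the pykaldi calls already used above are assumed):

```python
import kaldi.feat.functions as featfuncs
import kaldi.util.io as kio

from asvtorch.kaldidata.kaldifeatloaders import KaldiFeatureLoader

class PlainCmnFeatureLoader(KaldiFeatureLoader):
    def __init__(self):
        self.cmn_opts = featfuncs.SlidingWindowCmnOptions()
        self.cmn_opts.center = True
        self.cmn_opts.cmn_window = 300
        self.cmn_opts.normalize_variance = False

    def load_features(self, feat_rxspecifier, vad_rxspecifier):
        feats = kio.read_matrix(feat_rxspecifier)
        vad_labels = kio.read_vector(vad_rxspecifier)
        featfuncs.sliding_window_cmn(self.cmn_opts, feats, feats)  # normalize in place
        return feats.numpy()[vad_labels.numpy().astype(bool), :]   # drop non-speech frames
```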
--------------------------------------------------------------------------------
/asvtorch/kaldidata/posterior_io.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from kaldi.util.table import VectorWriter
3 | import kaldi.util.io as kio
4 | from kaldi.matrix import Vector
5 | 
6 | class PosteriorWriter():
7 |     def __init__(self, wxspecifier):
8 |         self.posterior_writer = VectorWriter(wxspecifier)
9 | 
10 |     def write(self, utt_id, counts, posteriors, indices):
11 |         """Writes posteriors to disk in KALDI format.
12 | 
13 |         Arguments:
14 |             utt_id {string} -- Utterance ID to be written to scp file
15 |             counts {Tensor} -- Tensor containing the numbers of selected posteriors for each frame
16 |             posteriors {Tensor} -- Flattened Tensor containing all posteriors
17 |             indices {Tensor} -- Flattened Tensor containing all Gaussian indices
18 |         """
19 | 
20 |         counts = counts.numpy()
21 |         posteriors = posteriors.numpy()
22 |         indices = indices.numpy()
23 |         nframes = np.atleast_1d(np.array([counts.size]))
24 |         datavector = np.hstack([nframes, counts, posteriors, indices])  # Packed layout: [n_frames | counts | posteriors | indices]
25 |         datavector = Vector(datavector)
26 |         self.posterior_writer.write(utt_id, datavector)
27 | 
28 |     def close(self):
29 |         self.posterior_writer.close()
30 | 
31 | 
32 | def load_posteriors(rxspecifier):
33 |     """Loads posteriors stored in KALDI format from disk.
34 | 
35 |     Arguments:
36 |         rxspecifier {string} -- A line from scp file excluding the utterance ID.
37 | 
38 |     Returns:
39 |         ndarray -- Array containing the numbers of selected posteriors for each frame
40 |         ndarray -- Array containing posteriors (flattened)
41 |         ndarray -- Array containing Gaussian indices (flattened)
42 |     """
43 | 
44 |     datavector = kio.read_vector(rxspecifier)
45 |     datavector = datavector.numpy()
46 |     nframes = int(datavector[0])
47 |     counts = datavector[1:nframes+1].astype(int)
48 |     n_posteriors = (datavector.size - counts.size - 1) // 2
49 |     posteriors = datavector[nframes+1:-n_posteriors]
50 |     indices = datavector[-n_posteriors:].astype(int)
51 |     return counts, posteriors, indices
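A tiny round-trip sketch of the packed layout used by `write` and `load_posteriors` above (toy values; illustration only):

```python
import numpy as np

counts = np.array([2, 1])              # 2 frames with 2 and 1 selected Gaussians
posteriors = np.array([0.8, 0.2, 1.0])
indices = np.array([5, 11, 3])

# write() packs everything into one flat vector:
datavector = np.hstack([np.atleast_1d(counts.size), counts, posteriors, indices])
# -> [2, 2, 1, 0.8, 0.2, 1.0, 5, 11, 3]

# load_posteriors() reverses the packing:
nframes = int(datavector[0])
counts2 = datavector[1:nframes + 1].astype(int)
n_posteriors = (datavector.size - counts2.size - 1) // 2
posteriors2 = datavector[nframes + 1:-n_posteriors]
indices2 = datavector[-n_posteriors:].astype(int)

assert (counts2 == counts).all() and (indices2 == indices).all()
```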
--------------------------------------------------------------------------------
/asvtorch/kaldidata/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import random
4 | 
5 | import numpy as np
6 | import kaldi.util.io as kio
7 | 
8 | def count_total_number_of_active_frames(vad_rxspecifiers):
9 |     """Counts the total number of active speech frames in the given utterance list.
10 | 
11 |     Arguments:
12 |         vad_rxspecifiers {list} -- List of lines of vad.scp file excluding utterance IDs.
13 | 
14 |     Returns:
15 |         int -- Total number of active speech frames
16 |         ndarray -- 1D array of frame indices that separate different utterances from each other (includes 0 as the first element and the total number of frames as the last element).
17 |     """
18 |     n_frames = 0
19 |     counts = []
20 |     for vad_specifier in vad_rxspecifiers:
21 |         vad_labels = kio.read_vector(vad_specifier)
22 |         n_active = np.sum(vad_labels.numpy().astype(int))
23 |         counts.append(n_active)
24 |         n_frames += n_active
25 |     break_points = np.concatenate((np.atleast_1d(np.asarray(0, dtype=int)), np.cumsum(np.asarray(counts), dtype=int)))
26 |     return n_frames, break_points
27 | 
28 | def load_posterior_specifiers(scp_file_without_ext):
29 |     """Loads posterior reading specifiers from scp file.
30 | 
31 |     Arguments:
32 |         scp_file_without_ext {string} -- Filename of scp file without the extension.
33 | 
34 |     Returns:
35 |         list -- List of posterior reading specifiers.
36 |     """
37 |     scp_file = scp_file_without_ext + '.scp'
38 |     rxspecifiers = []
39 |     with open(scp_file) as f:
40 |         for line in f:
41 |             rxspecifiers.append(line.split()[-1].strip())
42 |     return rxspecifiers
43 | 
44 | 
45 | def _get_kaldi_dataset_files(folder):
46 |     """Forms full filenames for feats.scp, vad.scp, utt2num_frames, and utt2spk files.
47 | 
48 |     Arguments:
49 |         folder {string} -- Folder where the files are.
50 | 
51 |     Returns:
52 |         (string, string, string, string) -- Full filenames for feats.scp, vad.scp, utt2num_frames, and utt2spk files, respectively.
53 |     """
54 |     feat_scp_file = os.path.join(folder, 'feats.scp')
55 |     vad_scp_file = os.path.join(folder, 'vad.scp')
56 |     utt2num_frames_file = os.path.join(folder, 'utt2num_frames')
57 |     utt2spk_file = os.path.join(folder, 'utt2spk')
58 |     return feat_scp_file, vad_scp_file, utt2num_frames_file, utt2spk_file
59 | 
60 | 
61 | def _choose_utterances(data_folder, meta_folder, selected_utts):
62 |     """Loads the selected utterance IDs, speaker IDs, and feature and VAD reading specifiers from the given folder (meta_folder). Fixes the specifiers to point to ark files that may have been moved from their original location.
63 | 
64 |     Arguments:
65 |         data_folder {string} -- Used to fix specifiers to point to correct ark files in case the ark files were moved from their original location. The last subfolder of data_folder should have the same name as in the original path to the ark files.
66 |         meta_folder {string} -- Folder where the feats.scp, vad.scp, utt2num_frames, and utt2spk files are.
67 |         selected_utts {set} -- Set of utterance IDs that should be selected. If None, selects all.
68 | 
69 |     Returns:
70 |         list -- Reading specifiers for features.
71 |         list -- Reading specifiers for VAD labels.
72 |         list -- Utterance IDs.
73 |         list -- Speaker IDs.
74 |     """
75 |     feat_scp_file, vad_scp_file, utt2num_frames_file, utt2spk_file = _get_kaldi_dataset_files(meta_folder)
76 |     base_folder = os.sep + os.path.basename(os.path.normpath(data_folder)) + os.sep
77 |     feat_rxfilenames = []
78 |     vad_rxfilenames = []
79 |     utts = []
80 |     spks = []
81 |     with open(feat_scp_file) as f1, open(vad_scp_file) as f2, open(utt2spk_file) as f3:
82 |         for line1, line2, line3 in zip(f1, f2, f3):
83 |             parts1 = line1.split()
84 |             if selected_utts is None or parts1[0] in selected_utts:
85 |                 parts2 = line2.split()
86 |                 parts3 = line3.split()
87 |                 if parts1[0] != parts2[0] or parts1[0] != parts3[0]:
88 |                     sys.exit('Error: scp-files are not aligned!')
89 |                 feat_loc = parts1[1].split(base_folder)[1].strip()
90 |                 vad_loc = parts2[1].split(base_folder)[1].strip()
91 |                 feat_rxfilenames.append(os.path.join(data_folder, feat_loc))
92 |                 vad_rxfilenames.append(os.path.join(data_folder, vad_loc))
93 |                 utts.append(parts1[0])
94 |                 spks.append(parts3[1].strip())
95 |     return feat_rxfilenames, vad_rxfilenames, utts, spks
96 | 
97 | 
98 | def choose_all(data_folder, meta_folder):
99 |     """Loads all utterance IDs, speaker IDs, and feature and VAD reading specifiers from the given folder (meta_folder). Fixes the specifiers to point to ark files that may have been moved from their original location.
100 | 
101 |     Arguments:
102 |         data_folder {string} -- Used to fix specifiers to point to correct ark files in case the ark files were moved from their original location. The last subfolder of data_folder should have the same name as in the original path to the ark files.
103 |         meta_folder {string} -- Folder where the feats.scp, vad.scp, utt2num_frames, and utt2spk files are.
104 | 
105 |     Returns:
106 |         list -- Reading specifiers for features.
107 |         list -- Reading specifiers for VAD labels.
108 |         list -- Utterance IDs.
109 |         list -- Speaker IDs.
110 |     """
111 |     print('Loading all feature-specifiers, utterance labels, and speaker labels from folder {}'.format(meta_folder))
112 |     return _choose_utterances(data_folder, meta_folder, None)
113 | 
114 | 
115 | def choose_n_longest(data_folder, meta_folder, n):
116 |     """Same as choose_all, except that this function chooses the n longest utterances from the specified folder.
117 |     """
118 |     print('Loading feature-specifiers, utterance labels, and speaker labels of the {} longest files from folder {}'.format(n, meta_folder))
119 |     feat_scp_file, vad_scp_file, utt2num_frames_file, utt2spk_file = _get_kaldi_dataset_files(meta_folder)
120 |     selected_utts = set()
121 |     utts = []
122 |     num_frames = []
123 |     with open(utt2num_frames_file) as f:
124 |         for line in f:
125 |             parts = line.split()
126 |             utts.append(parts[0])
127 |             num_frames.append(int(parts[1].strip()))
128 |     num_frames = np.asarray(num_frames, dtype=int)
129 |     indices = np.argsort(num_frames)
130 |     indices = indices[-n:]
131 |     for index in indices:
132 |         selected_utts.add(utts[index])
133 |     return _choose_utterances(data_folder, meta_folder, selected_utts)
134 | 
135 | 
136 | def choose_n_random(data_folder, meta_folder, n, seed=0):
137 |     """Same as choose_all, except that this function chooses n random utterances from the specified folder.
138 |     """
139 |     random.seed(seed)
140 |     print('Loading feature-specifiers, utterance labels, and speaker labels of {} random files from folder {}'.format(n, meta_folder))
141 |     feat_scp_file, vad_scp_file, utt2num_frames_file, utt2spk_file = _get_kaldi_dataset_files(meta_folder)
142 |     utts = []
143 |     with open(utt2num_frames_file) as f:
144 |         for line in f:
145 |             parts = line.split()
146 |             utts.append(parts[0])
147 |     return _choose_utterances(data_folder, meta_folder, set(random.sample(utts, n)))
148 | 
149 | 
150 | def choose_from_wavfile(data_folder, meta_folder, wav_file, every_nth=1):
151 |     """Chooses every n-th utterance listed in wav_file.
152 |     """
153 |     print('Loading (every {}) feature-specifiers, utterance labels, and speaker labels for utterances in {} from folder {}'.format(every_nth, wav_file, meta_folder))
154 | 
155 |     utts = set()
156 |     with open(wav_file) as f:
157 |         for index, line in enumerate(f):
158 |             if index % every_nth == 0:
159 |                 parts = line.split()
160 |                 utts.add(parts[0])
161 |     return _choose_utterances(data_folder, meta_folder, utts)
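The re-rooting of scp entries performed by `_choose_utterances` is easiest to see on a concrete value. A toy illustration with hypothetical paths (not from the repository):

```python
import os

data_folder = '/new/location/voxceleb_outputs'   # where the ark files live now
scp_entry = '/old/location/voxceleb_outputs/mfcc/raw_mfcc_train.1.ark:42'

# The last subfolder of data_folder must match a folder in the original path:
base_folder = os.sep + os.path.basename(os.path.normpath(data_folder)) + os.sep  # '/voxceleb_outputs/'
feat_loc = scp_entry.split(base_folder)[1].strip()   # 'mfcc/raw_mfcc_train.1.ark:42'
print(os.path.join(data_folder, feat_loc))           # '/new/location/voxceleb_outputs/mfcc/raw_mfcc_train.1.ark:42'
```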
--------------------------------------------------------------------------------
/asvtorch/misc/misc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from os.path import isfile, join
4 | 
5 | def ensure_exists(folder):
6 |     """If the folder does not exist, create it.
7 | 
8 |     Arguments:
9 |         folder {string} -- Folder.
10 |     """
11 |     if not os.path.exists(folder):
12 |         os.makedirs(folder)
13 | 
14 | def ensure_npz(filename, inverse=False):
15 |     if inverse:
16 |         if filename.endswith('.npz'):
17 |             filename = filename[:-4]
18 |     else:
19 |         if not filename.endswith('.npz'):
20 |             filename = filename + '.npz'
21 |     return filename
22 | 
23 | def ensure_tar(filename, inverse=False):
24 |     if inverse:
25 |         if filename.endswith('.tar'):
26 |             filename = filename[:-4]
27 |     else:
28 |         if not filename.endswith('.tar'):
29 |             filename = filename + '.tar'
30 |     return filename
31 | 
32 | def list_files(folder):
33 |     return [f for f in os.listdir(folder) if isfile(join(folder, f))]
34 | 
35 | def test_finiteness(tensor, description):
36 |     if (~torch.isfinite(tensor)).sum() > 0:
37 |         print('{}: NOT FINITE!'.format(description))
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # This is an example config file.
2 | # The format is plain text, but the .py extension is used to get the editor's syntax highlighting.
3 | 
4 | # Lines starting with # are comment lines and the config parser ignores them.
5 | # The comment sign # only works when it is placed at the beginning of a line.
6 | 
7 | # Empty lines separate settings for different runs.
8 | # Different runs are executed one after another by run_voxceleb_ivector.py.
9 | # After each run the settings are reverted to their default values.
10 | # That is, settings of the previous run do not affect the settings of the next run.
11 | 
12 | # The settings that are not specified in this file will have the default values defined in the settings classes.
13 | 
14 | 
15 | # RUN 0:
16 | # Compute and save frame posteriors to disk
17 | # This can be commented out after it has been done once.
18 | recipe.start_stage = 0
19 | recipe.end_stage = 1
20 | 
21 | # RUN 1:
22 | # Try two different variations of augmented model training (with and without residual covariance updates):
23 | recipe.start_stage = 2
24 | ivector.type = ['augmented']
25 | ivector.update_covariances = [False, True]
26 | ivector.minimum_divergence = [True]
27 | ivector.update_means = [True]
28 | # Increase the number of iterations to improve performance:
29 | ivector.n_iterations = 5
30 | ivector.dataloader_workers = 44
31 | ivector.ivec_dim = 400
32 | 
33 | # RUN 2:
34 | # Try four parameter combinations [residual updates (True/False) and minimum_divergence (True/False)] with the standard formulation:
35 | recipe.start_stage = 2
36 | ivector.type = ['standard']
37 | ivector.update_covariances = [True, False]
38 | ivector.minimum_divergence = [True, False]
39 | ivector.update_means = [False]
40 | # Increase the number of iterations to improve performance:
41 | ivector.n_iterations = 5
42 | ivector.dataloader_workers = 44
43 | ivector.ivec_dim = 400
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: ivectors
2 | channels:
3 |   - pytorch
4 |   - pykaldi
5 |   - defaults
6 | dependencies:
7 |   - _libgcc_mutex=0.1=main
8 |   - blas=1.0=openblas
9 |   - ca-certificates=2019.8.28=0
10 |   - certifi=2019.9.11=py37_0
11 |   - cffi=1.12.3=py37h2e261b9_0
12 |   - cudatoolkit=10.0.130=0
13 |   - freetype=2.9.1=h8a8886c_1
14 |   - intel-openmp=2019.4=243
15 |   - jpeg=9b=h024ee3a_2
16 |   - libedit=3.1.20181209=hc058e9b_0
17 |   - libffi=3.2.1=hd88cf55_4
18 |   - libgcc-ng=9.1.0=hdf63c60_0
19 |   - libgfortran-ng=7.3.0=hdf63c60_0
20 |   - libopenblas=0.3.6=h5a2b251_1
21 |   - libpng=1.6.37=hbc83047_0
22 |   - libstdcxx-ng=9.1.0=hdf63c60_0
23 |   - libtiff=4.0.10=h2733197_2
24 |   - llvmlite=0.29.0=py37hd408876_0
25 |   - mkl=2019.4=243
26 |   - ncurses=6.1=he6710b0_1
27 |   - ninja=1.9.0=py37hfd86e86_0
28 |   - nomkl=3.0=0
29 |   - numpy=1.16.5=py37h99e49ec_0
30 |   - numpy-base=1.16.5=py37h2f8d375_0
31 |   - olefile=0.46=py37_0
32 |   - openblas=0.3.6=1
33 |   - openblas-devel=0.3.6=1
34 |   - openssl=1.1.1d=h7b6447c_1
35 |   - pillow=6.1.0=py37h34e0f95_0
36 |   - pip=19.2.2=py37_0
37 |   - pycparser=2.19=py37_0
38 |   - python=3.7.4=h265db76_1
39 |   - readline=7.0=h7b6447c_5
40 |   - scipy=1.3.1=py37he2b7bc3_0
41 |   - setuptools=41.2.0=py37_0
42 |   - six=1.12.0=py37_0
43 |   - sqlite=3.29.0=h7b6447c_0
44 |   - tk=8.6.8=hbc83047_0
45 |   - wheel=0.33.6=py37_0
46 |   - xz=5.2.4=h14c3975_4
47 |   - zlib=1.2.11=h7b6447c_3
48 |   - zstd=1.3.7=h0b5b093_0
49 |   - pykaldi-cpu=0.1.3=py37h14c3975_1
50 |   - pytorch=1.2.0=py3.7_cuda10.0.130_cudnn7.6.2_0
51 |   - torchvision=0.4.0=py37_cu100
52 |   - pip:
53 |     - pykaldi==0.1.2
54 |     - torch==1.2.0
55 | 
56 | 
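The run grammar described at the top of config.py above (comments only at line start, empty lines separating runs, list-valued settings defining parameter grids) can be illustrated with a minimal parsing sketch. This is independent of the actual ParameterChanger implementation in asvtorch/evaluation/parameters.py, which is not included in this dump:

```python
import ast

def parse_runs(config_file):
    """Splits a config file into one settings dict per run."""
    runs, current = [], {}
    with open(config_file) as f:
        for line in f:
            line = line.rstrip('\n')
            if line.startswith('#'):       # the comment sign only counts at line start
                continue
            if not line.strip():           # an empty line ends the current run
                if current:
                    runs.append(current)
                    current = {}
                continue
            key, value = (part.strip() for part in line.split('=', 1))
            current[key] = ast.literal_eval(value)   # e.g. 5, [True, False], ['standard']
    if current:
        runs.append(current)
    return runs

# For the example config above this yields three dicts; list-valued entries such
# as ivector.update_covariances = [True, False] would then be expanded into one
# sub-run per combination, with unspecified settings kept at their defaults.
print(parse_runs('config.py'))
```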
-------------------------------------------------------------------------------- /kaldi/egs/voxceleb/v1/extract_feats_and_train_ubm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Johns Hopkins University (Author: Daniel Garcia-Romero) 3 | # 2017 Johns Hopkins University (Author: Daniel Povey) 4 | # 2017-2018 David Snyder 5 | # 2018 Ewald Enzinger 6 | # 7 | # 2019 Ville Vestman 8 | # Apache 2.0. 9 | 10 | 11 | . ./cmd.sh 12 | . ./path.sh 13 | set -e 14 | 15 | # This script should be run from egs/voxceleb/v1/ folder of your Kaldi installation. 16 | # This script extracts MFCCs and trains the UBM following the original VoxCeleb v1 recipe. 17 | 18 | # CHANGE THE FOLLOWING THREE FOLDERS BEFORE RUNNING THE SCRIPT: 19 | output_dir=/media/hdd2/vvestman/voxceleb_outputs 20 | voxceleb1_root=/media/hdd3/voxceleb 21 | voxceleb2_root=/media/hdd3/voxceleb2 22 | 23 | mfccdir=$output_dir/mfcc 24 | vaddir=$output_dir/mfcc 25 | 26 | stage=0 27 | 28 | if [ $stage -le 0 ]; then 29 | local/make_voxceleb2.pl $voxceleb2_root dev $output_dir/voxceleb2_train 30 | local/make_voxceleb2.pl $voxceleb2_root test $output_dir/voxceleb2_test 31 | local/make_voxceleb1.pl $voxceleb1_root $output_dir #IF YOU ARE USING THE NEWEST VERSION OF VOXCELEB1, THIS SCRIPT PROBABLY DOES NOT WORK (data organization changed from the original version) 32 | 33 | # We'll train on all of VoxCeleb2, plus the training portion of VoxCeleb1. 34 | utils/combine_data.sh $output_dir/train $output_dir/voxceleb2_train $output_dir/voxceleb2_test $output_dir/voxceleb1_train 35 | fi 36 | 37 | if [ $stage -le 1 ]; then 38 | # Make MFCCs and compute the energy-based VAD for each dataset 39 | for name in train voxceleb1_test; do 40 | steps/make_mfcc.sh --write-utt2num-frames true \ 41 | --mfcc-config conf/mfcc.conf --nj 16 --cmd "$train_cmd" \ 42 | $output_dir/${name} $output_dir/make_mfcc $mfccdir 43 | utils/fix_data_dir.sh $output_dir/${name} 44 | sid/compute_vad_decision.sh --nj 16 --cmd "$train_cmd" \ 45 | $output_dir/${name} $output_dir/make_vad $vaddir 46 | utils/fix_data_dir.sh $output_dir/${name} 47 | done 48 | fi 49 | 50 | if [ $stage -le 2 ]; then 51 | # Train the UBM. 52 | sid/train_diag_ubm.sh --cmd "$train_cmd --mem 20G" \ 53 | --nj 16 --num-threads 8 \ 54 | $output_dir/train 2048 \ 55 | $output_dir/diag_ubm 56 | 57 | sid/train_full_ubm.sh --cmd "$train_cmd --mem 40G" \ 58 | --nj 16 --remove-low-count-gaussians false \ 59 | $output_dir/train \ 60 | $output_dir/diag_ubm $output_dir/full_ubm 61 | fi 62 | 63 | echo "Done!" 
64 | 
--------------------------------------------------------------------------------
/run_voxceleb_ivector.py:
--------------------------------------------------------------------------------
1 | import os
2 | import socket
3 | import datetime
4 | 
5 | import torch
6 | import numpy as np
7 | 
8 | import asvtorch.kaldidata.kaldifeatloaders
9 | import asvtorch.kaldidata.utils
10 | from asvtorch.misc.misc import ensure_exists
11 | import asvtorch.ivector.posteriors
12 | import asvtorch.global_setup  # provides asvtorch.global_setup.device used below
13 | import asvtorch.ivector.ivector_extractor
14 | import asvtorch.ivector.settings
15 | import asvtorch.evaluation.trials
16 | import asvtorch.evaluation.eval_metrics
17 | from asvtorch.ivector.gmm import Gmm
18 | from asvtorch.backend.plda import Plda
19 | from asvtorch.backend.vector_processing import VectorProcessor
20 | from asvtorch.evaluation.parameters import ParameterChanger
21 | 
22 | # UPDATE THIS TO THE SAME FOLDER THAT WAS USED IN THE KALDI SCRIPT FOR OUTPUTS:
23 | DATA_FOLDER = '/media/hdd2/vvestman/voxceleb_outputs'
24 | 
25 | TRY_TO_USE_GPU = True
26 | 
27 | if TRY_TO_USE_GPU:
28 |     if torch.cuda.is_available():
29 |         asvtorch.global_setup.device = torch.device("cuda:0")
30 |         print('Using GPU!')
31 | 
32 | print('Loading settings...')
33 | posterior_extraction_settings = asvtorch.ivector.settings.PosteriorExtractionSettings()
34 | posterior_extraction_settings.dataloader_workers = 4
35 | ivector_settings = asvtorch.ivector.settings.IVectorSettings()
36 | 
37 | class RecipeSettings():
38 |     def __init__(self):
39 |         self.start_stage = 0
40 |         self.end_stage = 3
41 |         self.plda_dim = 200
42 | recipe_settings = RecipeSettings()
43 | 
44 | 
45 | posterior_extraction_settings.print_settings()
46 | ivector_settings.print_settings()
47 | 
48 | parameter_changer = ParameterChanger('config.py', {'ivector': ivector_settings, 'recipe': recipe_settings})
49 | ensure_exists(os.path.join(DATA_FOLDER, 'results'))
50 | resultfile = open(os.path.join(DATA_FOLDER, 'results', 'results_{}.txt'.format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))), 'w')
51 | 
52 | 
53 | # Input data:
54 | TRAIN_FOLDER = os.path.join(DATA_FOLDER, 'train')
55 | TEST_FOLDER = os.path.join(DATA_FOLDER, 'voxceleb1_test')
56 | TRIAL_FILE = os.path.join(TEST_FOLDER, 'trials')
57 | UBM_FILE = os.path.join(DATA_FOLDER, 'full_ubm', 'final.ubm')
58 | VOX1_TRAIN_WAVFILE = os.path.join(DATA_FOLDER, 'voxceleb1_train', 'wav.scp')
59 | 
60 | # Output files:
61 | IVEC_TRAINING_POSTERIOR_FILE = os.path.join(DATA_FOLDER, 'posteriors', 'ivec_posteriors')
62 | BACKEND_TRAINING_POSTERIOR_FILE = os.path.join(DATA_FOLDER, 'posteriors', 'backend_posteriors')
63 | TESTING_POSTERIOR_FILE = os.path.join(DATA_FOLDER, 'posteriors', 'testing_posteriors')
64 | ensure_exists(os.path.join(DATA_FOLDER, 'posteriors'))
65 | 
66 | EXTRACTOR_OUTPUT_FILE = os.path.join(DATA_FOLDER, 'iextractor', 'iextractor')
67 | ensure_exists(os.path.join(DATA_FOLDER, 'iextractor'))
68 | 
69 | 
70 | print('Initializing feature loader...')
71 | feature_loader = asvtorch.kaldidata.kaldifeatloaders.VoxcelebFeatureLoader()
72 | print('Loading KALDI UBM...')
73 | ubm = Gmm.from_kaldi(UBM_FILE, asvtorch.global_setup.device)
74 | 
75 | 
76 | # Dataset preparation
77 | print('Choosing dataset for i-vector extractor training...')
78 | feat_rxspecifiers, vad_rxspecifiers, utt_ids, spk_ids = asvtorch.kaldidata.utils.choose_n_longest(DATA_FOLDER, TRAIN_FOLDER, 100000)
79 | rxspecifiers = (feat_rxspecifiers, vad_rxspecifiers)
80 | 
81 | print('Choosing dataset for PLDA training...')
82 | 
plda_feat_rxspecifiers, plda_vad_rxspecifiers, plda_utt_ids, plda_spk_ids = asvtorch.kaldidata.utils.choose_from_wavfile(DATA_FOLDER, TRAIN_FOLDER, VOX1_TRAIN_WAVFILE, 1) 83 | plda_rxspecifiers = (plda_feat_rxspecifiers, plda_vad_rxspecifiers) 84 | 85 | test_feat_rxspecifiers, test_vad_rxspecifiers, test_utt_ids, test_spk_ids = asvtorch.kaldidata.utils.choose_all(DATA_FOLDER, TEST_FOLDER) 86 | test_rxspecifiers = (test_feat_rxspecifiers, test_vad_rxspecifiers) 87 | 88 | 89 | while parameter_changer.next(): 90 | 91 | # Frame posterior extraction 92 | if recipe_settings.start_stage <= 1 <= recipe_settings.end_stage: 93 | asvtorch.ivector.posteriors.batch_extract_posteriors(rxspecifiers, utt_ids, feature_loader, ubm, IVEC_TRAINING_POSTERIOR_FILE, posterior_extraction_settings) 94 | asvtorch.ivector.posteriors.batch_extract_posteriors(plda_rxspecifiers, plda_utt_ids, feature_loader, ubm, BACKEND_TRAINING_POSTERIOR_FILE, posterior_extraction_settings) 95 | asvtorch.ivector.posteriors.batch_extract_posteriors(test_rxspecifiers, test_utt_ids, feature_loader, ubm, TESTING_POSTERIOR_FILE, posterior_extraction_settings) 96 | 97 | 98 | # Preparing data with posteriors 99 | posterior_rxspecifiers = asvtorch.kaldidata.utils.load_posterior_specifiers(IVEC_TRAINING_POSTERIOR_FILE) 100 | rxspecifiers = (*rxspecifiers, posterior_rxspecifiers) # Tuple of three elements: (feats, vad, posteriors) 101 | plda_posterior_rxspecifiers = asvtorch.kaldidata.utils.load_posterior_specifiers(BACKEND_TRAINING_POSTERIOR_FILE) 102 | plda_rxspecifiers = (*plda_rxspecifiers, plda_posterior_rxspecifiers) # Tuple of three elements: (feats, vad, posteriors) 103 | test_posterior_rxspecifiers = asvtorch.kaldidata.utils.load_posterior_specifiers(TESTING_POSTERIOR_FILE) 104 | test_rxspecifiers = (*test_rxspecifiers, test_posterior_rxspecifiers) # Tuple of three elements: (feats, vad, posteriors) 105 | 106 | 107 | 108 | if recipe_settings.start_stage <= 2 <= recipe_settings.end_stage: 109 | 110 | # I-vector extractor training 111 | ivector_extractor = asvtorch.ivector.ivector_extractor.IVectorExtractor.random_init(ubm, ivector_settings, asvtorch.global_setup.device, seed=0) 112 | iteration_times = ivector_extractor.train(rxspecifiers, feature_loader, EXTRACTOR_OUTPUT_FILE, ivector_settings) 113 | 114 | for iteration in range(1, ivector_settings.n_iterations + 1): 115 | 116 | ivector_extractor = asvtorch.ivector.ivector_extractor.IVectorExtractor.from_npz(EXTRACTOR_OUTPUT_FILE, asvtorch.global_setup.device, iteration) 117 | 118 | # Extracting i-vectors 119 | plda_training_vectors = ivector_extractor.extract(plda_rxspecifiers, feature_loader, ivector_settings) 120 | test_vectors = ivector_extractor.extract(test_rxspecifiers, feature_loader, ivector_settings) 121 | 122 | # Processing i-vectors 123 | vector_processor = VectorProcessor.train(plda_training_vectors, 'cwl', asvtorch.global_setup.device) 124 | plda_training_vectors = vector_processor.process(plda_training_vectors) 125 | test_vectors = vector_processor.process(test_vectors) 126 | 127 | # Training PLDA 128 | plda = Plda.train_closed_form(plda_training_vectors, plda_spk_ids, asvtorch.global_setup.device) 129 | 130 | # Arranging trials 131 | left_vectors, right_vectors, labels = asvtorch.evaluation.trials.organize_trials(test_vectors, test_utt_ids, TRIAL_FILE) 132 | 133 | # Scoring 134 | scores = plda.score_trials(left_vectors, right_vectors, recipe_settings.plda_dim) 135 | eer = asvtorch.evaluation.eval_metrics.compute_eer(scores[labels], scores[~labels])[0] 136 | 137 | # 
Printing results 138 | print(parameter_changer.get_current_string(compact=False)) 139 | print('EER: {:.4f} %'.format(eer*100)) 140 | resultfile.write('{:.4f} {:.2f} {} {}\n'.format(eer*100, iteration_times[iteration-1], iteration, parameter_changer.get_value_string())) 141 | resultfile.flush() 142 | 143 | resultfile.close() 144 | --------------------------------------------------------------------------------
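The EER reported above is the operating point at which the false rejection and false acceptance rates coincide. As an illustration of the quantity that compute_eer returns (the repository's implementation in asvtorch/evaluation/eval_metrics.py is not included in this dump), here is a minimal sketch computed from target and non-target score arrays:

```python
import numpy as np

def eer_sketch(target_scores, nontarget_scores):
    # Sweep a threshold over all observed scores; the EER lies where the
    # false rejection rate and the false acceptance rate cross.
    thresholds = np.sort(np.concatenate((target_scores, nontarget_scores)))
    frr = np.array([(target_scores < t).mean() for t in thresholds])
    far = np.array([(nontarget_scores >= t).mean() for t in thresholds])
    idx = np.argmin(np.abs(frr - far))
    return (frr[idx] + far[idx]) / 2

target = np.array([2.3, 1.8, 0.9, 2.9])       # scores of same-speaker trials
nontarget = np.array([-1.2, 0.2, 1.0, -0.4])  # scores of different-speaker trials
print('EER: {:.4f} %'.format(eer_sketch(target, nontarget) * 100))  # EER: 25.0000 %
```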