├── dataset ├── source1.wav ├── source2.wav ├── source3.wav ├── source4.wav ├── source5.wav ├── source6.wav ├── source7.wav ├── source8.wav └── source9.wav ├── LICENSE ├── README.md └── GAD.py /dataset/source1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source1.wav -------------------------------------------------------------------------------- /dataset/source2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source2.wav -------------------------------------------------------------------------------- /dataset/source3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source3.wav -------------------------------------------------------------------------------- /dataset/source4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source4.wav -------------------------------------------------------------------------------- /dataset/source5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source5.wav -------------------------------------------------------------------------------- /dataset/source6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source6.wav -------------------------------------------------------------------------------- /dataset/source7.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source7.wav -------------------------------------------------------------------------------- /dataset/source8.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source8.wav -------------------------------------------------------------------------------- /dataset/source9.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source9.wav -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Drifty 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Greedy-Adaptive-Dictionary 2 | 3 | This is a python implementation of [(Jafari et. al)](http://ieeexplore.ieee.org/document/5776648/). They present a greedy adaptive dictionary learning algorithm that sets out to find sparse atoms for speech signals. The algorithm learns the dictionary atoms on data frames taken from a speech signal. It iteratively extracts the data frame with minimum sparsity index, and adds this to the dictionary matrix. The contribution of this atom to the data frames is then removed, and the process is repeated. The algorithm is found to yield a sparse signal decomposition, supporting the hypothesis of a link between sparsity in the decomposition and dictionary. The algorithm is applied to the problem of speech representation and speech denoising, and its performance is compared to other existing methods. 4 | 5 | 6 | # Pre-requisites 7 | 8 | - Python 2.7 or greater
9 | - Librosa or any other package for reading audio signals 10 | 11 | # Usage 12 | 13 | `git clone https://github.com/DavideNardone/Greedy-Adaptive-Dictionary.git`
14 | 15 | `unzip Greedy-Adaptive-Dictionary-master.zip` 16 | 17 | then run the following Python file: 18 | 19 | `python GAD.py` (naive example)
20 | 21 | # Authors 22 | 23 | Davide Nardone, University of Naples Parthenope, Science and Technologies Department,
MSc Applied Computer Science
24 | https://www.linkedin.com/in/davide-nardone-127428102 25 | 26 | # Contacts 27 | 28 | For any kind of problem, questions, ideas or suggestions, please don't esitate to contact me at: 29 | - **davide.nardone@studenti.uniparthenope.it** 30 | 31 | # References 32 | 33 | [Jafari et. al]: Jafari, Maria G., and Mark D. Plumbley. "Fast dictionary learning for sparse representations of speech signals." IEEE Journal of Selected Topics in Signal Processing 5.5 (2011): 1025-1031. 34 | -------------------------------------------------------------------------------- /GAD.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from numpy import linalg as LA 3 | 4 | import numpy as np 5 | 6 | np.set_printoptions(threshold=np.inf) 7 | import sys 8 | import librosa 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def buffer(signal, L, M): 13 | if M >= L: 14 | print ('Error: Overlapping windows cannot be larger than frame length!') 15 | sys.exit() 16 | 17 | len_signal = len(signal) 18 | 19 | print ('The signal length is %s: ' % (len_signal)) 20 | 21 | K = np.ceil(len_signal / L).astype('int') # num_frames 22 | 23 | print ('The number of frames \'K\' is %s: ' % (K)) 24 | print ('The length of each frame \'L\' is %s: ' % (L)) 25 | 26 | X_tmp = [] 27 | k = 1 28 | while (True): 29 | start_ind = ((k - 1) * (L - M) + 1) - 1 30 | end_ind = ((k * L) - (k - 1) * M) 31 | if start_ind == len_signal: 32 | break 33 | elif (end_ind > len_signal): 34 | # print ('k=%s, [%s, %s] ' % (k, start_ind, end_ind - 1)) 35 | val_in = len_signal - start_ind 36 | tmp_seg = np.zeros(L) 37 | tmp_seg[:val_in] = signal[start_ind:] 38 | X_tmp.append(tmp_seg) 39 | break 40 | else: 41 | # print ('k=%s, [%s, %s] ' % (k, start_ind, end_ind - 1)) 42 | X_tmp.append(signal[start_ind:end_ind]) 43 | k += 1 44 | 45 | return X_tmp 46 | 47 | 48 | def unbuffer(X, hop): 49 | N, L = X.shape 50 | 51 | T = N + L * hop 52 | K = np.arange(0, N) 53 | x = 
np.zeros(T) 54 | H = np.hanning(N) 55 | for k in xrange(0, L): 56 | x[K] = x[K] + np.multiply(H, X[:, k]) 57 | K = K + hop 58 | 59 | return x 60 | 61 | 62 | class GAD(): 63 | def __init__(self, X, params): 64 | 65 | self.X = X 66 | self.D = [] 67 | self.params = params 68 | self.n_iter = self.params['rule_1']['n_iter'] # num_iterations 69 | self.verbose = self.params['verbose'] 70 | 71 | self.K = self.X.shape[0] # sample length 72 | self.L = self.X.shape[1] # maximum atoms to be learned 73 | 74 | self.I = np.arange(0, self.L) 75 | self.set_ind = [] 76 | 77 | def findResidualColumn(self): 78 | 79 | # Find residual column of R^l with lowest l1- to l2-norm ration 80 | tmp = [] 81 | 82 | # COMPACT WAY TO DO IT 83 | # tmp2 = np.sum(np.abs(self.R),axis=0)/np.sqrt(np.sum(np.power(np.abs(self.R),2),axis=0)) 84 | for k in self.I: 85 | r_k = self.R[:, k] 86 | tmp.append(LA.norm(r_k, 1) / LA.norm(r_k, 2)) 87 | 88 | ind_k_min = np.nanargmin(tmp) # nanargmin, nanmin 89 | k_min = tmp[ind_k_min] 90 | r_k_min = self.R[:, self.I[ind_k_min]] 91 | 92 | # Set the l-th atom to equal to normalized r_k 93 | psi = r_k_min / LA.norm(r_k_min, 2) 94 | 95 | # Add to the dictionary D and its index and shrinking set I 96 | self.D.append(psi) 97 | self.set_ind.append(self.I[ind_k_min]) 98 | 99 | # COMPACT WAY TO DO IT 100 | # self.R = self.R - np.dot(np.outer(psi, psi), self.R) 101 | # self.R = np.delete(self.R, (ind_k_min), axis=1) 102 | 103 | # Compute the new residual for all columns k 104 | for k in self.I: 105 | r_k = self.R[:, k] 106 | alpha = np.dot(r_k, psi) 107 | self.R[:, k] = r_k - np.dot(psi, alpha) 108 | 109 | self.I = np.delete(self.I, ind_k_min) 110 | 111 | def iterative_GAD(self): 112 | 113 | # X columns w/ unit L2-norm 114 | # for k in xrange(0,self.X.shape[1]): 115 | # self.X[:,k] = np.divide(self.X[:,k],LA.norm(self.X[:,k],2)) 116 | 117 | 118 | if self.n_iter > self.L: 119 | print ('Cannot be learned more than %d atom!' 
% (self.L)) 120 | sys.exit() 121 | 122 | # Initializating the residual matrix 'R' by using 'X' 123 | self.R = self.X.copy() 124 | 125 | print self.I.shape 126 | for l in xrange(0, self.n_iter): 127 | 128 | if self.verbose == True: 129 | print 'GAD iteration: ', l + 1 130 | 131 | self.findResidualColumn() 132 | 133 | self.D = np.vstack(self.D).T 134 | 135 | return self.D, self.set_ind 136 | 137 | 138 | if __name__ == '__main__': 139 | L = 512 # frame length 140 | M = 500 # overlapping windows 141 | 142 | params = { 143 | 144 | 'rule_1': { 145 | 'n_iter': 10 # n_iter 146 | }, 147 | 148 | 'rule_2': { 149 | 'error': 10 ** -7 150 | }, 151 | 152 | 'verbose': True 153 | } 154 | 155 | signal, fs = librosa.core.load('./dataset/source1.wav') 156 | 157 | X_tmp = buffer(signal, L, M) 158 | 159 | # new matrix LxK 160 | X = np.vstack(X_tmp).T.astype('float') 161 | 162 | # ??? 163 | # if X.shape[1] < L: 164 | # print 'The number of frames %s has to be greater than its own length %s'%(X.shape[1],X.shape[0]) 165 | # sys.exit() 166 | 167 | alg = GAD(X, params) 168 | 169 | D, I = alg.iterative_GAD() 170 | 171 | X_t = np.dot(np.dot(D, D.T), X) 172 | 173 | s_rec = unbuffer(X_t, L - M) 174 | 175 | plt.figure(1) 176 | plt.title('Original signal') 177 | plt.plot(signal) 178 | 179 | plt.figure(2) 180 | plt.title('Reconstructed signal') 181 | plt.plot(s_rec) 182 | 183 | plt.show() --------------------------------------------------------------------------------