├── dataset
├── source1.wav
├── source2.wav
├── source3.wav
├── source4.wav
├── source5.wav
├── source6.wav
├── source7.wav
├── source8.wav
└── source9.wav
├── LICENSE
├── README.md
└── GAD.py
/dataset/source1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source1.wav
--------------------------------------------------------------------------------
/dataset/source2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source2.wav
--------------------------------------------------------------------------------
/dataset/source3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source3.wav
--------------------------------------------------------------------------------
/dataset/source4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source4.wav
--------------------------------------------------------------------------------
/dataset/source5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source5.wav
--------------------------------------------------------------------------------
/dataset/source6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source6.wav
--------------------------------------------------------------------------------
/dataset/source7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source7.wav
--------------------------------------------------------------------------------
/dataset/source8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source8.wav
--------------------------------------------------------------------------------
/dataset/source9.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DavideNardone/Greedy-Adaptive-Dictionary/HEAD/dataset/source9.wav
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2013 Drifty
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Greedy-Adaptive-Dictionary
2 |
3 | This is a Python implementation of [(Jafari et al.)](http://ieeexplore.ieee.org/document/5776648/). They present a greedy adaptive dictionary learning algorithm that sets out to find sparse atoms for speech signals. The algorithm learns the dictionary atoms on data frames taken from a speech signal. It iteratively extracts the data frame with minimum sparsity index, and adds this to the dictionary matrix. The contribution of this atom to the data frames is then removed, and the process is repeated. The algorithm is found to yield a sparse signal decomposition, supporting the hypothesis of a link between sparsity in the decomposition and dictionary. The algorithm is applied to the problem of speech representation and speech denoising, and its performance is compared to other existing methods.
4 |
5 |
6 | # Pre-requisites
7 |
8 | - Python 2.7 or greater
9 | - Librosa or any other package for reading audio signals
10 |
11 | # Usage
12 |
13 | `git clone https://github.com/DavideNardone/Greedy-Adaptive-Dictionary.git`
14 |
15 | `unzip Greedy-Adaptive-Dictionary-master.zip`
16 |
17 | then... run the following python file:
18 |
19 | `GAD.py (naive example)`
20 |
21 | # Authors
22 |
23 | Davide Nardone, University of Naples Parthenope, Science and Technologies Department,
MSc Applied Computer Science
24 | https://www.linkedin.com/in/davide-nardone-127428102
25 |
26 | # Contacts
27 |
28 | For any kind of problem, question, idea or suggestion, please don't hesitate to contact me at:
29 | - **davide.nardone@studenti.uniparthenope.it**
30 |
31 | # References
32 |
33 | [Jafari et al.]: Jafari, Maria G., and Mark D. Plumbley. "Fast dictionary learning for sparse representations of speech signals." IEEE Journal of Selected Topics in Signal Processing 5.5 (2011): 1025-1031.
34 |
--------------------------------------------------------------------------------
/GAD.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from numpy import linalg as LA
3 |
4 | import numpy as np
5 |
6 | np.set_printoptions(threshold=np.inf)
7 | import sys
8 | import librosa
9 | import matplotlib.pyplot as plt
10 |
11 |
def buffer(signal, L, M):
    """Split a 1-D signal into frames of ``L`` samples overlapping by ``M``.

    Consecutive frames start ``L - M`` samples apart.  The first frame that
    would run past the end of the signal is zero-padded up to ``L`` samples
    and ends the framing.  Framing also stops as soon as a frame would start
    at or beyond the end of the signal.

    Args:
        signal: 1-D sequence of samples supporting ``len`` and slicing
            (e.g. a NumPy array).
        L: Frame length in samples.
        M: Number of samples shared by two consecutive frames; must be
            smaller than ``L``.

    Returns:
        list: The frames in signal order.  Full frames are slices of
        ``signal``; the zero-padded tail frame, when present, is a new
        float array of length ``L``.  An empty signal yields ``[]``.

    Raises:
        SystemExit: If ``M >= L``.  The message is handed to ``sys.exit`` so
            the process terminates with a non-zero status instead of
            reporting success.
    """
    if M >= L:
        sys.exit('Error: Overlapping windows cannot be larger than frame length!')

    len_signal = len(signal)
    hop = L - M  # distance between the starts of two consecutive frames

    frames = []
    start_ind = 0
    # '<' rather than '==' so that a hop stepping past the end of the signal
    # stops the loop instead of building a frame from an empty slice.
    while start_ind < len_signal:
        end_ind = start_ind + L
        if end_ind > len_signal:
            # Tail frame: copy the remaining samples, leave the rest as zeros.
            tail = np.zeros(L)
            tail[:len_signal - start_ind] = signal[start_ind:]
            frames.append(tail)
            break
        frames.append(signal[start_ind:end_ind])
        start_ind += hop

    print('The signal length is %s: ' % (len_signal))
    # Report the number of frames actually produced; ceil(len/L) ignores the
    # overlap and under-counts whenever M > 0.
    print('The number of frames \'K\' is %s: ' % (len(frames)))
    print('The length of each frame \'L\' is %s: ' % (L))

    return frames
46 |
47 |
def unbuffer(X, hop):
    """Overlap-add the columns of ``X`` into one 1-D signal.

    Each column is multiplied by a Hann window of the column length and
    added into the output at an offset of ``k * hop`` samples, where ``k`` is
    the column index.  No compensation for the summed window gain is applied.

    Args:
        X: 2-D array whose columns are the frames to recombine.
        hop: Non-negative number of samples between the starts of
            consecutive frames.

    Returns:
        numpy.ndarray: Float array of length ``N + L * hop`` for an
        ``(N, L)`` input.  The last frame ends at ``(L - 1) * hop + N``, so
        the final ``hop`` samples are always zero.
    """
    N, L = X.shape

    x = np.zeros(N + L * hop)
    H = np.hanning(N)
    # range() instead of xrange() so the function also runs on Python 3.
    for k in range(L):
        start = k * hop
        x[start:start + N] += H * X[:, k]

    return x
60 |
61 |
class GAD(object):
    """Greedy adaptive dictionary learner working on the columns of ``X``.

    Each iteration picks the residual column with the smallest l1-to-l2 norm
    ratio, stores its l2-normalised version as a new atom and removes that
    atom's contribution from the remaining residual columns.

    Args:
        X: 2-D NumPy array; every column is one data frame.
        params: Settings mapping.  ``params['rule_1']['n_iter']`` is the
            number of atoms to learn and ``params['verbose']`` toggles
            progress output.
    """

    def __init__(self, X, params):
        self.X = X
        self.D = []
        self.params = params
        self.n_iter = self.params['rule_1']['n_iter']  # number of atoms to learn
        self.verbose = self.params['verbose']

        self.K = self.X.shape[0]  # rows of X, i.e. the length of every atom
        self.L = self.X.shape[1]  # columns of X, upper bound on the atoms

        self.I = np.arange(0, self.L)  # columns still available for selection
        self.set_ind = []  # column indices chosen so far, in order

    def findResidualColumn(self):
        """Select the next atom and deflate the residual matrix ``self.R``.

        Appends the new atom to ``self.D`` and its column index to
        ``self.set_ind``, subtracts the atom's contribution from every
        still-active residual column and removes the chosen column from
        ``self.I``.  Expects ``self.R`` to have been set up by
        ``iterative_GAD``.

        Raises:
            ValueError: If no active column has a usable (finite) norm
                ratio, e.g. because all remaining residuals are zero.
        """
        active = self.R[:, self.I]

        # Column-wise l1 / l2 ratio.  All-zero columns give 0/0; those are
        # marked NaN so that nanargmin skips them without emitting warnings.
        l1 = LA.norm(active, 1, axis=0)
        l2 = LA.norm(active, 2, axis=0)
        with np.errstate(divide='ignore', invalid='ignore'):
            ratios = l1 / l2
        ratios[~np.isfinite(ratios)] = np.nan
        if ratios.size == 0 or np.all(np.isnan(ratios)):
            raise ValueError(
                'No residual column with non-zero norm is left to build an atom from.')

        ind_k_min = np.nanargmin(ratios)

        # The new atom is the selected residual scaled to unit l2-norm.
        psi = active[:, ind_k_min] / l2[ind_k_min]

        self.D.append(psi)
        self.set_ind.append(self.I[ind_k_min])

        # Remove the atom's contribution from all active columns at once:
        # r_k <- r_k - psi * <r_k, psi>.
        self.R[:, self.I] = active - np.outer(psi, np.dot(psi, active))

        self.I = np.delete(self.I, ind_k_min)

    def iterative_GAD(self):
        """Learn ``n_iter`` atoms from the columns of ``X``.

        The learner state is rebuilt on every call, so the method can be
        invoked repeatedly on the same instance.

        Returns:
            tuple: ``(D, set_ind)`` where ``D`` is an array with one atom
            per column (shape ``(rows of X, n_iter)``) and ``set_ind`` lists
            the indices of the columns of ``X`` the atoms were derived from.

        Raises:
            SystemExit: If more atoms are requested than ``X`` has columns.
                The message is handed to ``sys.exit`` so the process ends
                with a non-zero status.
            ValueError: Propagated from ``findResidualColumn`` when the
                residuals are exhausted before ``n_iter`` atoms were found.
        """
        if self.n_iter > self.L:
            sys.exit('Cannot be learned more than %d atom!' % (self.L))

        # Start from a clean state; a previous run leaves self.D as an array
        # and self.I / self.set_ind partially consumed.
        self.D = []
        self.set_ind = []
        self.I = np.arange(0, self.L)

        # The residual starts as a copy of X.  Integer (or boolean) data is
        # promoted to float, otherwise the residual updates would be
        # truncated when written back.
        self.R = self.X.copy()
        if not np.issubdtype(self.R.dtype, np.inexact):
            self.R = self.R.astype(float)

        if self.verbose:
            print(self.I.shape)
        for l in range(self.n_iter):
            if self.verbose:
                print('GAD iteration:  %d' % (l + 1))

            self.findResidualColumn()

        if self.D:
            self.D = np.vstack(self.D).T
        else:
            # Zero requested atoms: empty dictionary with matching row count.
            self.D = np.empty((self.K, 0))

        return self.D, self.set_ind
136 |
137 |
if __name__ == '__main__':
    # Framing setup: every frame holds L samples and shares M of them with
    # the next frame.
    L = 512
    M = 500

    # Settings handed to GAD.  'rule_1' -> 'n_iter' and 'verbose' are the
    # entries the class reads; 'rule_2' is passed along as well.
    params = dict(
        rule_1=dict(n_iter=10),
        rule_2=dict(error=10 ** -7),
        verbose=True,
    )

    signal, fs = librosa.core.load('./dataset/source1.wav')

    # Cut the signal into frames and arrange them as the columns of X.
    frames = buffer(signal, L, M)
    X = np.transpose(np.vstack(frames)).astype('float')

    # Learn the dictionary atoms from the frames.
    learner = GAD(X, params)
    D, I = learner.iterative_GAD()

    # Apply D * D^T to the frames, then overlap-add them back into a signal.
    X_t = np.dot(np.dot(D, D.T), X)
    s_rec = unbuffer(X_t, L - M)

    # One figure per signal: the input first, the reconstruction second.
    for fig_id, (title, samples) in enumerate(
            [('Original signal', signal), ('Reconstructed signal', s_rec)], 1):
        plt.figure(fig_id)
        plt.title(title)
        plt.plot(samples)

    plt.show()
--------------------------------------------------------------------------------