├── .gitignore
├── IOMethods.py
├── LICENSE
├── MaskingMethods.py
├── QMF
│   ├── __init__.py
│   ├── old_vers
│   │   ├── IOMethods.py
│   │   ├── __init__.py
│   │   ├── foldingmat_filterbank.py
│   │   ├── mixed.wav
│   │   ├── mixedrek.wav
│   │   ├── qmf1024_8x.mat
│   │   ├── qmf1024qn.mat
│   │   ├── qmf_realtime.py
│   │   ├── qmf_realtime_analysis.py
│   │   ├── qmf_realtime_sinmod.py
│   │   └── qmf_realtime_synthesis.py
│   ├── qmf.dat
│   ├── qmf1024.mat
│   ├── qmf1024_8x.mat
│   ├── qmf1024qn.mat
│   ├── qmf_comp_call.py
│   ├── qmf_realtime_class.py
│   └── xn.mat
├── README.md
├── TFMethods.py
└── testFiles
    ├── mixed.wav
    ├── mt_test.py
    ├── pulse.wav
    ├── ramp.wav
    └── sc03_16m.wav

/.gitignore:
--------------------------------------------------------------------------------
1 | # OS generated files #
2 | ######################
3 | .DS_Store
4 | .DS_Store?
5 | ._*
6 | .Spotlight-V100
7 | .Trashes
8 | ehthumbs.db
9 | Thumbs.db
10 | *~
11 | 
12 | # Py-Generated files #
13 | ######################
14 | *.pyc
15 | *.py~
16 | *.md~
17 | __pycache__/
18 | *.py[cod]
19 | *.bin
20 | 
--------------------------------------------------------------------------------
/IOMethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'S.I. Mimilakis'
3 | __copyright__ = 'MacSeNet'
4 | 
5 | import os, subprocess, csv
6 | import numpy as np
7 | import wave as _wave
8 | from scipy.io.wavfile import write, read
9 | from sys import platform
10 | 
11 | class AudioIO:
12 |     """ Class for handling audio input/output operations.
13 |         It supports reading and writing of various audio formats
14 |         via the 'audioRead' & 'audioWrite' methods. Moreover, playback
15 |         can be performed using the 'sound' method. For formats
16 |         other than '.wav' a codec is needed; in this case
17 |         libffmpeg is used, and the absolute path of the
18 |         static build should be given to the class variable 'pathToffmpeg'.
19 |         Finally, playback can be terminated from the keyboard
20 |         via the 'stop' method.
21 | 
22 |         Basic usage examples:
23 |         Import the class:
24 |             import IOMethods as IO
25 |         - For loading wav files:
26 |             x, fs = IO.AudioIO.wavRead('myWavFile.wav', mono = True)
27 |         - In case compressed files are to be read, specify
28 |           the path to the libffmpeg build by changing the 'pathToffmpeg'
29 |           variable and then type:
30 |             x, fs = IO.AudioIO.audioRead('myCompressedFile.mp3')
31 |         - For writing wav files:
32 |             IO.AudioIO.audioWrite(x, fs, 16, 'myNewWavFile.wav', 'wav')
33 | 
34 |         - For listening to wav files:
35 |             IO.AudioIO.sound(x,fs)
36 | 
37 |     """
38 |     # Normalisation parameters for wav reading and writing
39 |     normFact = {'int8' : (2**7) -1,
40 |                 'int16': (2**15)-1,
41 |                 'int24': (2**23)-1,
42 |                 'int32': (2**31)-1,
43 |                 'int64': (2**63)-1,
44 |                 'float32': 1.0,
45 |                 'float64': 1.0}
46 | 
47 |     # 'Silence' the bash output
48 |     FNULL = open(os.devnull, 'w')
49 | 
50 |     # Absolute path needed here
51 |     pathToffmpeg = '/home/mis/Documents/Python/Projects/SourceSeparation/MiscFiles'
52 | 
53 | 
54 |     def __init__(self):
55 |         pass
56 | 
57 |     @staticmethod
58 |     def audioRead(fileName, mono=False):
59 |         """ Function to load audio files such as *.mp3, *.au, *.wma, *.m4a, *.x-wav & *.aiff.
60 |             It first converts them to .wav and reads them with the methods below.
61 |             Currently, it uses a static build of ffmpeg.
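            A usage sketch (the file name here is only an illustration):
                x, fs = AudioIO.audioRead('mySong.mp3', mono = True)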
62 | 63 | Args: 64 | fileName: (str) Absolute filename of WAV file 65 | mono: (bool) Switch if samples should be converted to mono 66 | Returns: 67 | samples: (np array) Audio samples (between [-1,1] 68 | (if stereo: numSamples x numChannels, 69 | if mono: numSamples) 70 | sampleRate: (float): Sampling frequency [Hz] 71 | """ 72 | 73 | # Get the absolute path 74 | fileName = os.path.abspath(fileName) 75 | 76 | # Linux 77 | if (platform == "linux") or (platform == "linux2"): 78 | convDict = { 79 | 'm4a':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 80 | + ' -i ' + fileName + ' ', -3], 81 | 'mp3':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 82 | + ' -i ' + fileName + ' ', -3], 83 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 84 | + ' -i ' + fileName + ' ', -2], 85 | 'wma':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 86 | + ' -i ' + fileName + ' ', -3], 87 | 'aiff':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 88 | + ' -i ' + fileName + ' ', -4], 89 | 'wav':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 90 | + ' -i ' + fileName + ' ', -3] 91 | } 92 | 93 | # MacOSX 94 | elif (platform == "darwin"): 95 | convDict = { 96 | 'm4a':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 97 | + ' -i ' + fileName + ' ', -3], 98 | 'mp3':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 99 | + ' -i ' + fileName + ' ', -3], 100 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 101 | + ' -i ' + fileName + ' ', -2], 102 | 'wma':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 103 | + ' -i ' + fileName + ' ', -3], 104 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 105 | + ' -i ' + fileName + ' ', -4], 106 | 'wav': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 107 | + ' -i ' + fileName + ' ', -3] 108 | } 109 | # Add windows support! 
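        # The branch below is only an illustrative sketch of such support; it is
        # untested, and the binary name 'ffmpeg.exe' inside 'pathToffmpeg' is an
        # assumption:
        elif (platform == "win32"):
            convDict = {
                'm4a': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -3],
                'mp3': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -3],
                'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -2],
                'wma': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -3],
                'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -4],
                'wav': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg.exe') + ' -i ' + fileName + ' ', -3]
            }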
110 | else : 111 | raise Exception('This OS is not supported.') 112 | 113 | # Construct 114 | 115 | if fileName[convDict['mp3'][1]:] == 'mp3': 116 | print(fileName[convDict['mp3'][1]:]) 117 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['mp3'][1]] + 'wav')) 118 | subprocess.call(convDict['mp3'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 119 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 120 | os.remove(modfileName) 121 | 122 | elif fileName[convDict['au'][1]:] == 'au': 123 | print(fileName[convDict['au'][1]:]) 124 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['au'][1]] + 'wav')) 125 | subprocess.call(convDict['au'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 126 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 127 | os.remove(modfileName) 128 | 129 | elif fileName[convDict['wma'][1]:] == 'wma': 130 | print(fileName[convDict['wma'][1]:]) 131 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['wma'][1]] + 'wav')) 132 | subprocess.call(convDict['wma'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 133 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 134 | os.remove(modfileName) 135 | 136 | elif fileName[convDict['aiff'][1]:] == 'aiff': 137 | print(fileName[convDict['aiff'][1]:]) 138 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['aiff'][1]] + 'wav')) 139 | subprocess.call(convDict['aiff'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 140 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 141 | os.remove(modfileName) 142 | 143 | elif fileName[convDict['wav'][1]:] == 'wav': 144 | """ 145 | General purpose reading of wav files that do not contain the RIFF header. 146 | """ 147 | print('x-wav') 148 | modfileName = os.path.join(os.path.abspath(fileName[:-4] + '_temp.wav')) 149 | subprocess.call(convDict['wav'][0] + modfileName, shell=True, stdout=AudioIO.FNULL, 150 | stderr=subprocess.STDOUT) 151 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 152 | os.remove(modfileName) 153 | 154 | elif fileName[convDict['m4a'][1]:] == 'm4a': 155 | print(fileName[convDict['m4a'][1]:]) 156 | modfileName = os.path.join(os.path.abspath(fileName[:-4] + '_temp.wav')) 157 | subprocess.call(convDict['m4a'][0] + modfileName, shell=True, stdout=AudioIO.FNULL, 158 | stderr=subprocess.STDOUT) 159 | samples, sampleRate = AudioIO.wavRead(modfileName, mono) 160 | os.remove(modfileName) 161 | 162 | else : 163 | raise Exception('This format is not supported.') 164 | 165 | return samples, sampleRate 166 | 167 | @staticmethod 168 | def audioWrite(y, fs, nbits, audioFile, format): 169 | """ Write samples to WAV file and then converts to selected 170 | format using ffmpeg. 
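            For instance, mirroring the class-level examples (output name illustrative):
                AudioIO.audioWrite(x, fs, 16, 'myNewFile.mp3', 'mp3')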
171 | Args: 172 | samples: (ndarray / 2D ndarray) (floating point) sample vector 173 | mono: DIM: nSamples 174 | stereo: DIM: nSamples x nChannels 175 | 176 | fs: (int) Sample rate in Hz 177 | nBits: (int) Number of bits 178 | audioFile: (string) File name to write 179 | format: (string) Selected format 180 | 'mp3' : Writes to .mp3 181 | 'wma' : Writes to .wma 182 | 'wav' : Writes to .wav 183 | 'aiff' : Writes to .aiff 184 | 'au' : Writes to .au 185 | 'm4a' : Writes to .m4a 186 | """ 187 | 188 | # Linux 189 | if (platform == "linux") or (platform == "linux2"): 190 | convDict = { 191 | 'm4a': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -3], 192 | 'mp3': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -3], 193 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -2], 194 | 'wma': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -3], 195 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -4] 196 | } 197 | 198 | # MacOSX 199 | elif (platform == "darwin"): 200 | convDict = { 201 | 'm4a': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -3], 202 | 'mp3': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -3], 203 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -2], 204 | 'wma': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -3], 205 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -4] 206 | } 207 | 208 | else : 209 | raise Exception('This OS is not supported.') 210 | 211 | if (format == 'mp3'): 212 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['mp3'][1]] + 'wav')) 213 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 214 | subprocess.call(convDict['mp3'][0] + prmfileName + ' ' + audioFile, 215 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 216 | os.remove(prmfileName) 217 | 218 | elif (format == 'wav'): 219 | AudioIO.wavWrite(y, fs, nbits, audioFile) 220 | 221 | elif (format == 'wma'): 222 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['wma'][1]] + 'wav')) 223 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 224 | subprocess.call(convDict['wma'][0] + prmfileName + ' ' + audioFile, 225 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 226 | os.remove(prmfileName) 227 | 228 | elif (format == 'aiff'): 229 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['aiff'][1]] + 'wav')) 230 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 231 | subprocess.call(convDict['aiff'][0] + prmfileName + ' ' + audioFile, 232 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 233 | os.remove(prmfileName) 234 | 235 | elif (format == 'au'): 236 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['au'][1]] + 'wav')) 237 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 238 | subprocess.call(convDict['au'][0] + prmfileName + ' ' + audioFile, 239 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 240 | os.remove(prmfileName) 241 | 242 | elif (format == 'm4a'): 243 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['m4a'][1]] + 'wav')) 244 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 245 | subprocess.call(convDict['m4a'][0] + prmfileName + ' -b:a 320k ' + audioFile, 246 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 247 | os.remove(prmfileName) 248 | else : 249 | raise Exception('This format is not supported.') 250 | 251 | @staticmethod 252 | def wavRead(fileName, mono=False): 253 | """ Function to load WAV file. 
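            For example, as in the class docstring:
                x, fs = AudioIO.wavRead('myWavFile.wav', mono = True)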
254 | 
255 |         Args:
256 |             fileName:   (str)   Absolute filename of WAV file
257 |             mono:       (bool)  Switch if samples should be converted to mono
258 |         Returns:
259 |             samples:    (np array)  Audio samples (between [-1, 1])
260 |                         (if stereo: numSamples x numChannels,
261 |                          if mono:   numSamples)
262 |             sampleRate: (float) Sampling frequency [Hz]
263 |         """
264 |         try:
265 |             samples, sampleRate = AudioIO._loadWAVWithWave(fileName)
266 |             sWidth = _wave.open(fileName).getsampwidth()
267 |             if sWidth == 1:
268 |                 #print('8bit case')
269 |                 samples = samples.astype(float) / AudioIO.normFact['int8'] - 1.0
270 |             elif sWidth == 2:
271 |                 #print('16bit case')
272 |                 samples = samples.astype(float) / AudioIO.normFact['int16']
273 |             elif sWidth == 3:
274 |                 #print('24bit case')
275 |                 samples = samples.astype(float) / AudioIO.normFact['int24']
276 |         except:
277 |             #print('32bit case')
278 |             samples, sampleRate = AudioIO._loadWAVWithScipy(fileName)
279 | 
280 |         # mono conversion
281 |         if mono:
282 |             if samples.ndim == 2 and samples.shape[1] > 1:
283 |                 samples = (samples[:, 0] + samples[:, 1])*0.5
284 | 
285 |         return samples, sampleRate
286 | 
287 |     @staticmethod
288 |     def _loadWAVWithWave(fileName):
289 |         """ Load samples & sample rate from 24 bit WAV file """
290 |         wav = _wave.open(fileName)
291 |         rate = wav.getframerate()
292 |         nchannels = wav.getnchannels()
293 |         sampwidth = wav.getsampwidth()
294 |         nframes = wav.getnframes()
295 |         data = wav.readframes(nframes)
296 |         wav.close()
297 |         array = AudioIO._wav2array(nchannels, sampwidth, data)
298 | 
299 |         return array, rate
300 | 
301 |     @staticmethod
302 |     def _loadWAVWithScipy(fileName):
303 |         """ Load samples & sample rate from WAV file """
304 |         inputData = read(fileName)
305 |         samples = inputData[1]
306 |         sampleRate = inputData[0]
307 | 
308 |         return samples, sampleRate
309 | 
310 |     @staticmethod
311 |     def _wav2array(nchannels, sampwidth, data):
312 |         """data must be the string containing the bytes from the wav file."""
313 |         num_samples, remainder = divmod(len(data), sampwidth * nchannels)
314 |         if remainder > 0:
315 |             raise ValueError('The length of data is not a multiple of '
316 |                              'sampwidth * num_channels.')
317 |         if sampwidth > 4:
318 |             raise ValueError("sampwidth must not be greater than 4.")
319 | 
320 |         if sampwidth == 3:
321 |             a = np.empty((num_samples, nchannels, 4), dtype = np.uint8)
322 |             raw_bytes = np.fromstring(data, dtype = np.uint8)
323 |             a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
324 |             a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
325 |             result = a.view('<i4').reshape(a.shape[:-1])
326 |         else:
327 |             # 8 bit samples are stored as unsigned ints; others as signed ints.
328 |             dt_char = 'u' if sampwidth == 1 else 'i'
329 |             a = np.fromstring(data, dtype = '<%s%d' % (dt_char, sampwidth))
330 |             result = a.reshape(-1, nchannels)
331 | 
332 |         return result
333 | 
334 |     @staticmethod
335 |     def wavWrite(y, fs, nbits, audioFile):
336 |         """ Write (floating point) samples to a WAV file.
337 |         Args:
338 |             y:         (ndarray / 2D ndarray) sample vector
339 |                        mono:   DIM: nSamples
340 |                        stereo: DIM: nSamples x nChannels
341 | 
342 |             fs:        (int) Sample rate in Hz
343 |             nbits:     (int) Number of bits
344 |             audioFile: (string) WAV file name to write
345 |         """
346 |         if nbits == 8:
347 |             intsamples = (y + 1.0) * AudioIO.normFact['int' + str(nbits)]
348 |             fX = np.int8(intsamples)
349 |         elif nbits == 16:
350 |             fX = np.int16(y * AudioIO.normFact['int' + str(nbits)])
351 |         elif nbits > 16:
352 |             fX = y
353 | 
354 |         write(audioFile, fs, fX)
355 | 
356 |     @staticmethod
357 |     def sound(x,fs):
358 |         """ Plays back the given samples using the pyglet library; they are first written to a temporary wav file.
359 |             Playback is terminated by typing any keyboard input followed by Enter.
360 | Args: 361 | x: (array) Floating point samples 362 | fs: (int) The sampling rate 363 | """ 364 | import pyglet as pg 365 | global player 366 | # Call the writing function 367 | AudioIO.wavWrite(x, fs, 16, 'testPlayback.wav') 368 | # Initialize playback engine 369 | player = pg.media.Player() 370 | # Initialize the object with the audio file 371 | playback = pg.media.load('testPlayback.wav') 372 | # Set it to player 373 | player.queue(playback) 374 | # Sound call 375 | player.play() 376 | # Killed by "keyboard" 377 | kill = raw_input() 378 | if kill or kill == '': 379 | AudioIO.stop() 380 | # Remove the dummy wave write 381 | os.remove('testPlayback.wav') 382 | 383 | @staticmethod 384 | def stop(): 385 | """ Stops a playback object of the pyglet library. 386 | It does not accept arguments, but a player has to be 387 | already initialized by the above "sound" method. 388 | """ 389 | global player 390 | # Just Pause & Destruct 391 | player.pause() 392 | player = None 393 | return None 394 | 395 | if __name__ == "__main__": 396 | # Define File 397 | myReadFile = 'EnterYourWavFile.wav' 398 | 399 | # Read the file 400 | x, fs = AudioIO.wavRead(myReadFile, mono = True) 401 | 402 | # Gain parameter 403 | g = 0.2 404 | 405 | # Listen to it 406 | AudioIO.sound(x*g,fs) 407 | 408 | # Make it better and write it to disk 409 | x2 = np.empty((len(x),2), dtype = np.float32) 410 | try : 411 | x2[:,0] = x * g 412 | x2[:,1] = np.roll(x*g, 512) 413 | except ValueError: 414 | x2[:,0] = x[:,0] * g 415 | x2[:,1] = np.roll(x[:,0] * g, 256) 416 | 417 | # Listen to stereo processed 418 | AudioIO.sound(x2*g,fs) 419 | AudioIO.audioWrite(x2, fs, 16, 'myNewWavFile.wav', 'wav') -------------------------------------------------------------------------------- /MaskingMethods.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = 'S.I. Mimilakis' 3 | __copyright__ = 'MacSeNet' 4 | 5 | import numpy as np 6 | from scipy.fftpack import fft, ifft 7 | from TFMethods import TimeFrequencyDecomposition as TF 8 | 9 | class FrequencyMasking: 10 | """Class containing various time-frequency masking methods, for processing Time-Frequency representations. 
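       A minimal usage sketch, mirroring the test at the bottom of this file
       (mX, sT and nR denote magnitude spectrograms of the mixture, the target
       and the residual, respectively):
           mask = FrequencyMasking(mX, sT, [nR], [], [], alpha = 2., method = 'alphaWiener')
           targetEst = mask()                  # masked target estimate
           residualEst = mask(reverse = True)  # apply (1 - mask) instead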
11 |     """
12 | 
13 |     def __init__(self, mX, sTarget, nResidual, psTarget = [], pnResidual = [], alpha = 1.2, method = 'Wiener'):
14 |         self._mX = mX
15 |         self._eps = np.finfo(np.float).eps
16 |         self._sTarget = sTarget
17 |         self._nResidual = nResidual
18 |         self._pTarget = psTarget
19 |         self._pY = pnResidual
20 |         self._mask = []
21 |         self._Out = []
22 |         self._alpha = alpha
23 |         self._method = method
24 |         self._iterations = 200
25 |         self._lr = 3e-3    # learning rate for optAlpha (an alternative value: 2e-3)
26 |         self._hetaplus = 1.2
27 |         self._hetaminus = 0.5
28 | 
29 |     def __call__(self, reverse = False):
30 | 
31 |         if (self._method == 'Phase'):
32 |             if not self._pTarget.size or not self._pY.size:
33 |                 raise ValueError('Phase-sensitive masking cannot be performed without phase information.')
34 |             else:
35 |                 FrequencyMasking.phaseSensitive(self)
36 |             if not(reverse) :
37 |                 FrequencyMasking.applyMask(self)
38 |             else :
39 |                 FrequencyMasking.applyReverseMask(self)
40 | 
41 |         elif (self._method == 'IRM'):
42 |             FrequencyMasking.IRM(self)
43 |             if not(reverse) :
44 |                 FrequencyMasking.applyMask(self)
45 |             else :
46 |                 FrequencyMasking.applyReverseMask(self)
47 | 
48 |         elif (self._method == 'IAM'):
49 |             FrequencyMasking.IAM(self)
50 |             if not(reverse) :
51 |                 FrequencyMasking.applyMask(self)
52 |             else :
53 |                 FrequencyMasking.applyReverseMask(self)
54 | 
55 |         elif (self._method == 'IBM'):
56 |             FrequencyMasking.IBM(self)
57 |             if not(reverse) :
58 |                 FrequencyMasking.applyMask(self)
59 |             else :
60 |                 FrequencyMasking.applyReverseMask(self)
61 | 
62 |         elif (self._method == 'UBBM'):
63 |             FrequencyMasking.UBBM(self)
64 |             if not(reverse) :
65 |                 FrequencyMasking.applyMask(self)
66 |             else :
67 |                 FrequencyMasking.applyReverseMask(self)
68 | 
69 | 
70 |         elif (self._method == 'Wiener'):
71 |             FrequencyMasking.Wiener(self)
72 |             if not(reverse) :
73 |                 FrequencyMasking.applyMask(self)
74 |             else :
75 |                 FrequencyMasking.applyReverseMask(self)
76 | 
77 |         elif (self._method == 'alphaWiener'):
78 |             FrequencyMasking.alphaHarmonizableProcess(self)
79 |             if not(reverse) :
80 |                 FrequencyMasking.applyMask(self)
81 |             else :
82 |                 FrequencyMasking.applyReverseMask(self)
83 | 
84 |         elif (self._method == 'expMask'):
85 |             FrequencyMasking.ExpM(self)
86 |             if not(reverse) :
87 |                 FrequencyMasking.applyMask(self)
88 |             else :
89 |                 FrequencyMasking.applyReverseMask(self)
90 | 
91 |         elif (self._method == 'MWF'):
92 |             print('Multichannel Wiener Filtering')
93 |             FrequencyMasking.MWF(self)
94 | 
95 |         return self._Out
96 | 
97 |     def IRM(self):
98 |         """
99 |         Computation of the Ideal Amplitude Ratio Mask. As appears in :
100 |         H. Erdogan, J. R. Hershey, S. Watanabe, and J. Le Roux,
101 |         "Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks,"
102 |         in ICASSP 2015, Brisbane, April, 2015.
103 |         Args:
104 |             sTarget:   (2D ndarray) Magnitude Spectrogram of the target component
105 |             nResidual: (2D ndarray) Magnitude Spectrogram of the residual component
106 |         Returns:
107 |             mask:      (2D ndarray) Array that contains time frequency gain values
108 | 
109 |         """
110 |         print('Ideal Amplitude Ratio Mask')
111 |         self._mask = np.divide(self._sTarget, (self._eps + self._sTarget + self._nResidual))
112 | 
113 |     def IAM(self):
114 |         """
115 |         Computation of the Ideal Amplitude Mask. As appears in :
116 |         H. Erdogan, J. R. Hershey, S. Watanabe, and J. Le Roux,
117 |         "Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks,"
118 |         in ICASSP 2015, Brisbane, April, 2015.
119 | Args: 120 | sTarget: (2D ndarray) Magnitude Spectrogram of the target component 121 | nResidual: (2D ndarray) Magnitude Spectrogram of the residual component 122 | (In this case the observed mixture should be placed) 123 | Returns: 124 | mask: (2D ndarray) Array that contains time frequency gain values 125 | 126 | """ 127 | print('Ideal Amplitude Mask') 128 | self._mask = np.divide(self._sTarget, (self._eps + self._nResidual)) 129 | 130 | def ExpM(self): 131 | """ 132 | Approximate a signal via element-wise exponentiation. As appears in : 133 | S.I. Mimilakis, K. Drossos, T. Virtanen, and G. Schuller, 134 | "Deep Neural Networks for Dynamic Range Compression in Mastering Applications," 135 | in proc. of the 140th Audio Engineering Society Convention, Paris, 2016. 136 | Args: 137 | sTarget: (2D ndarray) Magnitude Spectrogram of the target component 138 | nResidual: (2D ndarray) Magnitude Spectrogram of the residual component 139 | Returns: 140 | mask: (2D ndarray) Array that contains time frequency gain values 141 | 142 | """ 143 | print('Exponential mask') 144 | self._mask = np.divide(np.log(self._sTarget.clip(self._eps, np.inf)**self._alpha),\ 145 | np.log(self._nResidual.clip(self._eps, np.inf)**self._alpha)) 146 | 147 | def IBM(self): 148 | """ 149 | Computation of Ideal Binary Mask. 150 | Args: 151 | sTarget: (2D ndarray) Magnitude Spectrogram of the target component 152 | nResidual: (2D ndarray) Magnitude Spectrogram of the residual component 153 | Returns: 154 | mask: (2D ndarray) Array that contains time frequency gain values 155 | 156 | """ 157 | print('Ideal Binary Mask') 158 | theta = 0.5 159 | mask = np.divide(self._sTarget ** self._alpha, (self._eps + self._nResidual ** self._alpha)) 160 | bg = np.where(mask >= theta) 161 | sm = np.where(mask < theta) 162 | mask[bg[0],bg[1]] = 1. 163 | mask[sm[0], sm[1]] = 0. 164 | self._mask = mask 165 | 166 | def UBBM(self): 167 | """ 168 | Computation of Upper Bound Binary Mask. As appears in : 169 | - J.J. Burred, "From Sparse Models to Timbre Learning: New Methods for Musical Source Separation", PhD Thesis, 170 | TU Berlin, 2009. 171 | 172 | Args: 173 | sTarget: (2D ndarray) Magnitude Spectrogram of the target component 174 | nResidual: (2D ndarray) Magnitude Spectrogram of the residual component (Should not contain target source!) 175 | Returns: 176 | mask: (2D ndarray) Array that contains time frequency gain values 177 | """ 178 | print('Upper Bound Binary Mask') 179 | mask = 20. * np.log(self._eps + np.divide((self._eps + (self._sTarget ** self._alpha)), 180 | ((self._eps + (self._nResidual ** self._alpha))))) 181 | bg = np.where(mask >= 0) 182 | sm = np.where(mask < 0) 183 | mask[bg[0],bg[1]] = 1. 184 | mask[sm[0], sm[1]] = 0. 185 | self._mask = mask 186 | 187 | def Wiener(self): 188 | """ 189 | Computation of Wiener-like Mask. As appears in : 190 | H Erdogan, John R. Hershey, Shinji Watanabe, and Jonathan Le Roux, 191 | "Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks," 192 | in ICASSP 2015, Brisbane, April, 2015. 193 | Args: 194 | sTarget: (2D ndarray) Magnitude Spectrogram of the target component 195 | nResidual: (2D ndarray) Magnitude Spectrogram of the residual component 196 | Returns: 197 | mask: (2D ndarray) Array that contains time frequency gain values 198 | """ 199 | print('Wiener-like Mask') 200 | localsTarget = self._sTarget ** 2. 201 | numElements = len(self._nResidual) 202 | if numElements > 1: 203 | localnResidual = self._nResidual[0] ** 2. 
+ localsTarget
204 |             for indx in range(1, numElements):
205 |                 localnResidual += self._nResidual[indx] ** 2.
206 |         else :
207 |             localnResidual = self._nResidual[0] ** 2. + localsTarget
208 | 
209 |         self._mask = np.divide((localsTarget + self._eps), (self._eps + localnResidual))
210 | 
211 |     def alphaHarmonizableProcess(self):
212 |         """
213 |         Computation of a Wiener-like mask using fractional power spectrograms. As appears in :
214 |         A. Liutkus, R. Badeau, "Generalized Wiener filtering with fractional power spectrograms",
215 |         40th International Conference on Acoustics, Speech and Signal Processing (ICASSP),
216 |         Apr 2015, Brisbane, Australia.
217 |         Args:
218 |             sTarget:   (2D ndarray) Magnitude Spectrogram of the target component
219 |             nResidual: (2D ndarray) Magnitude Spectrogram of the residual component or a list
220 |                        of 2D ndarrays which will be added together
221 |         Returns:
222 |             mask:      (2D ndarray) Array that contains time frequency gain values
223 | 
224 |         """
225 |         print('Harmonizable Process with alpha:', str(self._alpha))
226 |         localsTarget = self._sTarget ** self._alpha
227 |         numElements = len(self._nResidual)
228 |         if numElements > 1:
229 |             localnResidual = self._nResidual[0] ** self._alpha + localsTarget
230 |             for indx in range(1, numElements):
231 |                 localnResidual += self._nResidual[indx] ** self._alpha
232 |         else :
233 |             localnResidual = self._nResidual[0] ** self._alpha + localsTarget
234 | 
235 |         self._mask = np.divide((localsTarget + self._eps), (self._eps + localnResidual))
236 | 
237 |     def phaseSensitive(self):
238 |         """
239 |         Computation of the Phase Sensitive Mask. As appears in :
240 |         H. Erdogan, J. R. Hershey, S. Watanabe, and J. Le Roux,
241 |         "Phase-sensitive and recognition-boosted speech separation using deep recurrent neural networks,"
242 |         in ICASSP 2015, Brisbane, April, 2015.
243 | 
244 |         Args:
245 |             mTarget: (2D ndarray) Magnitude Spectrogram of the target component
246 |             pTarget: (2D ndarray) Phase Spectrogram of the target component
247 |             mY:      (2D ndarray) Magnitude Spectrogram of the residual component
248 |             pY:      (2D ndarray) Phase Spectrogram of the residual component
249 |         Returns:
250 |             mask:    (2D ndarray) Array that contains time frequency gain values
251 | 
252 |         """
253 |         print('Phase Sensitive Masking.')
254 |         # Compute Phase Difference
255 |         Theta = (self._pTarget - self._pY)
256 |         self._mask = 2./ (1. + np.exp(-np.multiply(np.divide(self._sTarget, self._eps + self._nResidual), np.cos(Theta)))) - 1.
257 | 
258 |     def optAlpha(self, initloss):
259 |         """
260 |         A simple gradient descent method using the RProp algorithm,
261 |         for finding optimum power-spectral density exponents (alpha) for generalized Wiener filtering.
262 |         Args:
263 |             sTarget :  (2D ndarray) Magnitude Spectrogram of the target component
264 |             nResidual: (2D ndarray) Magnitude Spectrogram of the residual component or a list
265 |                        of 2D ndarrays which will be added together
266 |             initloss : (float) Initial loss, for comparison
267 |         Returns:
268 |             mask:      (2D ndarray) Array that contains time frequency gain values
269 | 
270 |         """
271 |         # Initialization of the parameters
272 |         # Put every source spectrogram into an array, given an input list.
273 |         slist = list(self._nResidual)
274 |         slist.insert(0, self._sTarget)
275 |         numElements = len(slist)
276 |         slist = np.asarray(slist)
277 | 
278 |         alpha = np.array([1.15] * (numElements))    # Initialize an array of alpha values to be found.
279 |         dloss = np.array([0.] * (numElements))      # Initialize an array of loss derivatives, one per source.
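        # RProp in brief (see the update block further below): after each iteration
        # the sign of the change in the Itakura-Saito loss is inspected; when the
        # loss has decreased, the per-source learning rates grow by _hetaplus (1.2),
        # and when it has increased, they shrink by _hetaminus (0.5).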
280 |         lrs = np.array([self._lr] * (numElements))  # Initialize an array of learning rates to be applied to each source.
281 | 
282 |         # Begin optimization
283 |         isloss = []
284 |         for iter in xrange(self._iterations):
285 |             # The actual function of additive power spectrograms
286 |             Xhat = np.sum(np.power(slist, np.reshape(alpha, (numElements, 1, 1))), axis=0)
287 |             for source in xrange(numElements):
288 |                 # Derivative with respect to the function of additive power spectrograms
289 |                 dX = (slist[source, :, :]**alpha[source]) * np.log(slist[source, :, :] + self._eps)
290 | 
291 |                 # Chain rule between the above derivative and the IS derivative
292 |                 dloss[source] = self._dIS(Xhat) * np.mean(dX)
293 | 
294 |             alpha -= (lrs*dloss)
295 | 
296 |             # Keep the alpha values inside a reasonable range
297 |             alpha = np.clip(alpha, a_min = 0.5, a_max = 2.)
298 | 
299 |             # Check IS Loss by computing Xhat
300 |             Xhat = 0
301 |             for source in xrange(numElements):
302 |                 Xhat += slist[source, :, :] ** alpha[source]
303 | 
304 |             isloss.append(self._IS(Xhat))
305 |             if (iter > 2):
306 |                 # Apply RProp
307 |                 if (isloss[-2] - isloss[-1] > 0):
308 |                     lrs *= self._hetaplus
309 | 
310 |                 if (isloss[-2] - isloss[-1] < 0):
311 |                     lrs *= self._hetaminus
312 | 
313 |             if (iter > 4):
314 |                 if (np.abs(isloss[-2] - isloss[-1]) < 1e-4 and np.abs(isloss[-3] - isloss[-2]) < 1e-4):
315 |                     print('Local Minimum Found')
316 |                     print('Final Loss: ' + str(isloss[-1]) + ' with characteristic exponent(s): ' + str(alpha))
317 |                     break
318 | 
319 |             print('Loss: ' + str(isloss[-1]) + ' with characteristic exponent(s): ' + str(alpha))
320 | 
321 |         # Evaluate Xhat for the mask update
322 |         self._mask = np.divide((slist[0, :, :] ** alpha[0] + self._eps), (self._mX ** self._alpha + self._eps))
323 |         self._closs = isloss[-1]
324 |         self._alpha = alpha
325 | 
326 |     def MWF(self):
327 |         """ Multi-channel Wiener filtering as appears in:
328 |             I. Cohen, J. Benesty, and S. Gannot, Speech Processing in Modern
329 |             Communication, Springer, Berlin, Heidelberg, 2010, Chapter 9.
330 |         Args:
331 |             mTarget: (3D ndarray) Magnitude Spectrogram of the target component
332 |             mY:      (3D ndarray) Magnitude Spectrogram of the output component
333 |                      (M channels x F frequency samples x T time-frames).
334 |         Returns:
335 |             _Out:    (3D ndarray) Array that contains the estimated source.
336 |         """
337 |         # Parameter for the update
338 |         flambda = 0.99    # Forgetting Factor
339 | 
340 |         cX = self._sTarget ** self._alpha
341 |         cN = self._nResidual ** self._alpha
342 | 
343 |         M = self._mX.shape[0]    # Number of channels
344 |         gF = 1./M                # Gain factor
345 |         eM = cX.shape[0]         # Number of estimated channels
346 |         F = cX.shape[1]          # Number of frequency samples
347 |         T = cX.shape[2]          # Number of time-frames
348 |         fout = np.zeros((M,F,T), dtype = np.float32)    # Initializing output
349 |         I = np.eye(M)            # Identity matrix
350 | 
351 |         # Initialization of covariance matrices
352 |         Rxx = np.repeat(np.reshape(I, (M,M,1)), F, axis = -1)
353 |         Rnn = np.repeat(np.reshape(I, (M,M,1)), F, axis = -1)
354 | 
355 |         # Recursive updates
356 |         for t in xrange(T):
357 |             for f in xrange(F):
358 |                 if eM == 1:
359 |                     Rxx[:, :, f] = flambda * Rxx[:, :, f] + (1. - flambda) * (cX[:, f, t])
360 |                     Rnn[:, :, f] = flambda * Rnn[:, :, f] + (1.
- flambda) * (cN[:, f, t]) 361 | else: 362 | Rxx[:, :, f] = (np.dot(cX[:, f:f+1, t], cX[:, f:f+1, t].T))/np.sum(cX[:,f,t], axis = 0) 363 | Rnn[:, :, f] = (np.dot(cN[:, f:f+1, t], cN[:, f:f+1, t].T))/np.sum(cN[:,f,t], axis = 0) 364 | 365 | inv = np.dot(np.linalg.pinv(Rnn[:, :, f]), (Rnn[:, :, f] + Rxx[:, :, f])) 366 | if eM == 1: 367 | Wf = ((inv - I)/( (cN[:,f,t] + cX[:, f, t] + 1e-16)/(cX[:,f,t] + 1e-16) + np.trace(inv) * gF)) 368 | else : 369 | Wf = ((inv - I)/(gF * np.trace(inv))) 370 | 371 | fout[:, f, t] = np.dot(Wf.T, self._mX[:, f, t]) 372 | 373 | self._Out = np.abs(fout) 374 | 375 | def applyMask(self): 376 | """ Compute the filtered output spectrogram. 377 | Args: 378 | mask: (2D ndarray) Array that contains time frequency gain values 379 | mX: (2D ndarray) Input Magnitude Spectrogram 380 | Returns: 381 | Y: (2D ndarray) Filtered version of the Magnitude Spectrogram 382 | """ 383 | if self._method == 'expMask': 384 | self._Out = (self._mX ** self._alpha) ** self._mask 385 | else : 386 | self._Out = np.multiply(self._mask, self._mX) 387 | 388 | def applyReverseMask(self): 389 | """ Compute the filtered output spectrogram, reversing the gain values. 390 | Args: 391 | mask: (2D ndarray) Array that contains time frequency gain values 392 | mX: (2D ndarray) Input Magnitude Spectrogram 393 | Returns: 394 | Y: (2D ndarray) Filtered version of the Magnitude Spectrogram 395 | """ 396 | if self._method == 'expMask': 397 | raise ValueError('Cannot compute that using such masking method.') 398 | else : 399 | self._Out = np.multiply( (1. - self._mask), self._mX) 400 | 401 | def _IS(self, Xhat): 402 | """ Compute the Itakura-Saito distance between the observed magnitude spectrum 403 | and the estimated one. 404 | Args: 405 | mX : (2D ndarray) Input Magnitude Spectrogram 406 | Xhat : (2D ndarray) Estimated Magnitude Spectrogram 407 | Returns: 408 | dis : (float) Average Itakura-Saito distance 409 | """ 410 | r1 = (np.abs(self._mX)**self._alpha + self._eps) / (np.abs(Xhat) + self._eps) 411 | lg = np.log((np.abs(self._mX)**self._alpha + self._eps)) - np.log((np.abs(Xhat) + self._eps)) 412 | return np.mean(r1 - lg - 1.) 413 | 414 | def _dIS(self, Xhat): 415 | """ Computation of the first derivative of Itakura-Saito function. As appears in : 416 | Cedric Fevotte and Jerome Idier, "Algorithms for nonnegative matrix factorization 417 | with the beta-divergence", in CoRR, vol. abs/1010.1763, 2010. 418 | Args: 419 | mX : (2D ndarray) Input Magnitude Spectrogram 420 | Xhat : (2D ndarray) Estimated Magnitude Spectrogram 421 | Returns: 422 | dis' : (float) Average of first derivative of Itakura-Saito distance. 
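                   In closed form, as implemented below: dIS'(Xhat) = mean( |Xhat|^(-2) * (|Xhat| - |mX|^alpha) ).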
423 | """ 424 | dis = (np.abs(Xhat + self._eps) ** (-2.)) * (np.abs(Xhat) - np.abs(self._mX)**self._alpha) 425 | return (np.mean(dis)) 426 | 427 | if __name__ == "__main__": 428 | 429 | # Small test 430 | kSin = (0.5 * np.cos(np.arange(4096) * (1000.0 * (3.1415926 * 2.0) / 44100))) 431 | noise = (np.random.uniform(-0.25,0.25,4096)) 432 | # Noisy observation 433 | obs = (kSin + noise) 434 | 435 | kSinX = fft(kSin, 4096) 436 | noisX = fft(noise, 4096) 437 | obsX = fft(obs, 4096) 438 | 439 | # Wiener Case 440 | mask = FrequencyMasking(np.abs(obsX), np.abs(kSinX), [np.abs(noisX)], [], [], alpha = 2., method = 'alphaWiener') 441 | sinhat = mask() 442 | noisehat = mask(reverse = True) 443 | # Access the mask if needed 444 | ndmask = mask._mask -------------------------------------------------------------------------------- /QMF/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = 'S.I. Mimilakis' 3 | __copyright__ = 'MacSeNet' -------------------------------------------------------------------------------- /QMF/old_vers/IOMethods.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __author__ = 'S.I. Mimilakis' 3 | __copyright__ = 'MacSeNet' 4 | 5 | import os, subprocess, csv 6 | import numpy as np 7 | import wave as _wave 8 | from scipy.io.wavfile import write, read 9 | from sys import platform 10 | 11 | class AudioIO: 12 | """ Class for handling audio input/output operations. 13 | It supports reading and writing of various audio formats 14 | via 'audioRead' & 'audioWrite' methods. Moreover playback 15 | can be performed by using 'sound' method. For formats 16 | different than '.wav' a coder is needed. In this case 17 | libffmpeg is being used, where the absolute path of 18 | the static build should be given to the class variable. 19 | Finally, energy normalisation and anti-clipping methods 20 | are also covered in the last two methods. 21 | 22 | Basic Usage examples: 23 | Import the class : 24 | import IOMethods as IO 25 | -For loading wav files: 26 | x, fs = IO.AudioIO.wavRead('myWavFile.wav', mono = True) 27 | -In case that compressed files are about to be read specify 28 | the path to the libffmpeg library by changing the 'pathToffmpeg' 29 | variable and then type: 30 | x, fs = IO.AudioIO.audioRead() 31 | -For writing wav files: 32 | IO.AudioIO.audioWrite(x, fs, 16, 'myNewWavFile.wav', 'wav') 33 | 34 | -For listening wav files: 35 | IO.AudioIO.sound(x,fs) 36 | 37 | """ 38 | # Normalisation parameters for wavreading and writing 39 | normFact = {'int8' : (2**7) -1, 40 | 'int16': (2**15)-1, 41 | 'int24': (2**23)-1, 42 | 'int32': (2**31)-1, 43 | 'int64': (2**63)-1, 44 | 'float32': 1.0, 45 | 'float64': 1.0} 46 | 47 | # 'Silence' the bash output 48 | FNULL = open(os.devnull, 'w') 49 | 50 | # Absolute path needed here 51 | pathToffmpeg = '/home/mis/Documents/Python/Projects/SourceSeparation/MiscFiles' 52 | 53 | def __init__(self): 54 | pass 55 | 56 | @staticmethod 57 | def audioRead(fileName, mono=False, startSec=None, endSec=None): 58 | """ Function to load audio files such as *.mp3, *.au, *.wma & *.aiff. 59 | It first converts them to .wav and reads them with the methods below. 60 | Currently, it uses a static build of ffmpeg. 
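            A usage sketch (file name and segment bounds are illustrative):
                x, fs = AudioIO.audioRead('mySong.mp3', mono = True, startSec = 0.0, endSec = 10.0)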
61 | 62 | Args: 63 | fileName: (str) Absolute filename of WAV file 64 | mono: (bool) Switch if samples should be converted to mono 65 | startSec: (float) Segment start time in seconds (if None, segment starts at the beginning of the WAV file) 66 | endSec: (float) Segment end time in seconds (if None, segment ends at the end of the WAV file) 67 | Returns: 68 | samples: (np array) Audio samples (between [-1,1] 69 | (if stereo: numSamples x numChannels, 70 | if mono: numSamples) 71 | sampleRate: (float): Sampling frequency [Hz] 72 | """ 73 | 74 | # Get the absolute path 75 | fileName = os.path.abspath(fileName) 76 | 77 | # Linux 78 | if (platform == "linux") or (platform == "linux2"): 79 | convDict = { 80 | 'mp3':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 81 | + ' -i ' + fileName + ' ', -3], 82 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 83 | + ' -i ' + fileName + ' ', -2], 84 | 'wma':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 85 | + ' -i ' + fileName + ' ', -3], 86 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') 87 | + ' -i ' + fileName + ' ', -4] 88 | } 89 | 90 | # MacOSX 91 | elif (platform == "darwin"): 92 | convDict = { 93 | 'mp3':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 94 | + ' -i ' + fileName + ' ', -3], 95 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 96 | + ' -i ' + fileName + ' ', -2], 97 | 'wma':[os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 98 | + ' -i ' + fileName + ' ', -3], 99 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') 100 | + ' -i ' + fileName + ' ', -4] 101 | } 102 | # Add windows support! 103 | else : 104 | raise Exception('This OS is not supported.') 105 | 106 | # Construct 107 | 108 | if fileName[convDict['mp3'][1]:] == 'mp3': 109 | print(fileName[convDict['mp3'][1]:]) 110 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['mp3'][1]] + 'wav')) 111 | subprocess.call(convDict['mp3'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 112 | samples, sampleRate = AudioIO.wavRead(modfileName, mono, startSec, endSec) 113 | os.remove(modfileName) 114 | 115 | elif fileName[convDict['au'][1]:] == 'au': 116 | print(fileName[convDict['au'][1]:]) 117 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['au'][1]] + 'wav')) 118 | subprocess.call(convDict['au'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 119 | samples, sampleRate = AudioIO.wavRead(modfileName, mono, startSec, endSec) 120 | os.remove(modfileName) 121 | 122 | elif fileName[convDict['wma'][1]:] == 'wma': 123 | print(fileName[convDict['wma'][1]:]) 124 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['wma'][1]] + 'wav')) 125 | subprocess.call(convDict['wma'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 126 | samples, sampleRate = AudioIO.wavRead(modfileName, mono, startSec, endSec) 127 | os.remove(modfileName) 128 | 129 | elif fileName[convDict['aiff'][1]:] == 'aiff': 130 | print(fileName[convDict['aiff'][1]:]) 131 | modfileName = os.path.join(os.path.abspath(fileName[:convDict['aiff'][1]] + 'wav')) 132 | subprocess.call(convDict['aiff'][0]+modfileName, shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 133 | samples, sampleRate = AudioIO.wavRead(modfileName, mono, startSec, endSec) 134 | os.remove(modfileName) 135 | 136 | else : 137 | raise Exception('This format is not supported.') 138 | 139 | return samples, sampleRate 140 | 141 | @staticmethod 142 | def audioWrite(y, fs, nbits, audioFile, 
format): 143 | """ Write samples to WAV file and then converts to selected 144 | format using ffmpeg. 145 | Args: 146 | samples: (ndarray / 2D ndarray) (floating point) sample vector 147 | mono: DIM: nSamples 148 | stereo: DIM: nSamples x nChannels 149 | 150 | fs: (int) Sample rate in Hz 151 | nBits: (int) Number of bits 152 | audioFile: (string) WAV file name to write 153 | format: (string) Selected format 154 | 'mp3' : Writes to .mp3 155 | 'wma' : Writes to .wma 156 | 'wav' : Writes to .wav 157 | 'aiff' : Writes to .aiff 158 | 'au' : Writes to .au 159 | """ 160 | 161 | # Linux 162 | if (platform == "linux") or (platform == "linux2"): 163 | convDict = { 164 | 'mp3': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -3], 165 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -2], 166 | 'wma': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -3], 167 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_linux') + ' -i ', -4] 168 | } 169 | 170 | # MacOSX 171 | elif (platform == "darwin"): 172 | convDict = { 173 | 'mp3': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -3], 174 | 'au': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -2], 175 | 'wma': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -3], 176 | 'aiff': [os.path.join(AudioIO.pathToffmpeg, 'ffmpeg_osx') + ' -i ', -4] 177 | } 178 | 179 | # Add windows support! 180 | else : 181 | raise Exception('This OS is not supported.') 182 | 183 | if (format == 'mp3'): 184 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['mp3'][1]] + 'wav')) 185 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 186 | subprocess.call(convDict['mp3'][0] + prmfileName + ' ' + audioFile, 187 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 188 | os.remove(prmfileName) 189 | 190 | elif (format == 'wav'): 191 | AudioIO.wavWrite(y, fs, nbits, audioFile) 192 | 193 | elif (format == 'wma'): 194 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['wma'][1]] + 'wav')) 195 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 196 | subprocess.call(convDict['wma'][0] + prmfileName + ' ' + audioFile, 197 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 198 | os.remove(prmfileName) 199 | 200 | elif (format == 'aiff'): 201 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['aiff'][1]] + 'wav')) 202 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 203 | subprocess.call(convDict['aiff'][0] + prmfileName + ' ' + audioFile, 204 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 205 | os.remove(prmfileName) 206 | 207 | elif (format == 'au'): 208 | prmfileName = os.path.join(os.path.abspath(audioFile[:convDict['au'][1]] + 'wav')) 209 | AudioIO.wavWrite(y, fs, nbits, prmfileName) 210 | subprocess.call(convDict['au'][0] + prmfileName + ' ' + audioFile, 211 | shell = True, stdout=AudioIO.FNULL, stderr=subprocess.STDOUT) 212 | os.remove(prmfileName) 213 | else : 214 | raise Exception('This format is not supported.') 215 | 216 | @staticmethod 217 | def wavRead(fileName, mono=False, startSec=None, endSec=None): 218 | """ Function to load WAV file. 
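            Reading only a segment is also supported, e.g. (illustrative values):
                x, fs = AudioIO.wavRead('myWavFile.wav', mono = True, startSec = 1.0, endSec = 3.5)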
219 | 
220 |         Args:
221 |             fileName:   (str)   Absolute filename of WAV file
222 |             mono:       (bool)  Switch if samples should be converted to mono
223 |             startSec:   (float) Segment start time in seconds (if None, segment starts at the beginning of the WAV file)
224 |             endSec:     (float) Segment end time in seconds (if None, segment ends at the end of the WAV file)
225 |         Returns:
226 |             samples:    (np array)  Audio samples (between [-1, 1])
227 |                         (if stereo: numSamples x numChannels,
228 |                          if mono:   numSamples)
229 |             sampleRate: (float) Sampling frequency [Hz]
230 |         """
231 |         try:
232 |             samples, sampleRate = AudioIO._loadWAVWithWave(fileName)
233 |             sWidth = _wave.open(fileName).getsampwidth()
234 |             if sWidth == 1:
235 |                 #print('8bit case')
236 |                 samples = samples.astype(float) / AudioIO.normFact['int8'] - 1.0
237 |             elif sWidth == 2:
238 |                 #print('16bit case')
239 |                 samples = samples.astype(float) / AudioIO.normFact['int16']
240 |             elif sWidth == 3:
241 |                 #print('24bit case')
242 |                 samples = samples.astype(float) / AudioIO.normFact['int24']
243 |         except:
244 |             #print('32bit case')
245 |             samples, sampleRate = AudioIO._loadWAVWithScipy(fileName)
246 | 
247 |         # mono conversion
248 |         if mono:
249 |             if samples.ndim == 2 and samples.shape[1] > 1:
250 |                 samples = (samples[:, 0] + samples[:, 1])*0.5
251 | 
252 |         # segment selection
253 |         songLenSamples = samples.shape[0]
254 |         if startSec is None:
255 |             startIdx = 0
256 |         else:
257 |             startIdx = int(round(startSec*sampleRate))
258 |         if endSec is None:
259 |             endIdx = songLenSamples-1
260 |         else:
261 |             endIdx = int(round(endSec*sampleRate))
262 |         if startIdx < 0 or startIdx > songLenSamples:
263 |             raise Exception("Segment start sample index out of song boundaries!")
264 |         if endIdx < startIdx or endIdx > songLenSamples:
265 |             raise Exception("Segment end sample index out of song boundaries!")
266 |         if samples.ndim == 1:
267 |             samples = samples[startIdx:endIdx]
268 |         else:
269 |             samples = samples[startIdx:endIdx, :]
270 | 
271 |         return samples, sampleRate
272 | 
273 |     @staticmethod
274 |     def _loadWAVWithWave(fileName):
275 |         """ Load samples & sample rate from 24 bit WAV file """
276 |         wav = _wave.open(fileName)
277 |         rate = wav.getframerate()
278 |         nchannels = wav.getnchannels()
279 |         sampwidth = wav.getsampwidth()
280 |         nframes = wav.getnframes()
281 |         data = wav.readframes(nframes)
282 |         wav.close()
283 |         array = AudioIO._wav2array(nchannels, sampwidth, data)
284 | 
285 |         return array, rate
286 | 
287 |     @staticmethod
288 |     def _loadWAVWithScipy(fileName):
289 |         """ Load samples & sample rate from WAV file """
290 |         inputData = read(fileName)
291 |         samples = inputData[1]
292 |         sampleRate = inputData[0]
293 | 
294 |         return samples, sampleRate
295 | 
296 |     @staticmethod
297 |     def _wav2array(nchannels, sampwidth, data):
298 |         """data must be the string containing the bytes from the wav file."""
299 |         num_samples, remainder = divmod(len(data), sampwidth * nchannels)
300 |         if remainder > 0:
301 |             raise ValueError('The length of data is not a multiple of '
302 |                              'sampwidth * num_channels.')
303 |         if sampwidth > 4:
304 |             raise ValueError("sampwidth must not be greater than 4.")
305 | 
306 |         if sampwidth == 3:
307 |             a = np.empty((num_samples, nchannels, 4), dtype = np.uint8)
308 |             raw_bytes = np.fromstring(data, dtype = np.uint8)
309 |             a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
310 |             a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
311 |             result = a.view('<i4').reshape(a.shape[:-1])
312 |         else:
313 |             # 8 bit samples are stored as unsigned ints; others as signed ints.
314 |             dt_char = 'u' if sampwidth == 1 else 'i'
315 |             a = np.fromstring(data, dtype = '<%s%d' % (dt_char, sampwidth))
316 |             result = a.reshape(-1, nchannels)
317 | 
318 |         return result
319 | 
320 |     @staticmethod
321 |     def wavWrite(y, fs, nbits, audioFile):
322 |         """ Write (floating point) samples to a WAV file.
323 |         Args:
324 |             y:         (ndarray / 2D ndarray) sample vector
325 |                        mono:   DIM: nSamples
326 |                        stereo: DIM: nSamples x nChannels
327 | 
328 |             fs:        (int) Sample rate in Hz
329 |             nbits:     (int) Number of bits
330 |             audioFile: (string) WAV file name to write
331 |         """
332 |         if nbits == 8:
333 |             intsamples = (y + 1.0) * AudioIO.normFact['int' + str(nbits)]
334 |             fX = np.int8(intsamples)
335 |         elif nbits == 16:
336 |             fX = np.int16(y * AudioIO.normFact['int' + str(nbits)])
337 |         elif nbits > 16:
338 |             fX = y
339 | 
340 |         write(audioFile, fs, fX)
341 | 
342 |     @staticmethod
343 |     def sound(x,fs):
344 |         """ Plays back the given samples using the pyglet library; they are first written to a temporary wav file.
345 |         Args:
346 |             x:  (array) Floating point samples
347 |             fs: (int)   The sampling rate
348 |         """
349 |         import pyglet as pg
350 |         # Call the writing function
351 |         AudioIO.wavWrite(x, fs, 16, 'testPlayback.wav')
352 |         # Initialize the playback object with the audio file
353 |         playback = pg.media.load('testPlayback.wav')
354 |         # Sound call
355 |         playback.play()
356 |         # Remove the dummy wave write
357 |         os.remove('testPlayback.wav')
358 | 
359 |     @staticmethod
360 |     def energyNormalisation(x1, x2, wsz = 1024):
361 |         """ Function to perform energy normalisation of two audio signals,
362 |             based on envelopes acquired by Hilbert transformation.
363 | 
364 |         Args:
365 |             x1  : (np array) First input signal
366 |             x2  : (np array) Second input signal
367 |             wsz : (int) Number of samples to take into account for the
368 |                         computation of the analytic function. If set
369 |                         to zero the whole signal will be analysed
370 |                         at once.
371 |         Returns:
372 |             y1  : (np array) Energy normalised output signal
373 |             y2  : (np array) Energy normalised output signal
374 |         """
375 |         x1.shape = (len(x1), 1)
376 |         x2.shape = (len(x2), 1)
377 | 
378 |         if wsz == 0:
379 |             xa1 = AF.HilbertTransformation(x1, mode = 'global', wsz = wsz)  # 'AF': external Hilbert-transform helper, not included in this folder
380 |             xa2 = AF.HilbertTransformation(x2, mode = 'global', wsz = wsz)
381 | 
382 |             energy1 = np.mean(np.abs(xa1) ** 2.0)
383 |             energy2 = np.mean(np.abs(xa2) ** 2.0)
384 | 
385 |             if energy1 > energy2:
386 |                 rt = energy1/energy2
387 |                 y2 = x2 * rt
388 |                 y1 = x1
389 |             else :
390 |                 rt = energy2/energy1
391 |                 y1 = x1 * rt
392 |                 y2 = x2
393 | 
394 |             y1 = AudioIO.twoSideClip(y1, -1.0, 1.0)
395 |             y2 = AudioIO.twoSideClip(y2, -1.0, 1.0)
396 | 
397 |         else:
398 | 
399 |             if len(x1) > len(x2):
400 |                 x1 = np.append(x1, np.zeros(len(x1)%wsz))
401 |                 x2 = np.append(x2, np.zeros(len(x1) - len(x2)))
402 |             else:
403 |                 x2 = np.append(x2, np.zeros(len(x2)%wsz))
404 |                 x1 = np.append(x1, np.zeros(len(x2) - len(x1)))
405 | 
406 |             xa1 = AF.HilbertTransformation(x1, mode = 'local', wsz = wsz)
407 |             xa2 = AF.HilbertTransformation(x2, mode = 'local', wsz = wsz)
408 | 
409 |             y1 = np.empty(len(x1))
410 |             y2 = np.empty(len(x2))
411 | 
412 |             energy1 = np.abs(xa1)
413 |             energy2 = np.abs(xa2)
414 | 
415 |             pin = 0
416 |             pend = len(x1) - wsz
417 | 
418 |             while pin <= pend :
419 | 
420 |                 lclE1 = np.mean(energy1[pin : pin + wsz])
421 |                 lclE2 = np.mean(energy2[pin : pin + wsz])
422 | 
423 |                 if (lclE1 > lclE2) and (lclE1 > 1e-4) and (lclE2 > 1e-4):
424 |                     rt = lclE1/lclE2
425 |                     bufferY2 = x2[pin : pin + wsz] * rt
426 |                     bufferY1 = x1[pin : pin + wsz]
427 | 
428 |                 elif (lclE1 < lclE2) and (lclE1 > 1e-4) and (lclE2 > 1e-4):
429 |                     rt = lclE2/lclE1
430 |                     bufferY1 = x1[pin : pin + wsz] * rt
431 |                     bufferY2 = x2[pin : pin + wsz]
432 | 
433 |                 else:
434 |                     bufferY1 = x1[pin : pin + wsz]
435 |                     bufferY2 = x2[pin : pin + wsz]
436 | 
437 |                 y1[pin : pin + wsz] = AudioIO.twoSideClip(bufferY1, -1.0, 1.0)
438 |                 y2[pin : pin + wsz] = AudioIO.twoSideClip(bufferY2, -1.0, 1.0)
439 | 
440 |                 pin += wsz
441 | 
442 |         y1.shape = (len(y1),1)
443 |         y2.shape = (len(y2),1)
444 |         return y1, y2
445 | 
446 |     @staticmethod
447 |     def twoSideClip(x, minimum, maximum):
448 |         """ Method to limit an input array inside a given
449 |             range.
450 |         Args:
451 |             x :       (np array) Input array to be limited
452 |             minimum : (int) Minimum value to be considered for clipping.
453 |             maximum : (int) Maximum value to be considered for clipping.
454 | 
455 |         Returns:
456 |             x :       (np array) Limited output array
457 |         """
458 | 
459 |         for indx in range(len(x)):
460 |             if x[indx] < minimum:
461 |                 x[indx] = minimum
462 |             elif x[indx] > maximum:
463 |                 x[indx] = maximum
464 | 
465 |         return x
466 | 
467 | if __name__ == "__main__":
468 |     # Define File
469 |     myReadFile = 'EnterYourWavFile.wav'
470 |     # Read the file
471 |     x, fs = AudioIO.wavRead(myReadFile, mono = True)
472 |     # Gain parameter
473 |     g = 0.5
474 |     # Listen to it
475 |     AudioIO.sound(x*g,fs)
476 |     # Make a pseudo-stereo version and write it to disk
477 |     x2 = np.empty((len(x),2), dtype = np.float32)
478 |     try :
479 |         x2[:,0] = x * g
480 |         x2[:,1] = np.roll(x*g, 512)
481 |     except ValueError:
482 |         x2[:,0] = x[:,0] * g
483 |         x2[:,1] = np.roll(x[:,0] * g, 256)
484 |     # Listen to the stereo processed version
485 |     AudioIO.sound(x2*g,fs)
486 |     AudioIO.audioWrite(x2, fs, 16, 'myNewWavFile.wav', 'wav')
487 | 
488 | 
--------------------------------------------------------------------------------
/QMF/old_vers/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __author__ = 'S.I. Mimilakis'
3 | __copyright__ = 'MacSeNet'
--------------------------------------------------------------------------------
/QMF/old_vers/foldingmat_filterbank.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.fftpack as spfft
3 | 
4 | def ha2Pa3d(ha,N):
5 |     #usage: Pa=ha2Pa3d(ha,N);
6 |     #produces the analysis polyphase matrix Pa
7 |     #in 3D matrix representation
8 |     #from a baseband filter ha with
9 |     #a cosine modulation
10 |     #N: Blocklength
11 |     #Gerald Schuller
12 |     #shl@idmt.fhg.de
13 |     #Dec-2-15
14 | 
15 |     import numpy as np
16 | 
17 |     L=len(ha);
18 | 
19 |     blocks=int(np.ceil(L/N));
20 |     #print(blocks)
21 | 
22 |     Pa=np.zeros((N,N,blocks));
23 | 
24 |     for k in range(N): #subband
25 |         for m in range(blocks): #m: block number
26 |             for nphase in range(N): #nphase: Phase
27 |                 n=m*N+nphase;
28 |                 #indexing like impulse response, phase index is reversed (N-np):
29 |                 Pa[N-1-nphase,k,m]=ha[n]*np.sqrt(2.0/N)*np.cos(np.pi/N*(k+0.5)*(blocks*N-1-n-N/2.0+0.5));
30 | 
31 |     return Pa
32 | 
33 | def hs2Ps3d(hs,N):
34 |     #usage: Ps=hs2Ps3d(hs,N);
35 |     #produces the synthesis polyphase matrix Ps
36 |     #in 3D matrix representation
37 |     #from a baseband filter hs with
38 |     #a cosine modulation
39 |     #N: Blocklength
40 |     #Gerald Schuller
41 |     #shl@idmt.fhg.de
42 |     #Dec-2-15
43 | 
44 |     import numpy as np
45 | 
46 |     L=len(hs);
47 | 
48 |     blocks=int(np.ceil(L/N));
49 |     #print(blocks)
50 | 
51 |     Ps=np.zeros((N,N,blocks));
52 | 
53 |     for k in range(N): #subband
54 |         for m in range(blocks): #m: block number
55 |             for nphase in range(N): #nphase: Phase
56 |                 n=m*N+nphase;
57 |                 #synthesis:
58 |                 Ps[k,nphase,m]=hs[n]*np.sqrt(2.0/N)*np.cos(np.pi/N*(k+0.5)*(n-N/2.0+0.5));
59 | 
60 |     return Ps
61 | 
62 | def ha2Fa3d(ha,N):
63 |     #usage: Fa=ha2Fa3d(ha,N);
64 |     #produces the analysis polyphase folding matrix Fa with all polyphase components
65 |     #in 3D matrix representation
66 |     #from a baseband filter ha with
67 |     #a cosine modulation
68 |     #N: Blocklength
69 |     #Gerald Schuller
70 |     #shl@idmt.fhg.de
71 |     #Dec-2-15
72 | 
73 |     Pa=ha2Pa3d(ha,N);
74 |     Fa=polmatmult(Pa,DCToMatrix(N))
75 |     #round the polyphase components to 8 decimals after the point:
76 |     Fa=np.around(Fa,8)
77 | 
78 |     return Fa
79 | 
80 | def hs2Fs3d(hs,N):
81 |     #usage: Fs=hs2Fs3d(hs,N);
82 |     #produces the synthesis polyphase folding matrix Fs with all polyphase components
83 |     #in 3D matrix representation
84 |     #from a baseband filter hs with
85 |     #a cosine modulation
86 |     #N: Blocklength
87 |     #Gerald Schuller
88 |     #shl@idmt.fhg.de
89 |     #Dec-2-15
90 | 
91 |     Ps=hs2Ps3d(hs,N);
92 |     Fs=polmatmult(DCToMatrix(N),Ps)
93 |     #round the polyphase components to 8 decimals after the point:
94 |     Fs=np.around(Fs,8)
95 | 
96 |     return Fs
97 | 
98 | def DCToMatrix(N):
99 |     #produces an odd DCT matrix with size NxN
100 |     #Gerald Schuller, Dec. 2015
101 | 
102 |     import numpy as np
103 | 
104 |     y=np.zeros((N,N,1));
105 | 
106 |     for n in range(N):
107 |         for k in range(N):
108 |             y[n,k,0]=np.sqrt(2.0/N)*np.cos(np.pi/N*(k+0.5)*(n+0.5));
109 |             #y(n,k)=cos(pi/N*(k-0.5)*(n-1));
110 |     return y
111 | 
112 | def polmatmult(A,B):
113 |     #function C=polmatmult(A,B)
114 |     #multiplies 2 polynomial matrices A and B, where each matrix entry is a polynomial, e.g. in z^-1.
115 |     #Those polynomial entries are in the 3rd dimension
116 |     #The third dimension can also be interpreted as containing the (2D) coefficient matrices for each
117 |     #exponent of z^-1.
118 |     #Result is C=A*B;
119 | 
120 |     import numpy as np
121 |     from scipy import sparse
122 | 
123 |     [NAx,NAy,NAz]=A.shape;
124 |     [NBx,NBy,NBz]=B.shape;
125 | 
126 |     #Degree +1 of the resulting polynomial, with NAz-1 and NBz-1 being the degrees of the input polynomials:
127 |     Deg=NAz+NBz-1;
128 | 
129 |     C=np.zeros((NAx,NBy,Deg));
130 | 
131 |     for n in range(Deg):
132 |         for m in range(n+1):
133 |             if ((n-m)<NAz) and (m<NBz):
134 |                 C[:,:,n]=C[:,:,n]+np.dot(A[:,:,(n-m)],B[:,:,m]);
135 | 
136 |     return C
>= 0 ]))
121 | 
122 |     if option == 'matplotlib' :
123 |         # Matplotlib
124 |         line.set_ydata(20. * np.log10(b_mX[0, :-1] + 1e-16))
125 |         # Check for scaling the noise!
126 |         line2.set_ydata(20. * np.log10(mt[0, :-1] + 1e-16))
127 |         plt.draw()
128 |         plt.pause(0.00001)
129 | 
130 |     else :
131 |         # Pygame
132 |         screen.fill(background_color)
133 |         prv_pos = (60, 480)
134 |         prv_pos2 = (60, 480)
135 |         prv_pos3 = (60, 480)
136 |         for n in xrange(0, wsz):
137 |             val = 20. * np.log10(b_mX[0, n] + 1e-16)
138 |             val2 = 20. * np.log10(mt[0, n] + 1e-16)
139 |             val3 = 20.
* np.log10(b_nX[0, n] * mt[0, n] + 1e-16) 140 | val3 /= -120 141 | val /= -120 142 | val2/= -120 143 | val *= 480 144 | val2 *= 480 145 | val3 *= 480 146 | position = (n + 60, int(val)) 147 | position2 = (n + 60, int(val2)) 148 | position3 = (n + 60, int(val3)) 149 | pygame.draw.line(screen, color, prv_pos, position) 150 | pygame.draw.line(screen, color2, prv_pos2, position2) 151 | pygame.draw.line(screen, color3, prv_pos3, position3) 152 | prv_pos = position 153 | prv_pos2 = position2 154 | prv_pos3 = position3 155 | 156 | # Print the surface 157 | screen.blit(xlabel, (895, 460)) 158 | screen.blit(ylabel, (0, 5)) 159 | screen.blit(legendA, (800, 0)) 160 | screen.blit(legendB, (800, 15)) 161 | offset = font.render("Masking Threshold Offset in dB: " + str(gain), 1, (0, 250, 0)) 162 | bpc = font.render("Average bits per subband: " + str(bc), 1, (190, 160, 110)) 163 | helptext = font.render("(Adjust the threshold by pressing 'Up' & 'Down' Arrow keys)", 1, (0, 250, 0)) 164 | screen.blit(offset, (300, 0)) 165 | screen.blit(helptext, (300, 15)) 166 | screen.blit(bpc, (300, 30)) 167 | 168 | for n2 in xrange(len(dBpos)): 169 | dB = font.render(str(np.int(dBScales[n2])), 1, (0,120,120)) 170 | screen.blit(dB, (20, int(dBpos[n2]))) 171 | 172 | # Display 173 | pygame.display.flip() 174 | 175 | # Acquire Segment 176 | xSeg = x[pin:pin+wsz, 0] 177 | nSeg = noise[pin:pin+wsz, 0] 178 | 179 | # Perform DFT on segment 180 | mX, pX = TF.TimeFrequencyDecomposition.DFT(xSeg, w, N) 181 | nX, npX = TF.TimeFrequencyDecomposition.DFT(nSeg, w, N) 182 | 183 | # Set it to buffer 184 | b_mX[0, :] = mX 185 | b_nX[0, :] = nX 186 | # Masking threshold 187 | mt = pm.maskingThreshold(b_mX) * (10**(gain/20.)) 188 | 189 | # Resynthesize 190 | nSeg = TF.TimeFrequencyDecomposition.iDFT(nX * mt[0, :], npX, wsz) 191 | xSeg = TF.TimeFrequencyDecomposition.iDFT(mX, pX, wsz) 192 | mix = (xSeg + nSeg) * hop 193 | 194 | ola_buffer[0, 0:wsz] = prv_seg 195 | ola_buffer[0, hop:hop+wsz] += mix 196 | 197 | # Place it to output buffer 198 | output_buffer = ola_buffer[:, :wsz] 199 | 200 | # Playback 201 | writedata = output_buffer[0, :].astype(np.float32).tostring() 202 | stream.write(writedata, num_frames = wsz/2, exception_on_underflow = False) 203 | 204 | # Store previous frame samples 205 | prv_seg = ola_buffer[:, hop:wsz+hop] 206 | 207 | # Clear the overlap 208 | ola_buffer = np.zeros((1, wsz+hop), dtype = np.float32) 209 | 210 | # Update pointer and index 211 | pin += hop 212 | indx += 1 213 | 214 | plt.close() 215 | stream.stop_stream() 216 | stream.close() 217 | p.terminate() 218 | pygame.display.quit() 219 | pygame.quit() 220 | -------------------------------------------------------------------------------- /testFiles/pulse.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/Audio-Masking-Methods/64782c3ff9564b06677c2a2112b2b2bdb48dc2a1/testFiles/pulse.wav -------------------------------------------------------------------------------- /testFiles/ramp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ashishpatel26/Audio-Masking-Methods/64782c3ff9564b06677c2a2112b2b2bdb48dc2a1/testFiles/ramp.wav -------------------------------------------------------------------------------- /testFiles/sc03_16m.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ashishpatel26/Audio-Masking-Methods/64782c3ff9564b06677c2a2112b2b2bdb48dc2a1/testFiles/sc03_16m.wav --------------------------------------------------------------------------------