function sig = ISTFT(specgram,shiftSize,analyWin,orgLen)
%
% Inverse short-time Fourier transform
% Synthesis window is calculated based on minimal distortion principle,
% which is described below:
% D. Griffin and J. Lim, "Signal estimation from modified short-time
% Fourier transform," IEEE Transactions on Acoustics, Speech, and Signal
% Processing, vol. 32, no. 2, pp. 236-243, 1984.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   sig = ISTFT(specgram,shiftSize)
%   sig = ISTFT(specgram,shiftSize,analyWin)
%   sig = ISTFT(specgram,shiftSize,analyWin,orgLen)
%
% [inputs]
%   specgram: STFT of input signal (frequency bins (fftSize/2+1) x time frames x channels)
%  shiftSize: frame shift length (positive integer that divides fftSize)
%   analyWin: analysis window function used in STFT (fftSize x 1) or choose used analysis window function from below:
%             'hamming'    : Hamming window (default)
%             'hann'       : von Hann window
%             'rectangular': rectangular window
%             'blackman'   : Blackman window
%             'sine'       : sine window
%     orgLen: length of original signal (before zero padding) (default: the same as that of output signal)
%
% [outputs]
%        sig: time-domain waveform of input spectrogram (signal x channels)
%

% Arguments check and set default values.
% analyWin and orgLen carry default values so that they are optional, as the
% [syntax] section promises (arguments without a default are required).
arguments
    specgram (:,:,:) {mustBeNumeric}
    shiftSize (1,1) double {mustBeInteger, mustBePositive}
    analyWin = 'hamming'
    orgLen (1,1) double {mustBeInteger, mustBeNonnegative} = 0
end

% Error check
[nFreq, nFrame, nCh] = size(specgram);
fftSize = (nFreq-1) * 2; % fft length used in STFT
if nCh > nFreq; error('Input spectrogram might be wrong. The size of it must be (freq x frame x ch).'); end
if isreal(specgram); error('Input spectrogram might be wrong. It does not complex-valued matrix.'); end
if mod(nFreq,2) == 0; error('The number of rows of sectrogram must be an odd number because it is (fftSize/2)+1.'); end
if mod(fftSize,shiftSize) ~= 0; error('fftSize must be dividable by shiftSize.'); end
if isnumeric(analyWin)
    if size(analyWin, 1) ~= fftSize; error('The length of synthesis window must be the same as fftSize used in STFT.'); end
else
    switch analyWin
        case 'hamming'; analyWin = local_hamming(fftSize);
        case 'hann'; analyWin = local_hann(fftSize);
        case 'rectangular'; analyWin = local_rectangular(fftSize);
        case 'blackman'; analyWin = local_blackman(fftSize);
        case 'sine'; analyWin = local_sine(fftSize);
        otherwise; error('Input window type is not supported. Type "help ISTFT" and check options.');
    end
end

% Calculate optimal synthesis window based on minimal distortion principle
synthWin = local_optSynthWin(analyWin, shiftSize);

% Inverse STFT
tmpSig = zeros((nFrame-1)*shiftSize+fftSize, nCh); % memory allocation (zero-padded signal, length x nch)
% Only DC..Nyquist bins are given; the real signal is recovered as
% 2*real(ifft(half spectrum)), so the two bins that have no mirrored
% counterpart (DC and Nyquist) are halved beforehand.
specgram(1,:,:) = specgram(1,:,:)/2; % DC component
specgram(fftSize/2+1,:,:) = specgram(fftSize/2+1,:,:)/2; % Nyquist frequency component
for iCh = 1:nCh
    shortTimeSig = real(ifft(specgram(:,:,iCh), fftSize) .* synthWin) * 2;
    for iFrame = 1:nFrame % overlap add of short-time signals
        startPoint = (iFrame-1)*shiftSize;
        tmpSig(startPoint+1:startPoint+fftSize,iCh) = tmpSig(startPoint+1:startPoint+fftSize,iCh) + shortTimeSig(:,iFrame);
    end
end
sig = tmpSig(fftSize-shiftSize+1:(nFrame-1)*shiftSize+fftSize, :); % discard padded zeros at beginning of signal, which are added in STFT

% Discarding padded zeros at the end of the signal (only when orgLen was actually given)
if nargin >= 4
    if orgLen > size(sig,1); error('orgLen exceeds the length of the reconstructed signal.'); end
    sig = sig(1:orgLen,:);
end
end

%% Local functions
function synthWin = local_optSynthWin(analyWin,shiftSize) % based on minimal distortion principle
% Samples i, i+shiftSize, i+2*shiftSize, ... overlap at the same output
% position, so each one is divided by the sum of squares of its group.
fftSize = size(analyWin,1);
winGroups = reshape(analyWin(:,1), shiftSize, fftSize/shiftSize); % row i = samples that overlap
synthWin = reshape(winGroups ./ sum(winGroups.^2, 2), fftSize, 1);
end

function win = local_hamming(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = 0.54*ones(fftSize,1) - 0.46*cos(2.0*pi*t(1:fftSize));
end

function win = local_hann(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(0.5*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)),eps); % eps flooring avoids exact zeros
end

function win = local_rectangular(fftSize)
win = ones(fftSize,1);
end

function win = local_blackman(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(0.42*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)) + 0.08*cos(4.0*pi*t(1:fftSize)),eps);
end

function win = local_sine(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(sin(pi*t(1:fftSize)),eps);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Y,Z,eigVal,eigVec] = PCA(X,dim,centering)
%
% Principal component analysis
% This function supports both PCAs with and without data centering.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [Y,Z,eigVal,eigVec] = PCA(X)
%   [Y,Z,eigVal,eigVec] = PCA(X,dim)
%   [Y,Z,eigVal,eigVec] = PCA(X,dim,centering)
%
% [inputs]
%         X: input data ( K (variables) x N (samples) )
%       dim: number of dimensions to which X is projected (default: K, i.e., no reduction)
% centering: centering X before applying PCA or not (true or false, default: true)
%
% [outputs]
%         Y: output matrix (dim x N)
%         Z: transformation matrix (dim x K, Y = ZX)
%    eigVal: all eigenvalues in descending order (K x 1)
%    eigVec: all eigenvectors, columns ordered like eigVal (K x K)

% Check errors and set default values
[K,N] = size(X); % variables x samples
if (nargin < 2) || isempty(dim)
    dim = K; % Default setting: keep every principal component
end
if (nargin < 3) || isempty(centering)
    centering = true; % Default setting
end
if ~isscalar(dim) || dim < 1 || dim > K || dim ~= fix(dim)
    error('dim must be an integer between 1 and the number of variables (rows of X).');
end

if centering
    cX = X - mean(X,2); % Data centering (using implicit expansion)
    % cX = X - repmat(mean(X,2),1,N); % Data centering (prior to R2016b)
else
    cX = X; % Do not apply centering
end
V = cX*(cX')/N; % Covariance matrix of data matrix
[P,D] = eig(V); % Eigenvalue decomposition (V = P*D*inv(P), P includes eigenvectors and D is a diagonal matrix with eigenvalues)

% Sort eigenvalues in descending order and reorder eigenvectors accordingly
[eigVal,idx] = sort(diag(D),'descend');
P = P(:,idx);

% Pick up eigenvectors of the top-dim eigenvalues
reducedP = P(:,1:dim);

Y = reducedP'*cX; % Output matrix
Z = reducedP'; % Transformation matrix
eigVec = P; % All eigenvectors
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
including short-time Fourier transform (STFT) and its inversion. 5 | 6 | ## Contents 7 | - input [dir]: includes test audio signals (dry source signals) 8 | - example_foCepstrum.m: example script that estimates fo (fundamental frequency) based on cepstrum analysis 9 | - example_foYin.m: example script that estimates fo (fundamental frequency) based on YIN 10 | - example_musicSpect.m: example script that calculates MUSIC spectrum 11 | - example_STFTandISTFT.m: example script that applies SNRmix, STFT, and inverse STFT 12 | - foCepstrum.m: estimate fo (fundamental frequency) based on cepstrum analysis 13 | - foYin.m: estimate fo (fundamental frequency) based on YIN 14 | - ISTFT.m: inverse short-time Fourier transform 15 | - musicSpect.m: calculation of MUSIC spectrum 16 | - PCA.m: principal component analysis 17 | - showSpect.m: show spectrogram 18 | - SNRmix.m: mix two signals with a desired signal-to-noise ratio 19 | - STFT.m: short-time Fourier transform 20 | 21 | ## Usage Note 22 | STFT returns only 0Hz to Nyquist frequency components to avoid redundant calculation. 23 | 24 | In inverse STFT, optimal synthesis window is calculated and applied. This optimal synthesis window is based on a minimal distortion principle described below: 25 | * D. Griffin and J. Lim, "Signal estimation from modified short-time Fourier transform," IEEE Transactions on Acoustics, Speech, and Signal Processing, vol. 32, no. 2, pp. 236-243, 1984. 26 | 27 | ## See Also 28 | * HP: http://d-kitamura.net -------------------------------------------------------------------------------- /SNRmix.m: -------------------------------------------------------------------------------- 1 | function [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise,SNR) 2 | % 3 | % Mixing two signals with a desired signal-to-noise ratio (SNR) 4 | % This function supports multichannel signals. 5 | % 6 | % Coded by D. 
function [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise,SNR)
%
% Mixing two signals with a desired signal-to-noise ratio (SNR)
% This function supports multichannel signals.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise)
%   [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise,SNR)
%
% [inputs]
%   inSignal: input signal (length x ch)
%    inNoise: input noise (length x ch)
%        SNR: desired SNR [dB] (default = 0)
%
% [outputs]
%        mix: mixed signal with desired SNR (length x ch)
%  outSignal: signal in the mixture signal (length x ch)
%   outNoise: noise in the mixture signal (length x ch)
%       coef: mixing coefficient applied to inNoise before normalization (scalar)
%
% [note]
%   When the inputs have more columns than rows, they are transposed first and
%   the outputs are returned in the transposed (length x ch) orientation.
%   When the peak absolute value of the mixture is 1 or more, mix, outSignal,
%   and outNoise are all divided by that peak and a message is printed.

% Check errors and set default values (argument count first, before any input is touched)
if (nargin<2)
    error('Too few input arguments.');
end
if (nargin<3)
    SNR = 0;
end
[sigLen, nch] = size(inSignal);
if size(inNoise,1) ~= sigLen || size(inNoise,2) ~= nch
    error('The size of two input signals are not the same.');
end

% Inputs given as (ch x length): process them in (length x ch) orientation
if sigLen < nch
    [mix,outSignal,outNoise,coef] = SNRmix(inSignal.',inNoise.',SNR);
    return;
end

% Total energy over all channels
signalPow = sum(abs(inSignal(:)).^2);
noisePow = sum(abs(inNoise(:)).^2);
if signalPow == 0 || noisePow == 0
    % SNR is undefined here; the scaling below would yield Inf/NaN samples
    error('Input signal and noise must both have nonzero power.');
end

inSNR = 10*log10(signalPow / noisePow); % SNR of the inputs as given [dB]
coef = 10 ^ ((inSNR - SNR) / 20); % amplitude gain for the noise
outSignal = inSignal;
outNoise = inNoise .* coef;
mix = outSignal + outNoise;

% Avoid clipping: scale everything by the same factor, which keeps the SNR
normCoef = max(max(abs(mix)));
if normCoef >= 1
    mix = mix ./ normCoef;
    outNoise = outNoise ./ normCoef;
    outSignal = outSignal ./ normCoef;
    fprintf('The signals are normalized in SNRmix.\n');
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize,analyWin)
%
% Short-time Fourier transform
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize)
%   [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize,analyWin)
%
% [inputs]
%        sig: input signal (length x channels)
%    fftSize: window length [points] in STFT (scalar, even number)
%  shiftSize: shift length [points] in STFT (scalar, must divide fftSize)
%   analyWin: arbitrary analysis window function in STFT (fftSize x 1) or choose used analysis window function from below:
%             'hamming'    : Hamming window (default)
%             'hann'       : von Hann window
%             'rectangular': rectangular window
%             'blackman'   : Blackman window
%             'sine'       : sine window
%
% [outputs]
%   specgram: spectrogram of input signal (frequency bins (fftSize/2+1) x time frames x channels)
%   analyWin: analysis window function used in STFT (fftSize x 1) and can be used for calculating optimal synthesis window
%     sigLen: length of original signal without zero padding
%

% Arguments check and set default values.
% analyWin carries a default value so that the three-argument syntax works
% (arguments without a default are required).
arguments
    sig (:,:) double
    fftSize (1,1) double {mustBeInteger, mustBePositive}
    shiftSize (1,1) double {mustBeInteger, mustBePositive}
    analyWin = 'hamming'
end

% Errors check
[sigLen, nCh] = size(sig); % get signal length and number of channels
if sigLen < nCh; error('The size of input signal might be wrong. The signal must be length x channels size.'); end
if mod(fftSize,2) ~= 0; error('fftSize must be an even number.'); end
if mod(fftSize,shiftSize) ~= 0; error('fftSize must be dividable by shiftSize.'); end
if isnumeric(analyWin)
    if size(analyWin, 1) ~= fftSize; error('The length of analysis window must be the same as fftSize.'); end
else
    switch analyWin
        case 'hamming'; analyWin = local_hamming(fftSize);
        case 'hann'; analyWin = local_hann(fftSize);
        case 'rectangular'; analyWin = local_rectangular(fftSize);
        case 'blackman'; analyWin = local_blackman(fftSize);
        case 'sine'; analyWin = local_sine(fftSize);
        otherwise; error('Input winType is not supported. Type "help STFT" and check options.');
    end
end

% Pad zeros at the beginning and ending of the input signal
zeroPadSize = fftSize - shiftSize; % size of zero padding
padSig = [zeros(zeroPadSize,nCh); sig; zeros(fftSize,nCh)]; % padding zeros
padSigLen = size(padSig,1); % zero-padded signal length

% Calculate STFT
nFrame = floor((padSigLen - fftSize + shiftSize) / shiftSize); % number of time frames in spectrogram
specgram = zeros(fftSize/2+1, nFrame, nCh); % memory allocation (nFreq x nFrames x nCh)
shortTimeSig = zeros(fftSize, nFrame); % memory allocation (fftSize x nFrames, reused for every channel)
for iCh = 1:nCh
    for iFrame = 1:nFrame % get short-time signals by framing
        startPoint = (iFrame-1)*shiftSize; % start point of short-time signal
        shortTimeSig(:,iFrame) = padSig(startPoint+1:startPoint+fftSize, iCh); % store short-time signal
    end
    tmp = fft(shortTimeSig .* analyWin); % get DFT spectra of windowed short-time signals
    specgram(:,:,iCh) = tmp(1:fftSize/2+1, :); % store spectrum (only from DC to Nyquist frequency components)
end
end

%% Local functions
function win = local_hamming(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = 0.54*ones(fftSize,1) - 0.46*cos(2.0*pi*t(1:fftSize));
end

function win = local_hann(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(0.5*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)),eps); % eps flooring avoids exact zeros
end

function win = local_rectangular(fftSize)
win = ones(fftSize,1);
end

function win = local_blackman(fftSize)
t = linspace(0, 1,fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(0.42*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)) + 0.08*cos(4.0*pi*t(1:fftSize)),eps);
end

function win = local_sine(fftSize)
t = linspace(0, 1, fftSize+1).'; % periodic (produce L+1 window and return L window)
win = max(sin(pi*t(1:fftSize)),eps);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample program for applying STFT and ISTFT to audio signals             %
%                                                                         %
% Coded by D. Kitamura (d-kitamura@ieee.org)                              %
%                                                                         %
% See also:                                                               %
% http://d-kitamura.net                                                   %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; % clear memory (workspace variables)
close all; % close all plot figures

% Parameters
wavPath1 = sprintf('./input/drums.wav'); % file path of the first wav signal
wavPath2 = sprintf('./input/piano.wav'); % file path of the second wav signal

% Read audio files
[s1,fs] = audioread(wavPath1); % fs: sampling frequency [Hz], s1 is a matrix of size "length x channels"
[s2,fs] = audioread(wavPath2); % NOTE: fs is overwritten here; both files are assumed to share one sampling frequency

% Mixing with SNR = 0 [dB]
SNR = 0;
[x,s1,s2,coef] = SNRmix(s1,s2,SNR); % x: mixture signal; s1 and s2 are replaced by the (possibly rescaled) components of x

% Apply short-time Fourier transform (STFT)
fftSize = 2048;
shiftSize = fftSize/4;
winType = 'hamming';
[S1,analyWin,orgLen1] = STFT(s1,fftSize,shiftSize,winType);
[S2,analyWin,orgLen2] = STFT(s2,fftSize,shiftSize,winType);
[X,analyWin,orgLenX] = STFT(x,fftSize,shiftSize,winType); % analyWin is the same window vector in all three calls

% Show spectrograms
showSpect(S1,fs,shiftSize);
showSpect(S2,fs,shiftSize);
showSpect(X,fs,shiftSize);

% Apply inverse STFT (ISTFT) with the analysis window returned by STFT
y1 = ISTFT(S1,shiftSize,analyWin,orgLen1);
y2 = ISTFT(S2,shiftSize,analyWin,orgLen2);
z = ISTFT(X,shiftSize,analyWin,orgLenX);

% Numerical error caused by calculations in STFT and ISTFT
% (no semicolons: the squared errors are printed to the command window)
err1 = sum((s1-y1).^2)
err2 = sum((s2-y2).^2)
err3 = sum((x-z).^2)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample program for estimating Fo (fundamental frequency) based on       %
% cepstrum analysis                                                       %
%                                                                         %
% Coded by D. Kitamura (d-kitamura@ieee.org)                              %
%                                                                         %
% See also:                                                               %
% http://d-kitamura.net                                                   %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; close all; clc;
addpath('./input'); % make the sample audio files reachable by file name only

% parameters
fileName = 'a.wav'; % audio file name
fftSize = 4096; % FFT length (short-time length) for analysis
foMin = 80; % minimum frequency [Hz] for analysis
foMax = 400; % maximum frequency [Hz] for analysis

% Read wav file
[signal,fs] = audioread(fileName); % fs is a sampling frequency [Hz]

% Fo estimation (rectangular analysis window)
estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,'rectangular');

fprintf('Estimated Fo is %.5f [Hz].\n', estFo);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample program for estimating Fo (fundamental frequency) based on YIN   %
%                                                                         %
% Coded by D. Kitamura (d-kitamura@ieee.org)                              %
%                                                                         %
% See also:                                                               %
% http://d-kitamura.net                                                   %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; close all; clc;

% Parameter setting
samplingFreq = 1000; % sampling frequency [Hz]
samplingTime = 1/samplingFreq; % sampling time [s]
sigTime = 0.5; % signal length [s]
timeAxis = 0:samplingTime:sigTime; % time axis (row vector, both end points included)
fo = 15; % signal frequency [Hz]
omega = 2*pi*fo; % angular frequency [rad/s]
sigma = 0.8; % amplitude of noise signal
threshold = 0.1; % threshold value in YIN (b/w 0 and 1)
foMin = 10; % minimum frequency for Fo estimation [Hz]
foMax = 20; % maximum frequency for Fo estimation [Hz]

% Produce signals (transposed to column vectors)
sig = sin(omega*timeAxis).'; % sine wave signal
noisySig = sig + sigma*randn(size(timeAxis)).'; % observed noisy signal (random seed is not fixed, so results vary between runs)

% Plot signals
plot(timeAxis, noisySig); hold on; plot(timeAxis, sig, 'LineWidth', 2); % plotting signals
xlabel('Time [s]'); ylabel('Amplitude'); % add axis labels
legend('Observed noisy signal', 'True signal', 'Location', 'northeast'); % add legends

% Fundamental frequency estimation based on YIN
estFo = foYin(noisySig,threshold,samplingFreq, foMin, foMax);
fprintf('True Fo: %.2f Hz\nEstimated Fo: %.2f Hz\nError rate: %.2f %%\n', fo, estFo, 100*abs(fo-estFo)/fo);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample program for calculating MUSIC spectrum                           %
%                                                                         %
% Coded by D. Kitamura (d-kitamura@ieee.org)                              %
%                                                                         %
% See also:                                                               %
% http://d-kitamura.net                                                   %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear; close all; clc;

% Fix random seed
seed = 2; % seed
RandStream.setGlobalStream(RandStream('mt19937ar','Seed',seed)); % set pseudo random stream (mt19937ar)

% Parameters
fs = 100; % sampling frequency [Hz]
n = (0:1/fs:10)'; % time axis [s] (column vector sampled at fs)
f1 = 15; % frequency 1 [Hz]
f2 = 35; % frequency 2 [Hz]
sigma = 5; % amplitude of white noise
order = 2; % number of (real-valued) sinusoidal waves
windowSize = 512; % length of short-time signal (frame)
fftSize = 512; % FFT length for calculating MUSIC spectrum
shiftSize = 1; % shift length for short-time signals (frames)

% Produce noisy signal
sig = cos(2*pi*f1*n) + sin(2*pi*f2*n) + sigma*randn(size(n));

% Plot signal
figure; plot(n,sig);
xlabel('Time'); ylabel('Amplitude');
title('Noisy signal'); grid on;

% Plot amplitude spectrum of signal against a frequency axis in Hz
% (bin k of an N-point DFT corresponds to k*fs/N [Hz]); only 0 Hz to the
% Nyquist frequency is shown, like the MUSIC spectrum below
nSamples = numel(sig);
fftFreqAxis = (0:nSamples-1).' * fs / nSamples;
figure; plot(fftFreqAxis, 20*log10(abs(fft(sig))));
xlim([0,fs/2]);
xlabel('Frequency [Hz]'); ylabel('Power [dB]');
title('Fourier power spectrum'); grid on;

% Calculation of MUSIC spectrum based on sub-space method
[P,f] = musicSpect(sig,order,fs,windowSize,fftSize,shiftSize);
figure; plot(f,20*log10(P));
xlim([0,fs/2]);
xlabel('Frequency [Hz]'); ylabel('Power [dB]');
title('Pseudospectrum Estimate via MUSIC'); grid on;

% MUSIC spectrum using MATLAB built-in function (see: https://jp.mathworks.com/help/signal/ref/pmusic.html)
% short-time signals are produced with shiftSize=1 and apply SVD
dim = 2*order; % 2 times of number of sinusoidal waves when signal is real-valued
fftSize = windowSize; % FFT size for calculating MUSIC spectrum
[P,f] = pmusic(sig,dim,fftSize,fs,windowSize);
figure; plot(f,20*log10(abs(P)));
xlabel('Frequency [Hz]'); ylabel('Power [dB]');
title('Pseudospectrum Estimate via MUSIC (MATLAB built-in)'); grid on;
function estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,window)
%
% Estimation of Fo (fundamental frequency) based on cepstrum analysis
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   estFo = foCepstrum(signal,fs,foMin,foMax)
%   estFo = foCepstrum(signal,fs,foMin,foMax,fftSize)
%   estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,window)
%
% [inputs]
%    signal: input signal (sigLen x 1; a 1 x sigLen row vector is transposed)
%        fs: sampling frequency [Hz]
%     foMin: minimum frequency [Hz] for analysis (required, must be positive)
%     foMax: maximum frequency [Hz] for analysis (required, must exceed foMin)
%   fftSize: length of short-time signal for calculating cepstrum (scalar,
%            default: 2^nextpow2(round(sigLen/2)))
%    window: arbitrary analysis window function (fftSize x 1) or choose function from below:
%            'hamming'    : Hamming window (default)
%            'hann'       : von Hann window
%            'rectangular': rectangular window
%            'blackman'   : Blackman window
%            'sine'       : sine window
%
% [outputs]
%     estFo: estimated fo (scalar [Hz])
%

% Check errors and set default values (argument count first, before foMin/foMax are used)
if (nargin < 4)
    error('Too few input arguments.');
end
if size(signal,2) > size(signal,1)
    signal = signal.'; % accept row-vector input
end
[sigLen,nCh] = size(signal); % measured after the orientation fix
if nCh ~= 1
    error('foCepstrum only supports single-channel signal.');
end
cepMax = round(fs/foMin+1); % Maximum order of cepstrum for analysis
cepMin = round(fs/foMax+1); % Minimum order of cepstrum for analysis
if cepMin > cepMax
    error('foMin must be lower than foMax.');
end
if (nargin < 5)
    fftSize = 2^(nextpow2(round(sigLen/2))); % default analysis length
end
if fftSize < cepMax
    error('fftSize (analysis length) is too short or foMin is too low.');
end
if fftSize > sigLen
    error('fftSize (analysis length) must not exceed the signal length.');
end
if (nargin < 6)
    window = hamming_local(fftSize); % default analysis window
else
    if isnumeric(window)
        if size(window, 1) ~= fftSize
            error('The length of analysis window must be the same as that of fftSize.');
        end
    else
        switch window
            case 'hamming'
                window = hamming_local(fftSize);
            case 'hann'
                window = hann_local(fftSize);
            case 'rectangular'
                window = rectangular_local(fftSize);
            case 'blackman'
                window = blackman_local(fftSize);
            case 'sine'
                window = sine_local(fftSize);
            otherwise
                error('Input window type is not supported. Check options.')
        end
    end
end

% Find a short-time signal whose power is maximum
% (non-overlapping frames; "+1" so that the last complete frame is included)
powMax = 0; indStart = 1;
for ind = 1:fftSize:sigLen-fftSize+1
    pow = sum(signal(ind:ind+fftSize-1).^2); % signal power of short-time signal
    if pow > powMax
        powMax = pow;
        indStart = ind;
    end
end
analySignal = signal(indStart:indStart+fftSize-1); % short-time signal

% Calculate cepstrum of short-time signal
windowedAnalySignal = analySignal.*window; % windowing
spectrum = fft(windowedAnalySignal); % FFT
logAbsSpectrum = log(max(abs(spectrum),eps)); % absolute, eps flooring, and log
cepstrum = real(ifft(logAbsSpectrum)); % inverse FFT

% Find maximum cepstrum and get its order (quefrency)
% Element k of cepstrum holds quefrency k-1 [samples]; indCep is relative to
% cepMin, hence the offset cepMin-2.
[~, indCep] = max(cepstrum(cepMin:cepMax));
maxQuef = indCep + cepMin - 2;

% Convert quefrency to frequency
estFo = fs/maxQuef;

end

%% Local functions
function analyWindow = hamming_local(fftSize)
t = linspace(0,1,fftSize+1).'; % periodic (produce L+1 window and return L window)
analyWindow = 0.54*ones(fftSize,1) - 0.46*cos(2.0*pi*t(1:fftSize));
end

function analyWindow = hann_local(fftSize)
t = linspace(0,1,fftSize+1).'; % periodic (produce L+1 window and return L window)
analyWindow = max(0.5*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)),eps);
end

function analyWindow = rectangular_local(fftSize)
analyWindow = ones(fftSize,1);
end

function analyWindow = blackman_local(fftSize)
t = linspace(0,1,fftSize+1).'; % periodic (produce L+1 window and return L window)
analyWindow = max(0.42*ones(fftSize,1) - 0.5*cos(2.0*pi*t(1:fftSize)) + 0.08*cos(4.0*pi*t(1:fftSize)),eps);
end

function analyWindow = sine_local(fftSize)
t = linspace(0,1,fftSize+1).'; % periodic (produce L+1 window and return L window)
analyWindow = max(sin(pi*t(1:fftSize)),eps);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function estFo = foYin(sig,threshold,sampFreq,foMin,foMax)
%
% Estimation of Fo (fundamental frequency) based on YIN
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
% M. Mauch and S. Dixon, "PYIN: A fundamental frequency estimator using probabilistic threshold distributions," Proc. ICASSP, pp. 659-663, 2014.
% A. Cheveigne and H. Kawahara, "YIN, a fundamental frequency estimator for speech and music," The Journal of the Acoustical Society of America, vol. 111, no. 4, pp. 1917-1930, 2002.
%
% [syntax]
%   estFo = foYin(sig,threshold,fs)
%   estFo = foYin(sig,threshold,fs,foMin)
%   estFo = foYin(sig,threshold,fs,foMin,foMax)
%
% [inputs]
%        sig: input signal (sigLen x 1)
%  threshold: absolute threshold applied to the cumulative-mean-normalized
%             difference function (nonnegative scalar)
%   sampFreq: sampling frequency [Hz]
%      foMin: minimum frequency for analysis (default: 0 [Hz])
%      foMax: maximum frequency for analysis (default: fs/2 [Hz])
%
% [outputs]
%      estFo: estimated fo (scalar [Hz])
%
% [note]
%   When some lag in the search range falls below the threshold, the estimate
%   is the first local minimum reached from the first such lag (absolute
%   threshold step of YIN). Otherwise the global minimum in the range is used.
%

% Check arguments and set default values
arguments
    sig (:,1) double
    threshold (1,1) double
    sampFreq (1,1) double
    foMin (1,1) double = 0;
    foMax (1,1) double = sampFreq/2;
end

% Check errors
if threshold < 0; error('Threshold value in YIN must be nonnegative.'); end
if sampFreq <= 0; error('Sampling frequency must be positive.'); end
if foMin < 0; error('Minimum frequency must be nonnegative.'); end
if foMax <= 0; error('Maximum frequency must be positive.'); end

% Initialization
sigLen = size(sig, 1); % signal length
sampTime = 1/sampFreq; % sampling time
lagRange = floor(sigLen/2); % range of lag (denoted W in the papers, a half of signal length)
indMin = max(round(1/foMax/sampTime), 1); % lag that corresponds to foMax (minimum of estimated lag, at least one sample)
indMax = min(round(1/foMin/sampTime), lagRange); % lag that corresponds to foMin (Inf when foMin = 0), limited to lagRange
if indMin > indMax
    error('No valid lag exists for the given signal length and frequency range.');
end

% Calculate cumulative-mean-normalized squared difference between original and lagged signals.
% Only lags 1..indMax are needed (the cumulative mean at lag L depends on lags 1..L only),
% and one lag is processed at a time so that no lagRange x lagRange matrix is allocated.
refSig = sig(1:lagRange,1);
sqDiff = zeros(indMax,1);
for lag = 1:indMax
    sqDiff(lag) = sum((refSig - sig(1+lag:lagRange+lag,1)).^2); % squared difference for this lag
end
cumMeanDiff = cumsum(sqDiff)./(1:1:indMax).'; % cumulative mean
normDiff = sqDiff./cumMeanDiff; % cumulative-mean normalization

% Estimation of fundamental frequency Fo
searchDiff = normDiff(indMin:indMax,1); % limited range [indMin, indMax]
validInd = find(searchDiff <= threshold, 1); % first lag in the range that satisfies normDiff<=threshold
if isempty(validInd) % no lag satisfies the threshold: fall back to the global minimum in the range
    [~, bestInd] = min(searchDiff);
else % walk down from the first lag below the threshold to the bottom of that dip
    bestInd = validInd;
    while bestInd < numel(searchDiff) && searchDiff(bestInd+1) < searchDiff(bestInd)
        bestInd = bestInd + 1;
    end
end
estTo = (bestInd+indMin-1) * sampTime; % estimated fundamental period To, where indMin-1 is added because bestInd is an index into the limited range
estFo = 1/estTo; % estimated fundamental frequency Fo
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/drums.wav
--------------------------------------------------------------------------------
/input/guitar.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/guitar.wav
--------------------------------------------------------------------------------
/input/piano.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/piano.wav
--------------------------------------------------------------------------------
/musicSpect.m:
--------------------------------------------------------------------------------
function [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize,shiftSize)
%
% Estimation of MUSIC spectrum based on sub-space method
%
% The signal is cut into overlapping frames of length windowSize, the
% sample covariance matrix of those frames is eigen-decomposed, and the
% eigenvectors that do not belong to the 2*order largest eigenvalues are
% used as the noise subspace. The pseudo spectrum is the reciprocal of the
% summed power spectra (fftSize-point DFT) of the noise eigenvectors.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize)
%   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize)
%   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize,shiftSize)
%
% [inputs]
%       signal: input signal (sigLen x 1, real-valued)
%        order: number of (real-valued) sinusoidal waves in signal (scalar, 2*order < windowSize)
%           fs: sampling frequency [Hz]
%   windowSize: length of short-time signal frame (scalar)
%      fftSize: length of Fourier transform for calculating MUSIC spectrum (scalar, fftSize >= windowSize, default: windowSize)
%    shiftSize: shift length of frames (default: 1)
%
% [outputs]
%   musicSpect: pseudo spectrum (MUSIC spectrum) of input signal (fftSize x 1)
%     freqAxis: frequency axis vector [Hz] (1 x fftSize), bin k-1 is located at (k-1)*fs/fftSize
%

% Check errors and set default values
% (error() with a single argument does not convert escape sequences, so the
%  messages deliberately contain no '\n')
if (nargin < 4)
    error('Too few input arguments.');
end
if (size(signal,2) > 1)
    error('Input argument "signal" must be a column vector.');
end
if ~isreal(signal)
    error('Input argument "signal" must be a real-valued vector.');
end
if (nargin < 5)
    fftSize = windowSize; % default
end
if (nargin < 6)
    shiftSize = 1; % default
end
if (2*order >= windowSize)
    % otherwise no noise eigenvector is left and the spectrum would be Inf everywhere
    error('Input argument "order" must satisfy 2*order < windowSize.');
end
if (fftSize < windowSize)
    % fft(x,n) with n < size(x,1) truncates x, which would corrupt the noise eigenvectors
    error('Input argument "fftSize" must not be smaller than "windowSize".');
end

sigLen = size(signal,1);

% short-time framing (break signal into short-time signal pieces)
sigZeroPad = [signal;zeros(windowSize-1,1)]; % zero padding so that the last frame is complete
nFrames = ceil(sigLen/shiftSize); % number of frames
frameInd = (1:windowSize).' + (0:nFrames-1)*shiftSize; % sample indexes (rows: position in frame, columns: frame)
shortTimeSig = reshape(sigZeroPad(frameInd), windowSize, nFrames); % reshape keeps the size when frameInd is a vector

% MUSIC spectrum calculation
covMat = (shortTimeSig*shortTimeSig')/nFrames; % sample covariance matrix
[eigVec,eigVal] = eig(covMat); % eigenvalue decomposition (covMat = eigVec * eigVal * eigVec')
[~, ind] = sort(diag(eigVal), 'descend'); % sorted index of eigenvalues in descending order
noiseEigVec = eigVec(:,ind(2*order+1:end)); % noise eigenvectors (a real sinusoid occupies two signal eigenvectors)
fftNoiseEigVec = abs(fft(noiseEigVec,fftSize)).^2; % power spectrum of noise eigenvectors (inner products with Fourier bases are equal to DFT)
musicSpect = 1./sum(fftNoiseEigVec,2); % pseudo spectrum (MUSIC spectrum)
freqAxis = (0:fftSize-1)*(fs/fftSize); % frequency axis with one entry per DFT bin

end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--------------------------------------------------------------------------------
/showSpect.m:
--------------------------------------------------------------------------------
function [figHdl,freqAx,timeAx] = showSpect(specgram,sampFreq,shiftSize)
%
% Show spectrogram from time-frequency matrix
% This function supports both complex and nonnegative input and both
% monaural and multichannel spectrograms.
% For a multichannel spectrogram, the order of its indexes must be
% [nfreqs x nframes x nch].
% Note that color map range is moderately defined. Tune by yourself.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [figHdl,freqAx,timeAx] = showSpect(specgram)
%   [figHdl,freqAx,timeAx] = showSpect(specgram,sampFreq,shiftSize)
%
% [inputs]
%    specgram: STFT matrix ([nFreqs x nTime] for a monaural spectrogram,
%              and [nFreqs x nTime x channels] for a multichannel
%              spectrogram, where number of frequency bins is fftSize/2+1,
%              and both complex-valued and nonnegative spectrograms are supported.)
%              A complex-valued input is converted to abs(specgram).^2,
%              a real-valued input is drawn as it is (in dB, 10*log10).
%    sampFreq: sampling frequency [Hz]
%   shiftSize: length of window shift
%
% [outputs]
%      figHdl: figure handle (one figure per channel)
%      freqAx: frequency axis (1 x nbin), bin indexes when sampFreq is omitted
%      timeAx: time axis (1 x nframe), frame indexes when sampFreq is omitted

% Check errors and set default values
[nFreq, nTime, nCh] = size(specgram);
if ~isreal(specgram) % for complex spectrogram
    specgram = real(abs(specgram).^2); % calculate power spectrogram
end
useIndexAxes = (nargin < 2); % axes are plain indexes when no sampling information is given
if useIndexAxes
    freqAx = 1:nFreq;
    timeAx = 1:nTime;
elseif (nargin < 3)
    % error() with a single argument does not convert '\n', so use one plain sentence
    error('Too few input arguments. If you input sampFreq, shiftSize is also required.');
else
    freqAx = linspace(0, sampFreq/2, nFreq);
    timeAx = linspace(0, shiftSize/sampFreq*nTime, nTime);
end

% Draw spectrogram surface
logSpecgram = 10*log10(specgram);
minVal = min(logSpecgram(:)); % common color range over all channels
maxVal = max(logSpecgram(:));
lowVal = (minVal - maxVal)/6; % moderately defined lower limit of color map range
for iCh = 1:nCh
    figHdl(iCh) = figure;
    imagesc(timeAx, freqAx, logSpecgram(:,:,iCh));
    axis tight; box on;
    if lowVal < maxVal % caxis requires increasing limits; otherwise keep automatic range
        caxis([lowVal, maxVal]);
    end
    set(gca, 'YDir', 'normal'); % invert vertical axis
    set(gca, 'FontName', 'Times', 'FontSize', 16);
    if nCh ~= 1
        title( sprintf('%dch spectrogram',iCh), 'FontName', 'Arial', 'FontSize', 16 );
    end
    if useIndexAxes
        xlabel('Time frame', 'FontName', 'Arial', 'FontSize', 16);
        ylabel('Frequency bin', 'FontName', 'Arial', 'FontSize', 16);
    else
        xlabel('Time [s]', 'FontName', 'Arial', 'FontSize', 16);
        ylabel('Frequency [Hz]', 'FontName', 'Arial', 'FontSize', 16);
    end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--------------------------------------------------------------------------------
/showSpect3d.m:
--------------------------------------------------------------------------------
function [figHdl,freqAx,timeAx] = showSpect3d(specgram,sampFreq,shiftSize)
%
% Show spectrogram from time-frequency matrix
% This function supports both complex and nonnegative input and both
% monaural and multichannel spectrograms.
% The spectrogram is drawn with surf (viewed from directly above).
% For a multichannel spectrogram, the order of its indexes must be
% [nfreqs x nframes x nch].
% Note that color map range is moderately defined. Tune by yourself.
%
% Coded by D. Kitamura (d-kitamura@ieee.org)
%
% See also:
% http://d-kitamura.net
%
% [syntax]
%   [figHdl,freqAx,timeAx] = showSpect3d(specgram)
%   [figHdl,freqAx,timeAx] = showSpect3d(specgram,sampFreq,shiftSize)
%
% [inputs]
%    specgram: STFT matrix ([nFreqs x nTime] for a monaural spectrogram,
%              and [nFreqs x nTime x channels] for a multichannel
%              spectrogram, where number of frequency bins is fftSize/2+1,
%              and both complex-valued and nonnegative spectrograms are supported.)
%              A complex-valued input is converted to abs(specgram).^2,
%              a real-valued input is drawn as it is (in dB, 10*log10).
%    sampFreq: sampling frequency [Hz]
%   shiftSize: length of window shift
%
% [outputs]
%      figHdl: figure handle (one figure per channel)
%      freqAx: frequency axis (1 x nbin), bin indexes when sampFreq is omitted
%      timeAx: time axis (1 x nframe), frame indexes when sampFreq is omitted

% Check errors and set default values
[nFreq, nTime, nCh] = size(specgram);
if ~isreal(specgram) % for complex spectrogram
    specgram = real(abs(specgram).^2); % calculate power spectrogram
end
useIndexAxes = (nargin < 2); % axes are plain indexes when no sampling information is given
if useIndexAxes
    freqAx = 1:nFreq;
    timeAx = 1:nTime;
elseif (nargin < 3)
    % error() with a single argument does not convert '\n', so use one plain sentence
    error('Too few input arguments. If you input sampFreq, shiftSize is also required.');
else
    freqAx = linspace(0, sampFreq/2, nFreq);
    timeAx = linspace(0, shiftSize/sampFreq*nTime, nTime);
end

% Draw spectrogram surface
logSpecgram = 10*log10(specgram);
minVal = min(logSpecgram(:)); % common color range over all channels
maxVal = max(logSpecgram(:));
lowVal = (minVal - maxVal)/6; % moderately defined lower limit of color map range
for iCh = 1:nCh
    figHdl(iCh) = figure;
    surf(timeAx, freqAx, logSpecgram(:,:,iCh), 'edgecolor', 'none');
    axis tight; box on;
    if lowVal < maxVal % caxis requires increasing limits; otherwise keep automatic range
        caxis([lowVal, maxVal]);
    end
    view(0, 90); % look at the surface from directly above
    set(gca, 'FontName', 'Times', 'FontSize', 16);
    if nCh ~= 1
        title( sprintf('%dch spectrogram',iCh), 'FontName', 'Arial', 'FontSize', 16 );
    end
    if useIndexAxes
        xlabel('Time frame', 'FontName', 'Arial', 'FontSize', 16);
        ylabel('Frequency bin', 'FontName', 'Arial', 'FontSize', 16);
    else
        xlabel('Time [s]', 'FontName', 'Arial', 'FontSize', 16);
        ylabel('Frequency [Hz]', 'FontName', 'Arial', 'FontSize', 16);
    end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--------------------------------------------------------------------------------