├── ISTFT.m
├── PCA.m
├── README.md
├── SNRmix.m
├── STFT.m
├── example_STFTandISTFT.m
├── example_foCepstrum.m
├── example_foYin.m
├── example_musicSpect.m
├── foCepstrum.m
├── foYin.m
├── input
    ├── a.wav
    ├── bass.wav
    ├── drums.wav
    ├── guitar.wav
    └── piano.wav
├── musicSpect.m
├── showSpect.m
└── showSpect3d.m


/ISTFT.m:
--------------------------------------------------------------------------------
  1 | function sig = ISTFT(specgram,shiftSize,analyWin,orgLen)
  2 | %
  3 | % Inverse short-time Fourier transform
  4 | % Synthesis window is calculated based on minimal distortion principle,
  5 | % which is described below:
  6 | % D. Griffin and J. Lim, "Signal estimation from modified short-time
  7 | % Fourier transform," IEEE Transactions on Acoustics, Speech, and Signal
  8 | % Processing, vol. 32, no. 2, pp. 236-243, 1984.
  9 | %
 10 | % Coded by D. Kitamura (d-kitamura@ieee.org)
 11 | %
 12 | % See also:
 13 | % http://d-kitamura.net
 14 | %
 15 | % [syntax]
 16 | %   sig = ISTFT(specgram,shiftSize)
 17 | %   sig = ISTFT(specgram,shiftSize,analyWin)
 18 | %   sig = ISTFT(specgram,shiftSize,analyWin,orgLen)
 19 | %
 20 | % [inputs]
 21 | %     specgram: complex-valued STFT of input signal (frequency bins (fftSize/2+1) x time frames x channels)
 22 | %    shiftSize: frame shift length (positive integer that divides fftSize)
 23 | %     analyWin: analysis window function used in STFT (fftSize x 1) or choose used analysis window function from below:
 24 | %               'hamming'    : Hamming window (default)
 25 | %               'hann'       : von Hann window
 26 | %               'rectangular': rectangular window
 27 | %               'blackman'   : Blackman window
 28 | %               'sine'       : sine window
 29 | %       orgLen: length of original signal (before zero padding) (default: nFrame*shiftSize, i.e., the output signal is not truncated)
 30 | %
 31 | % [outputs]
 32 | %          sig: time-domain waveform of input spectrogram (signal x channels)
 33 | %
 34 | 
 35 | % Arguments check and set default values (a default value is what makes an argument optional)
 36 | arguments
 37 |     specgram (:,:,:) {mustBeNumeric}
 38 |     shiftSize (1,1) double {mustBeInteger(shiftSize), mustBePositive(shiftSize)}
 39 |     analyWin = 'hamming'
 40 |     orgLen (1,1) double {mustBeInteger(orgLen)} = size(specgram,2)*shiftSize
 41 | end
 42 | 
 43 | % Error check
 44 | [nFreq, nFrame, nCh] = size(specgram);
 45 | fftSize = (nFreq-1) * 2; % fft length used in STFT
 46 | if nCh > nFreq; error('Input spectrogram might be wrong. The size of it must be (freq x frame x ch).'); end
 47 | if isreal(specgram); error('Input spectrogram might be wrong. It must be a complex-valued array.'); end
 48 | if mod(nFreq,2) == 0; error('The number of rows of spectrogram must be an odd number because it is (fftSize/2)+1.'); end
 49 | if mod(fftSize,shiftSize) ~= 0; error('fftSize must be divisible by shiftSize.'); end
 50 | % Reject orgLen values that would index past the reconstructed signal
 51 | if orgLen > nFrame*shiftSize; error('orgLen must not exceed the length of the reconstructed signal (nFrame*shiftSize).'); end
 52 | % Resolve analyWin: either a window vector or the name of a built-in window
 53 | if isnumeric(analyWin)
 54 |     if size(analyWin, 1) ~= fftSize; error('The length of analysis window must be the same as fftSize used in STFT.'); end
 55 | else
 56 |     % 'hamming' is supplied by the arguments block when analyWin is omitted
 57 |     switch analyWin
 58 |         case 'hamming'; analyWin = local_hamming(fftSize);
 59 |         case 'hann'; analyWin = local_hann(fftSize);
 60 |         case 'rectangular'; analyWin = local_rectangular(fftSize);
 61 |         case 'blackman'; analyWin = local_blackman(fftSize);
 62 |         case 'sine'; analyWin = local_sine(fftSize);
 63 |         otherwise; error('Input window type is not supported. Type "help ISTFT" and check options.');
 64 |     end
 65 | end
 66 | 
 67 | % Calculate optimal synthesis window based on minimal distortion principle
 68 | synthWin = local_optSynthWin(analyWin, shiftSize);
 69 | 
 70 | % Inverse STFT
 71 | tmpSig = zeros((nFrame-1)*shiftSize+fftSize, nCh); % memory allocation (zero-padded signal, length x nch)
 72 | specgram(1,:,:) = specgram(1,:,:)/2; % DC component (halved because the real part is doubled below)
 73 | specgram(fftSize/2+1,:,:) = specgram(fftSize/2+1,:,:)/2; % Nyquist frequency component (halved for the same reason)
 74 | for iCh = 1:nCh
 75 |     shortTimeSig = real(ifft(specgram(:,:,iCh), fftSize) .* synthWin) * 2; % ifft zero-pads the one-sided spectrum to fftSize points
 76 |     for iFrame = 1:nFrame % overlap add of short-time signals
 77 |         startPoint = (iFrame-1)*shiftSize;
 78 |         tmpSig(startPoint+1:startPoint+fftSize,iCh) = tmpSig(startPoint+1:startPoint+fftSize,iCh) + shortTimeSig(:,iFrame);
 79 |     end
 80 | end
 81 | sig = tmpSig(fftSize-shiftSize+1:(nFrame-1)*shiftSize+fftSize, :); % discard padded zeros at beginning of signal, which are added in STFT
 82 | 
 83 | % Discard padded zeros at the end of the signal.
 84 | % orgLen defaults to nFrame*shiftSize (the full reconstructed length), so
 85 | % nothing is removed when the caller omits it.
 86 | sig = sig(1:orgLen,:);
 87 | end
 88 | 
 89 | %% Local functions
 90 | function synthWin = local_optSynthWin(analyWin,shiftSize)
 91 | % Synthesis window based on minimal distortion principle: every sample of
 92 | % the analysis window is divided by the sum of squares of the analysis-window
 93 | % samples located at the same offset in each block of shiftSize points.
 94 | winLen = size(analyWin,1);
 95 | synthWin = zeros(winLen,1);
 96 | for offset = 1:shiftSize
 97 |     taps = offset + shiftSize*(0:floor(winLen/shiftSize)-1); % indices sharing this offset
 98 |     energy = 0;
 99 |     for k = taps; energy = energy + analyWin(k,1)*analyWin(k,1); end
100 |     synthWin(taps,1) = analyWin(taps,1)/energy;
101 | end
102 | end
103 | 
104 | function win = local_hamming(fftSize) % periodic Hamming window (fftSize x 1)
105 | phase = 2.0*pi*linspace(0, 1, fftSize+1).'; % L+1 phase points; only the first L are used
106 | win = repmat(0.54, fftSize, 1) - 0.46*cos(phase(1:fftSize));
107 | end
108 | 
109 | function win = local_hann(fftSize) % periodic von Hann window (fftSize x 1), floored at eps
110 | phase = 2.0*pi*linspace(0, 1, fftSize+1).'; % L+1 phase points; only the first L are used
111 | win = max(repmat(0.5, fftSize, 1) - 0.5*cos(phase(1:fftSize)), eps);
112 | end
113 | 
114 | function win = local_rectangular(fftSize) % rectangular window: fftSize x 1 column of ones
115 | win = ones([fftSize, 1]);
116 | end
117 | 
118 | function win = local_blackman(fftSize) % periodic Blackman window (fftSize x 1), floored at eps
119 | u = linspace(0, 1, fftSize+1).'; u = u(1:fftSize); % L+1 grid with the last point dropped
120 | win = max(repmat(0.42, fftSize, 1) - 0.5*cos(2.0*pi*u) + 0.08*cos(4.0*pi*u), eps);
121 | end
122 | 
123 | function win = local_sine(fftSize) % periodic sine window (fftSize x 1), floored at eps
124 | u = linspace(0, 1, fftSize+1).'; u = u(1:fftSize); % L+1 grid with the last point dropped
125 | win = max(sin(pi*u), eps);
126 | end
127 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/PCA.m:
--------------------------------------------------------------------------------
 1 | function [Y,Z,eigVal,eigVec] = PCA(X,dim,centering)
 2 | %
 3 | % Principal component analysis
 4 | % This function supports both PCAs with and without data centering.
 5 | %
 6 | % Coded by D. Kitamura (d-kitamura@ieee.org)
 7 | %
 8 | % See also:
 9 | % http://d-kitamura.net
10 | %
11 | % [syntax]
12 | %   [Y,Z,eigVal,eigVec] = PCA(X)
13 | %   [Y,Z,eigVal,eigVec] = PCA(X,dim)
14 | %   [Y,Z,eigVal,eigVec] = PCA(X,dim,centering)
15 | %
16 | % [inputs]
17 | %          X: input data ( K (variables) x N (samples) )
18 | %        dim: number of dimensions to which X is projected (at most K, default: K)
19 | %  centering: centering X before applying PCA or not (true or false, default: true)
20 | %
21 | % [outputs]
22 | %          Y: output matrix (dim x N), Y = Z*X where X is centered first when centering is true
23 | %          Z: transformation matrix (dim x K), built from the top-dim eigenvectors
24 | %     eigVal: all eigenvalues (K x 1), sorted in descending order
25 | %     eigVec: all eigenvectors (K x K), columns in the same order as eigVal
26 | 
27 | % Check errors and set default values
28 | [K,N] = size(X); % variables x samples
29 | if (nargin < 2)
30 |     dim = K; % Default setting: keep every principal component
31 | end
32 | if (nargin < 3)
33 |     centering = true; % Default setting
34 | end
35 | % An out-of-range dim would otherwise fail later with an indexing error
36 | if dim > K; error('dim must not exceed the number of variables (rows of X).'); end
37 | 
38 | if centering
39 |     cX = X - mean(X,2); % Data centering (using implicit expansion)
40 | %     cX = X - repmat(mean(X,2),1,N); % Data centering (prior to R2016b)
41 | else
42 |     cX = X; % Do not apply centering
43 | end
44 | V = cX*(cX')/N; % Covariance matrix of data matrix (normalized by N)
45 | [P,D] = eig(V); % Eigenvalue decomposition (V = P*D*inv(P), P includes eigenvectors and D is a diagonal matrix with eigenvalues)
46 | 
47 | % Sort eigenvalues in descending order and reorder the eigenvectors to match
48 | [eigVal,idx] = sort(diag(D),'descend');
49 | eigVec = P(:,idx); % All eigenvectors
50 | 
51 | % Pick up top-dim eigenvectors; their conjugate transpose is the
52 | % transformation matrix that projects the (centered) data
53 | Z = eigVec(:,1:dim)'; % Transformation matrix
54 | Y = Z*cX; % Output matrix
55 | end
56 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Tools for audio signal processing
 2 | 
 3 | ## About
 4 | Sample MATLAB scripts of audio signal processing tools, including the short-time Fourier transform (STFT) and its inverse.
 5 | 
 6 | ## Contents
 7 | - input [dir]:              includes test audio signals (dry source signals)
 8 | - example_foCepstrum.m:     example script that estimates fo (fundamental frequency) based on cepstrum analysis
 9 | - example_foYin.m:          example script that estimates fo (fundamental frequency) based on YIN
10 | - example_musicSpect.m:     example script that calculates MUSIC spectrum
11 | - example_STFTandISTFT.m:   example script that applies SNRmix, STFT, and inverse STFT
12 | - foCepstrum.m:             estimate fo (fundamental frequency) based on cepstrum analysis
13 | - foYin.m:                  estimate fo (fundamental frequency) based on YIN
14 | - ISTFT.m:                  inverse short-time Fourier transform
15 | - musicSpect.m:             calculation of MUSIC spectrum
16 | - PCA.m:                    principal component analysis
17 | - showSpect.m:              show spectrogram
18 | - SNRmix.m:                 mix two signals with a desired signal-to-noise ratio
19 | - STFT.m:                   short-time Fourier transform
20 | 
21 | ## Usage Note
22 | STFT returns only 0Hz to Nyquist frequency components to avoid redundant calculation.
23 | 
24 | In the inverse STFT, the optimal synthesis window is calculated and applied. This optimal synthesis window is based on the minimal distortion principle described in:
25 | * D. Griffin and J. Lim, "Signal estimation from modified short-time Fourier transform," IEEE Transactions on Acoustics, Speech, and Signal Processing, vol. 32, no. 2, pp. 236-243, 1984.
26 | 
27 | ## See Also
28 | * HP: http://d-kitamura.net


--------------------------------------------------------------------------------
/SNRmix.m:
--------------------------------------------------------------------------------
 1 | function [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise,SNR)
 2 | %
 3 | % Mixing two signals with a desired signal-to-noise ratio (SNR)
 4 | % This function supports multichannel signals.
 5 | %
 6 | % Coded by D. Kitamura (d-kitamura@ieee.org)
 7 | %
 8 | % See also:
 9 | % http://d-kitamura.net
10 | %
11 | % [syntax]
12 | %   [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise)
13 | %   [mix,outSignal,outNoise,coef] = SNRmix(inSignal,inNoise,SNR)
14 | %
15 | % [inputs]
16 | %    inSignal: input signal (length x ch; an input with more columns than rows is transposed)
17 | %     inNoise: input noise (length x ch, same size as inSignal)
18 | %         SNR: desired SNR [dB] (default = 0)
19 | %
20 | % [outputs]
21 | %         mix: mixed signal with desired SNR (length x ch)
22 | %   outSignal: signal in the mixture signal (length x ch)
23 | %    outNoise: noise in the mixture signal (length x ch)
24 | %        coef: mixing coefficient multiplied to inNoise before any normalization (scalar)
25 | 
26 | % Check errors and set default values
27 | if (nargin<2) % checked before inNoise is used for the first time
28 |     error('Too few input arguments.');
29 | end
30 | if (nargin<3)
31 |     SNR = 0;
32 | end
33 | [sigLen, nch ] = size( inSignal ); % named sigLen so that the built-in length() is not shadowed
34 | if size(inNoise,1) ~= sigLen || size(inNoise,2) ~= nch
35 |     error('The size of two input signals are not the same.');
36 | end
37 | 
38 | if sigLen < nch % more columns than rows: treat the input as ch x length and transpose it
39 |     [mix,outSignal,outNoise,coef] = SNRmix(inSignal.',inNoise.',SNR);
40 | else
41 |     squareSums = zeros(2,1); % total energy of signal (1) and noise (2) over all channels
42 |     for m = 1 : nch
43 |         squareSums(1,1) = squareSums(1,1) + ( inSignal(:,m)' * inSignal(:,m) );
44 |         squareSums(2,1) = squareSums(2,1) + ( inNoise(:,m)' * inNoise(:,m) );
45 |     end
46 |     inSNR = 10*log10( ( squareSums(1,:) ) ./ ( squareSums(2,:) ) ); % SNR of the inputs as given [dB]
47 |     coef = ( 10 ^ ( ( inSNR - SNR )  / 20 ) ); % amplitude gain that moves the noise to the desired SNR
48 |     outSignal = inSignal;
49 |     outNoise = inNoise .* coef;
50 |     mix = outSignal + outNoise;
51 |     normCoef = max(max(abs(mix))); % peak absolute value of the mixture
52 |     if  normCoef >= 1 % scale all three outputs by the same factor so that the SNR is kept
53 |         mix = mix ./ normCoef;
54 |         outNoise = outNoise ./ normCoef;
55 |         outSignal = outSignal ./ normCoef;
56 |         fprintf('The signals are normalized in SNRmix.\n');
57 |     end
58 | end
59 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/STFT.m:
--------------------------------------------------------------------------------
  1 | function [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize,analyWin)
  2 | %
  3 | % Short-time Fourier transform
  4 | %
  5 | % Coded by D. Kitamura (d-kitamura@ieee.org)
  6 | %
  7 | % See also:
  8 | % http://d-kitamura.net
  9 | %
 10 | % [syntax]
 11 | %   [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize)
 12 | %   [specgram,analyWin,sigLen] = STFT(sig,fftSize,shiftSize,analyWin)
 13 | %
 14 | % [inputs]
 15 | %          sig: input signal (length x channels)
 16 | %      fftSize: window length [points] in STFT (scalar, positive even number)
 17 | %    shiftSize: shift length [points] in STFT (scalar, positive divisor of fftSize)
 18 | %     analyWin: arbitrary analysis window function in STFT (fftSize x 1) or choose used analysis window function from below:
 19 | %               'hamming'    : Hamming window (default)
 20 | %               'hann'       : von Hann window
 21 | %               'rectangular': rectangular window
 22 | %               'blackman'   : Blackman window
 23 | %               'sine'       : sine window
 24 | %
 25 | % [outputs]
 26 | %     specgram: spectrogram of input signal (frequency bins (fftSize/2+1) x time frames x channels)
 27 | %     analyWin: analysis window function used in STFT (fftSize x 1) and can be used for calculating optimal synthesis window
 28 | %       sigLen: length of original signal without zero padding
 29 | %
 30 | 
 31 | % Arguments check and set default values (a default value is what makes an argument optional)
 32 | arguments
 33 |     sig (:,:) double
 34 |     fftSize (1,1) double {mustBeInteger(fftSize), mustBePositive(fftSize)}
 35 |     shiftSize (1,1) double {mustBeInteger(shiftSize), mustBePositive(shiftSize)}
 36 |     analyWin = 'hamming'
 37 | end
 38 | 
 39 | % Errors check
 40 | [sigLen, nCh] = size(sig); % get signal length and number of channels
 41 | if sigLen < nCh; error('The size of input signal might be wrong. The signal must be length x channels size.'); end
 42 | if mod(fftSize,2) ~= 0; error('fftSize must be an even number.'); end
 43 | if mod(fftSize,shiftSize) ~= 0; error('fftSize must be divisible by shiftSize.'); end
 44 | % Resolve analyWin: either a window vector or the name of a built-in window
 45 | if isnumeric(analyWin)
 46 |     if size(analyWin, 1) ~= fftSize
 47 |         error('The length of analysis window must be the same as fftSize.');
 48 |     end
 49 | else
 50 |     % 'hamming' is supplied by the arguments block when analyWin is omitted
 51 |     switch analyWin
 52 |         case 'hamming'; analyWin = local_hamming(fftSize);
 53 |         case 'hann'; analyWin = local_hann(fftSize);
 54 |         case 'rectangular'; analyWin = local_rectangular(fftSize);
 55 |         case 'blackman'; analyWin = local_blackman(fftSize);
 56 |         case 'sine'; analyWin = local_sine(fftSize);
 57 |         otherwise; error('Input winType is not supported. Type "help STFT" and check options.');
 58 |     end
 59 | end
 60 | 
 61 | % Pad zeros at the beginning and ending of the input signal
 62 | zeroPadSize = fftSize - shiftSize; % size of zero padding
 63 | padSig = [zeros(zeroPadSize,nCh); sig; zeros(fftSize,nCh)]; % padding zeros
 64 | padSigLen = size(padSig,1); % zero-padded signal length
 65 | 
 66 | % Calculate STFT
 67 | nFrame = floor((padSigLen - fftSize + shiftSize) / shiftSize); % number of time frames in spectrogram
 68 | specgram = zeros(fftSize/2+1, nFrame, nCh); % memory allocation (nFreq x nFrames x nCh)
 69 | shortTimeSig = zeros(fftSize, nFrame); % memory allocation (fftSize x nFrames), reused for every channel
 70 | for iCh = 1:nCh
 71 |     for iFrame = 1:nFrame % get short-time signals by framing
 72 |         startPoint = (iFrame-1)*shiftSize; % start point of short-time signal
 73 |         shortTimeSig(:,iFrame) = padSig(startPoint+1:startPoint+fftSize, iCh); % store short-time signal
 74 |     end
 75 |     tmp = fft(shortTimeSig .* analyWin); % get DFT spectra of windowed short-time signals
 76 |     specgram(:,:,iCh) = tmp(1:fftSize/2+1, :); % store spectrum (only from DC to Nyquist frequency components)
 77 | end
 78 | end
 79 | 
 80 | %% Local functions
 81 | function win = local_hamming(fftSize) % Hamming window in periodic form (fftSize x 1)
 82 | tau = linspace(0, 1, fftSize+1).'; tau = tau(1:fftSize); % L+1 grid with the last point dropped
 83 | win = repmat(0.54, fftSize, 1) - 0.46*cos(2.0*pi*tau);
 84 | end
 85 | 
 86 | function win = local_hann(fftSize) % von Hann window in periodic form (fftSize x 1), floored at eps
 87 | tau = linspace(0, 1, fftSize+1).'; tau = tau(1:fftSize); % L+1 grid with the last point dropped
 88 | win = max(repmat(0.5, fftSize, 1) - 0.5*cos(2.0*pi*tau), eps);
 89 | end
 90 | 
 91 | function win = local_rectangular(fftSize) % rectangular window: fftSize x 1 column of ones
 92 | win = ones([fftSize, 1]);
 93 | end
 94 | 
 95 | function win = local_blackman(fftSize) % Blackman window in periodic form (fftSize x 1), floored at eps
 96 | tau = linspace(0, 1, fftSize+1).'; tau = tau(1:fftSize); % L+1 grid with the last point dropped
 97 | win = max(repmat(0.42, fftSize, 1) - 0.5*cos(2.0*pi*tau) + 0.08*cos(4.0*pi*tau), eps);
 98 | end
 99 | 
100 | function win = local_sine(fftSize) % sine window in periodic form (fftSize x 1), floored at eps
101 | tau = linspace(0, 1, fftSize+1).'; tau = tau(1:fftSize); % L+1 grid with the last point dropped
102 | win = max(sin(pi*tau), eps);
103 | end
104 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/example_STFTandISTFT.m:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | % Sample program for applying STFT and ISTFT to audio signals             %
 3 | %                                                                         %
 4 | % Coded by D. Kitamura (d-kitamura@ieee.org)                              %
 5 | %                                                                         %
 6 | % See also:                                                               %
 7 | % http://d-kitamura.net                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | clear; % clear memory (workspace variables)
11 | close all; % close all plot figures
12 | 
13 | % Parameters
14 | wavPath1 = sprintf('./input/drums.wav'); % file path of the first wav signal
15 | wavPath2 = sprintf('./input/piano.wav'); % file path of the second wav signal
16 | 
17 | % Read audio files
18 | [s1,fs] = audioread(wavPath1); % fs: sampling frequency [Hz], s1 is a matrix of size "length x channels"
19 | [s2,fs] = audioread(wavPath2); % fs is overwritten here; s1 and s2 are column vectors if the wave files are monaural
20 | 
21 | % Mixing with SNR = 0 [dB]
22 | SNR = 0;
23 | [x,s1,s2,coef] = SNRmix(s1,s2,SNR); % x: mixture signal of size "length x channels"; s1 and s2 are replaced by the components of x
24 | 
25 | % Apply short-time Fourier transform (STFT)
26 | fftSize = 2048;
27 | shiftSize = fftSize/4;
28 | winType = 'hamming';
29 | [S1,analyWin,orgLen1] = STFT(s1,fftSize,shiftSize,winType);
30 | [S2,analyWin,orgLen2] = STFT(s2,fftSize,shiftSize,winType);
31 | [X,analyWin,orgLenX] = STFT(x,fftSize,shiftSize,winType);
32 | 
33 | % Show spectrograms
34 | showSpect(S1,fs,shiftSize);
35 | showSpect(S2,fs,shiftSize);
36 | showSpect(X,fs,shiftSize);
37 | 
38 | % Apply inverse STFT (ISTFT) with the analysis window returned by STFT
39 | y1 = ISTFT(S1,shiftSize,analyWin,orgLen1);
40 | y2 = ISTFT(S2,shiftSize,analyWin,orgLen2);
41 | z = ISTFT(X,shiftSize,analyWin,orgLenX);
42 | 
43 | % Numerical error (sum of squared differences) caused by calculations in STFT and ISTFT; no semicolons, so the values are displayed
44 | err1 = sum((s1-y1).^2)
45 | err2 = sum((s2-y2).^2)
46 | err3 = sum((x-z).^2)
47 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
48 | 


--------------------------------------------------------------------------------
/example_foCepstrum.m:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | % Sample program for estimating Fo (fundamental frequency) based on       %
 3 | % cepstrum analysis                                                       %
 4 | %                                                                         %
 5 | % Coded by D. Kitamura (d-kitamura@ieee.org)                              %
 6 | %                                                                         %
 7 | % See also:                                                               %
 8 | % http://d-kitamura.net                                                   %
 9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 | 
11 | clear; close all; clc;
12 | addpath('./input'); % make the audio files in ./input reachable by file name
13 | 
14 | % Parameters
15 | fileName = 'a.wav'; % audio file name
16 | fftSize = 4096; % FFT length (short-time length) for analysis 
17 | foMin = 80; % minimum frequency [Hz] for analysis
18 | foMax = 400; % maximum frequency [Hz] for analysis
19 | 
20 | % Read wav file
21 | [signal,fs] = audioread(fileName); % fs is a sampling frequency [Hz]
22 | 
23 | % Fo estimation (rectangular analysis window)
24 | estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,'rectangular');
25 | 
26 | fprintf('Estimated Fo is %.5f [Hz].\n', estFo);
27 | 


--------------------------------------------------------------------------------
/example_foYin.m:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | % Sample program for estimating Fo (fundamental frequency) based on YIN   %
 3 | %                                                                         %
 4 | % Coded by D. Kitamura (d-kitamura@ieee.org)                              %
 5 | %                                                                         %
 6 | % See also:                                                               %
 7 | % http://d-kitamura.net                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | clear; close all; clc;
11 | 
12 | % Parameter setting
13 | samplingFreq = 1000; % sampling frequency [Hz]
14 | samplingTime = 1/samplingFreq; % sampling period [s]
15 | sigTime = 0.5; % signal length [s]
16 | timeAxis = 0:samplingTime:sigTime; % time axis [s] (row vector)
17 | fo = 15; % signal frequency [Hz]
18 | omega = 2*pi*fo; % angular frequency [rad/s]
19 | sigma = 0.8; % amplitude (standard deviation) of noise signal
20 | threshold = 0.1; % threshold value in YIN (between 0 and 1)
21 | foMin = 10; % minimum frequency for Fo estimation [Hz]
22 | foMax = 20; % maximum frequency for Fo estimation [Hz]
23 | 
24 | % Produce signals (transposed to column vectors)
25 | sig = sin(omega*timeAxis).'; % sine wave signal
26 | noisySig = sig + sigma*randn(size(timeAxis)).'; % observed noisy signal (not reproducible: the random seed is not fixed)
27 | 
28 | % Plot signals
29 | plot(timeAxis, noisySig); hold on; plot(timeAxis, sig, 'LineWidth', 2); % plotting signals
30 | xlabel('Time [s]'); ylabel('Amplitude'); % add axis labels
31 | legend('Observed noisy signal', 'True signal', 'Location', 'northeast'); % add legends
32 | 
33 | % Fundamental frequency estimation based on YIN, then report the relative error against the true fo
34 | estFo = foYin(noisySig,threshold,samplingFreq, foMin, foMax);
35 | fprintf('True Fo: %.2f Hz\nEstimated Fo: %.2f Hz\nError rate: %.2f %%\n', fo, estFo, 100*abs(fo-estFo)/fo);
36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37 | 


--------------------------------------------------------------------------------
/example_musicSpect.m:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | % Sample program for calculating MUSIC spectrum                           %
 3 | %                                                                         %
 4 | % Coded by D. Kitamura (d-kitamura@ieee.org)                              %
 5 | %                                                                         %
 6 | % See also:                                                               %
 7 | % http://d-kitamura.net                                                   %
 8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 9 | 
10 | clear; close all; clc;
11 | 
12 | % Fix random seed
13 | seed = 2; % seed
14 | RandStream.setGlobalStream(RandStream('mt19937ar','Seed',seed)); % set pseudo random stream (mt19937ar)
15 | 
16 | % Parameters
17 | fs = 100; % sampling frequency [Hz]
18 | n = (0:1/fs:10)'; % time axis [s] (0 to 10 s in steps of 1/fs)
19 | f1 = 15; % frequency 1 [Hz]
20 | f2 = 35; % frequency 2 [Hz]
21 | sigma = 5; % amplitude of white noise
22 | order = 2; % number of (real-valued) sinusoidal waves
23 | windowSize = 512; % length of short-time signal (frame)
24 | fftSize = 512; % FFT length for calculating MUSIC spectrum
25 | shiftSize = 1; % shift length for short-time signals (frames)
26 | 
27 | % Produce noisy signal
28 | sig = cos(2*pi*f1*n) + sin(2*pi*f2*n) + sigma*randn(size(n));
29 | 
30 | % Plot signal
31 | figure; plot(n,sig);
32 | xlabel('Time'); ylabel('Amplitude');
33 | title('Noisy signal'); grid on;
34 | 
35 | % Plot amplitude spectrum of signal against frequency (DFT bin k corresponds to k*fs/N [Hz], so the axis matches its label)
36 | figure; plot((0:numel(sig)-1).'*fs/numel(sig), 20*log10(abs(fft(sig))));
37 | xlabel('Frequency [Hz]'); ylabel('Power [dB]');
38 | title('Fourier power spectrum'); grid on;
39 | 
40 | % Calculation of MUSIC spectrum based on sub-space method
41 | [P,f] = musicSpect(sig,order,fs,windowSize,fftSize,shiftSize);
42 | figure; plot(f,20*log10(P));
43 | xlim([0,fs/2]);
44 | xlabel('Frequency [Hz]'); ylabel('Power [dB]');
45 | title('Pseudospectrum Estimate via MUSIC'); grid on;
46 | 
47 | % MUSIC spectrum using MATLAB built-in function (see: https://jp.mathworks.com/help/signal/ref/pmusic.html)
48 | % short-time signals are produced with shiftSize=1 and apply SVD
49 | dim = 2*order; % 2 times of number of sinusoidal waves when signal is real-valued
50 | fftSize = windowSize; % FFT size for calculating MUSIC spectrum
51 | [P,f] = pmusic(sig,dim,fftSize,fs,windowSize);
52 | figure; plot(f,20*log10(abs(P)));
53 | xlabel('Frequency [Hz]'); ylabel('Power [dB]');
54 | title('Pseudospectrum Estimate via MUSIC (MATLAB built-in)'); grid on;
55 | 


--------------------------------------------------------------------------------
/foCepstrum.m:
--------------------------------------------------------------------------------
  1 | function estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,window)
  2 | %
  3 | % Estimation of Fo (fundamental frequency) based on cepstrum analysis
  4 | %
  5 | % Coded by D. Kitamura (d-kitamura@ieee.org)
  6 | %
  7 | % See also:
  8 | % http://d-kitamura.net
  9 | %
 10 | % [syntax]
 11 | %   estFo = foCepstrum(signal,fs,foMin,foMax)
 12 | %   estFo = foCepstrum(signal,fs,foMin,foMax,fftSize)
 13 | %   estFo = foCepstrum(signal,fs,foMin,foMax,fftSize,window)
 14 | %
 15 | % [inputs]
 16 | %       signal: input signal (sigLen x 1; a 1 x sigLen row vector is transposed)
 17 | %           fs: sampling frequency [Hz]
 18 | %        foMin: minimum frequency for analysis [Hz] (required)
 19 | %        foMax: maximum frequency for analysis [Hz] (required)
 20 | %      fftSize: length of short-time signal for calculating cepstrum (scalar, default: 2^nextpow2(round(sigLen/2)))
 21 | %       window: arbitrary analysis window function (fftSize x 1) or choose function from below:
 22 | %               'hamming'    : Hamming window (default)
 23 | %               'hann'       : von Hann window
 24 | %               'rectangular': rectangular window
 25 | %               'blackman'   : Blackman window
 26 | %               'sine'       : sine window
 27 | %
 28 | % [outputs]
 29 | %        estFo: estimated fo (scalar [Hz])
 30 | %
 31 | 
 32 | % Check errors and set default values
 33 | if (nargin < 4) % checked first because foMin and foMax are used below
 34 |     error('Too few input arguments.');
 35 | end
 36 | if size(signal,2) > size(signal,1)
 37 |     signal = signal'; % accept a row vector by turning it into a column
 38 | end
 39 | [sigLen,nCh] = size(signal); % measured after the transposition above
 40 | if nCh ~= 1
 41 |     error('foCepstrum only supports single-channel signal.');
 42 | end
 43 | cepMax = round(fs/foMin+1); % Maximum order of cepstrum for analysis
 44 | cepMin = round(fs/foMax+1); % Minimum order of cepstrum for analysis
 45 | if (nargin < 5)
 46 |     fftSize = 2^(nextpow2(round(sigLen/2))); % default analysis length
 47 | end
 48 | if fftSize < cepMax
 49 |     error('fftSize (analysis length) is too short or foMin is too low.');
 50 | end
 51 | if fftSize > sigLen; error('fftSize (analysis length) must not exceed the length of the signal.'); end
 52 | if (nargin < 6)
 53 |     window = hamming_local(fftSize); % default analysis window
 54 | else
 55 |     if isnumeric(window)
 56 |         if size(window, 1) ~= fftSize
 57 |             error('The length of analysis window must be the same as that of fftSize.');
 58 |         end
 59 |     else
 60 |         switch window
 61 |             case 'hamming'
 62 |                 window = hamming_local(fftSize);
 63 |             case 'hann'
 64 |                 window = hann_local(fftSize);
 65 |             case 'rectangular'
 66 |                 window = rectangular_local(fftSize);
 67 |             case 'blackman'
 68 |                 window = blackman_local(fftSize);
 69 |             case 'sine'
 70 |                 window = sine_local(fftSize);
 71 |             otherwise
 72 |                 error('Input window type is not supported. Check options.')
 73 |         end
 74 |     end
 75 | end
 76 | 
 77 | % Find a short-time signal whose power is maximum (non-overlapping frames of fftSize points)
 78 | powMax = 0; indStart = 1;
 79 | for ind = 1:fftSize:sigLen-fftSize+1 % +1 so that the last complete frame is examined too
 80 |     pow = sum(signal(ind:ind+fftSize-1).^2); % signal power of short-time signal
 81 |     if pow > powMax
 82 |         powMax = pow;
 83 |         indStart = ind;
 84 |     end
 85 | end
 86 | analySignal = signal(indStart:indStart+fftSize-1); % short-time signal
 87 | 
 88 | % Calculate cepstrum of short-time signal
 89 | windowedAnalySignal = analySignal.*window; % windowing
 90 | spectrum = fft(windowedAnalySignal); % FFT
 91 | logAbsSpectrum = log(max(abs(spectrum),eps)); % absolute, eps flooring, and log
 92 | cepstrum = real(ifft(logAbsSpectrum)); % inverse FFT
 93 | 
 94 | % Find maximum cepstrum and get its order (quefrency)
 95 | [~, indCep] = max(cepstrum(cepMin:cepMax));
 96 | maxQuef = indCep + cepMin - 2; % -1 for the offset of the searched range and -1 because index 1 is quefrency 0
 97 | 
 98 | % Convert quefrency to frequency
 99 | estFo = fs/maxQuef;
100 | 
101 | end
102 | 
103 | %% Local functions
104 | function analyWindow = hamming_local(fftSize) % periodic Hamming window (fftSize x 1)
105 | angles = 2.0*pi*linspace(0,1,fftSize+1).'; % L+1 angles; only the first L are used
106 | analyWindow = repmat(0.54,fftSize,1) - 0.46*cos(angles(1:fftSize));
107 | end
108 | 
109 | function analyWindow = hann_local(fftSize) % periodic von Hann window (fftSize x 1), floored at eps
110 | angles = 2.0*pi*linspace(0,1,fftSize+1).'; % L+1 angles; only the first L are used
111 | analyWindow = max(repmat(0.5,fftSize,1) - 0.5*cos(angles(1:fftSize)),eps);
112 | end
113 | 
114 | function analyWindow = rectangular_local(fftSize) % rectangular window: fftSize x 1 column of ones
115 | analyWindow = ones([fftSize, 1]);
116 | end
117 | 
118 | function analyWindow = blackman_local(fftSize) % periodic Blackman window (fftSize x 1), floored at eps
119 | pos = linspace(0,1,fftSize+1).'; pos = pos(1:fftSize); % L+1 grid with the last point dropped
120 | analyWindow = max(repmat(0.42,fftSize,1) - 0.5*cos(2.0*pi*pos) + 0.08*cos(4.0*pi*pos),eps);
121 | end
122 | 
123 | function analyWindow = sine_local(fftSize)
124 | % Periodic sine window (fftSize x 1), floored at eps so that the first sample is not zero
125 | normTime = linspace(0,1,fftSize+1).'; halfCycle = sin(pi*normTime(1:fftSize)); analyWindow = max(halfCycle,eps);
126 | end
127 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/foYin.m:
--------------------------------------------------------------------------------
 1 | function estFo = foYin(sig,threshold,sampFreq,foMin,foMax)
 2 | %
 3 | % Estimation of Fo (fundamental frequency) based on YIN
 4 | %
 5 | % Coded by D. Kitamura (d-kitamura@ieee.org)
 6 | %
 7 | % See also:
 8 | % http://d-kitamura.net
 9 | % M. Mauch and S. Dixon, "PYIN: A fundamental frequency estimator using probabilistic threshold distributions," Proc. ICASSP, pp. 659-663, 2014.
10 | % A. Cheveigne and H. Kawahara, "YIN, a fundamental frequency estimator for speech and music," The Journal of the Acoustical Society of America, vol. 111, no. 4, pp. 1917–1930, 2002.
11 | %
12 | % [syntax]
13 | %   estFo = foYin(sig,threshold,sampFreq)
14 | %   estFo = foYin(sig,threshold,sampFreq,foMin)
15 | %   estFo = foYin(sig,threshold,sampFreq,foMin,foMax)
16 | %
17 | % [inputs]
18 | %          sig: input signal (sigLen x 1, at least two samples)
19 | %    threshold: absolute threshold applied to the cumulative-mean-normalized difference (nonnegative scalar)
20 | %     sampFreq: sampling frequency [Hz]
21 | %        foMin: minimum frequency for analysis (default: 0 [Hz])
22 | %        foMax: maximum frequency for analysis (default: sampFreq/2 [Hz])
23 | %
24 | % [outputs]
25 | %        estFo: estimated fo (scalar [Hz])
26 | %
27 | 
28 | % Check arguments and set default values
29 | arguments
30 |     sig (:,1) double
31 |     threshold (1,1) double
32 |     sampFreq (1,1) double
33 |     foMin (1,1) double = 0;
34 |     foMax (1,1) double = sampFreq/2;
35 | end
36 | 
37 | % Check errors (an identifier is passed so that error() handles the message as a format string)
38 | if threshold < 0; error('foYin:invalidThreshold', 'Threshold value in YIN must be nonnegative.'); end
39 | if sampFreq <= 0; error('foYin:invalidSampFreq', 'Sampling frequency must be positive.'); end
40 | if foMin < 0; error('foYin:invalidFoMin', 'Minimum frequency must be nonnegative.'); end
41 | if foMax <= 0; error('foYin:invalidFoMax', 'Maximum frequency must be positive.'); end
42 | if foMin > foMax; error('foYin:invalidRange', 'Minimum frequency must not exceed maximum frequency.'); end
43 | 
44 | % Initialization
45 | sigLen = size(sig, 1); % signal length
46 | lagRange = floor(sigLen/2); % range of lag (denoted W in the papers, a half of signal length)
47 | if lagRange < 1; error('foYin:shortSignal', 'Input signal must contain at least two samples.'); end
48 | sampTime = 1/sampFreq; % sampling time
49 | indMin = max(round(1/foMax/sampTime), 1); % index that corresponds to foMax (minimum of estimated lag); lag 0 is not a valid candidate
50 | indMax = min(round(1/foMin/sampTime), lagRange); % index that corresponds to foMin (maximum of estimated lag), limited to the available lags (foMin = 0 gives Inf, which is limited here)
51 | if indMin > indMax; error('foYin:emptySearchRange', 'No lag in [1, floor(sigLen/2)] corresponds to the frequency range [foMin, foMax].'); end
52 | 
53 | % Calculate cumulative-mean-normalized squared difference between original and lagged signals
54 | refSig = sig(1:lagRange,1); % first half of signal, compared with each lagged segment
55 | sqDiff = zeros(lagRange,1); % squared difference for each lag (computed lag by lag to avoid a lagRange x lagRange matrix)
56 | for lag = 1:lagRange
57 |     sqDiff(lag) = sum((refSig - sig(1+lag:lagRange+lag,1)).^2); % squared difference between original and lagged signals
58 | end
59 | cumMeanDiff = cumsum(sqDiff)./(1:1:lagRange).'; % cumulative mean
60 | normDiff = sqDiff./cumMeanDiff; % cumulative-mean normalization
61 | 
62 | % Estimation of fundamental frequency Fo
63 | candDiff = normDiff(indMin:indMax,1); % normDiff limited to the range [indMin, indMax]
64 | candInd = find(candDiff <= threshold, 1); % first index that satisfies normDiff<=threshold
65 | if isempty(candInd) % if there is no index that satisfies normDiff<=threshold in the range [indMin, indMax]
66 |     [~, candInd] = min(candDiff); % use index whose normDiff is the minimum
67 | else % descend to the bottom of the first dip below threshold (smallest lag that gives a minimum deeper than threshold)
68 |     while candInd < size(candDiff,1) && candDiff(candInd+1) < candDiff(candInd)
69 |         candInd = candInd + 1;
70 |     end
71 | end
72 | estTo = (candInd+indMin-1) * sampTime; % estimated fundamental period To, where indMin-1 is added because candInd is an index for normDiff(indMin:indMax,1) (limited range)
73 | estFo = 1/estTo; % estimated fundamental frequency Fo
74 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/input/a.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/a.wav


--------------------------------------------------------------------------------
/input/bass.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/bass.wav


--------------------------------------------------------------------------------
/input/drums.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/drums.wav


--------------------------------------------------------------------------------
/input/guitar.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/guitar.wav


--------------------------------------------------------------------------------
/input/piano.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-kitamura/audioSignalProcessTools/b676d059e1afa5dcfeab6335941b61cb35d385ff/input/piano.wav


--------------------------------------------------------------------------------
/musicSpect.m:
--------------------------------------------------------------------------------
 1 | function [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize,shiftSize)
 2 | %
 3 | % Estimation of MUSIC spectrum based on sub-space method
 4 | %
 5 | % Coded by D. Kitamura (d-kitamura@ieee.org)
 6 | %
 7 | % See also:
 8 | % http://d-kitamura.net
 9 | %
10 | % [syntax]
11 | %   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize)
12 | %   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize)
13 | %   [musicSpect,freqAxis] = musicSpect(signal,order,fs,windowSize,fftSize,shiftSize)
14 | %
15 | % [inputs]
16 | %       signal: input signal (sigLen x 1, real-valued)
17 | %        order: number of (real-valued) sinusoidal waves in signal (scalar, 2*order must be less than windowSize)
18 | %           fs: sampling frequency [Hz]
19 | %   windowSize: length of short-time signal frame (scalar)
20 | %      fftSize: length of Fourier transform for calculating MUSIC spectrum (scalar, default: windowSize)
21 | %    shiftSize: shift length of frames (default: 1)
22 | %
23 | % [outputs]
24 | %   musicSpect: pseudo spectrum (MUSIC spectrum) of input signal (frequency bins (fftSize) x 1)
25 | %     freqAxis: frequency axis vector [Hz] that corresponds to each bin of musicSpect (1 x fftSize)
26 | %
27 | 
28 | % Check errors and set default values (an identifier is passed so that error() handles the message as a format string)
29 | if (nargin < 4)
30 |     error('musicSpect:notEnoughInputs', 'Too few input arguments.');
31 | end
32 | if (size(signal,2) > 1)
33 |     error('musicSpect:invalidSignal', 'Input argument "signal" must be a column vector.');
34 | end
35 | if ~isreal(signal)
36 |     error('musicSpect:invalidSignal', 'Input argument "signal" must be a real-valued vector.');
37 | end
38 | if (nargin < 5)
39 |     fftSize = windowSize; % default
40 | end
41 | if (nargin < 6)
42 |     shiftSize = 1; % default
43 | end
44 | if (2*order >= windowSize) % each real-valued sinusoid occupies two eigenvectors, and at least one noise eigenvector is required
45 |     error('musicSpect:invalidOrder', 'Input argument "order" is too large: 2*order must be less than windowSize.');
46 | end
47 | 
48 | sigLen = size(signal,1);
49 | 
50 | % short-time framing (break signal into short-time signal pieces)
51 | sigZeroPad = [signal;zeros(windowSize-1,1)]; % zero padding
52 | nFrames = ceil(sigLen/shiftSize); % number of frames
53 | shortTimeSig = zeros(windowSize, nFrames); % memory allocation
54 | for frame = 1:nFrames
55 |     startPoint = (frame-1)*shiftSize+1;
56 |     endPoint = startPoint+windowSize-1;
57 |     shortTimeSig(:,frame) = sigZeroPad(startPoint:endPoint);
58 | end
59 | 
60 | % MUSIC spectrum calculation
61 | covMat = (shortTimeSig*shortTimeSig')/nFrames; % sample covariance matrix
62 | [eigVec,eigVal] = eig(covMat); % eigenvalue decomposition (covMat = eigVec * eigVal * eigVec')
63 | [~, ind] = sort(diag(eigVal), 'descend'); % sort eigenvalues in descending order and get sorted index
64 | sortEigVec = eigVec(:,ind); % sort eigenvectors (column vectors of eigVec) in descending order
65 | noiseEigVec = sortEigVec(:,2*order+1:end); % noise eigenvectors
66 | fftNoiseEigVec = abs(fft(noiseEigVec,fftSize)).^2; % power spectrum of noise eigenvectors (the denominator of MUSIC spectrum is a sum of inner product of noise eigenvector and Fourier basis, which is equal to DFT)
67 | musicSpect = 1./sum(fftNoiseEigVec,2); % pseudo spectrum (MUSIC spectrum)
68 | freqAxis = (0:fftSize-1)*fs/fftSize; % frequency axis (one value per DFT bin, so its length always matches musicSpect)
69 | 
70 | end
71 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/showSpect.m:
--------------------------------------------------------------------------------
 1 | function [figHdl,freqAx,timeAx] = showSpect(specgram,sampFreq,shiftSize)
 2 | %
 3 | % Show spectrogram from time-frequency matrix
 4 | % This function supports both complex and nonnegative input and both
 5 | % monaural and multichannel spectrograms.
 6 | % For a multichannel spectrogram, the order of its indexes must be
 7 | % [nfreqs x nframes x nch].
 8 | % Note that color map range is moderately defined. Tune by yourself.
 9 | %
10 | % Coded by D. Kitamura (d-kitamura@ieee.org)
11 | %
12 | % See also:
13 | % http://d-kitamura.net
14 | %
15 | % [syntax]
16 | %   [figHdl,freqAx,timeAx] = showSpect(specgram)
17 | %   [figHdl,freqAx,timeAx] = showSpect(specgram,sampFreq,shiftSize)
18 | %
19 | % [inputs]
20 | %   specgram: STFT matrix ([nFreqs x nTime] for a monaural spectrogram,
21 | %             and [nFreqs x nTime x channels] for a multichannel
22 | %             spectrogram, where number of frequency bins is fftSize/2+1 (from 0 to sampFreq/2),
23 | %             and both complex-valued and nonnegative spectrograms are supported.)
24 | %   sampFreq: sampling frequency [Hz]
25 | %  shiftSize: length of window shift
26 | %
27 | % [outputs]
28 | %     figHdl: figure handles (one figure per channel)
29 | %     freqAx: frequency axis (1 x nbin)
30 | %     timeAx: time axis (1 x nframe)
31 | 
32 | % Check errors and set default values
33 | [nFreq, nTime, nCh] = size(specgram);
34 | if ~isreal(specgram) % for complex spectrogram
35 |     specgram = real(abs(specgram).^2); % calculate power spectrogram
36 | end
37 | if (nargin < 2)
38 |     freqAx = 1:nFreq;
39 |     timeAx = 1:nTime;
40 | elseif (nargin < 3)
41 |     error('showSpect:notEnoughInputs', 'Too few input arguments.\nIf you input sampFreq, shiftSize is also required.');
42 | else
43 |     freqAx = linspace(0, sampFreq/2, nFreq);
44 |     timeAx = linspace(0, shiftSize/sampFreq*nTime, nTime);
45 | end
46 | 
47 | % Convert to decibel once and define a color map range shared by all channels
48 | logSpecgram = 10*log10(specgram);
49 | minVal = min(logSpecgram(:));
50 | maxVal = max(logSpecgram(:));
51 | colorRange = [(minVal - maxVal)/6, maxVal]; % moderately define color map range
52 | isValidRange = numel(colorRange) == 2 && colorRange(1) < colorRange(2); % caxis requires increasing limits; otherwise the automatic range is kept
53 | 
54 | % Draw spectrogram image of each channel
55 | for iCh = 1:nCh
56 |     figHdl(iCh) = figure;
57 |     imagesc(timeAx, freqAx, logSpecgram(:,:,iCh));
58 |     axis tight; box on;
59 |     if isValidRange; caxis(colorRange); end
60 |     set(gca, 'YDir', 'normal'); % invert vertical axis
61 |     set(gca, 'FontName', 'Times', 'FontSize', 16);
62 |     if nCh ~= 1
63 |         title( sprintf('%dch spectrogram',iCh), 'FontName', 'Arial', 'FontSize', 16 );
64 |     end
65 |     if (nargin < 2)
66 |         xlabel('Time frame', 'FontName', 'Arial', 'FontSize', 16);
67 |         ylabel('Frequency bin', 'FontName', 'Arial', 'FontSize', 16);
68 |     else
69 |         xlabel('Time [s]', 'FontName', 'Arial', 'FontSize', 16);
70 |         ylabel('Frequency [Hz]', 'FontName', 'Arial', 'FontSize', 16);
71 |     end
72 | end
73 | end
74 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------
/showSpect3d.m:
--------------------------------------------------------------------------------
 1 | function [figHdl,freqAx,timeAx] = showSpect3d(specgram,sampFreq,shiftSize)
 2 | %
 3 | % Show spectrogram from time-frequency matrix as a surface plot viewed from above
 4 | % This function supports both complex and nonnegative input and both
 5 | % monaural and multichannel spectrograms.
 6 | % For a multichannel spectrogram, the order of its indexes must be
 7 | % [nfreqs x nframes x nch].
 8 | % Note that color map range is moderately defined. Tune by yourself.
 9 | %
10 | % Coded by D. Kitamura (d-kitamura@ieee.org)
11 | %
12 | % See also:
13 | % http://d-kitamura.net
14 | %
15 | % [syntax]
16 | %   [figHdl,freqAx,timeAx] = showSpect3d(specgram)
17 | %   [figHdl,freqAx,timeAx] = showSpect3d(specgram,sampFreq,shiftSize)
18 | %
19 | % [inputs]
20 | %   specgram: STFT matrix ([nFreqs x nTime] for a monaural spectrogram,
21 | %             and [nFreqs x nTime x channels] for a multichannel
22 | %             spectrogram, where number of frequency bins is fftSize/2+1 (from 0 to sampFreq/2),
23 | %             and both complex-valued and nonnegative spectrograms are supported.)
24 | %   sampFreq: sampling frequency [Hz]
25 | %  shiftSize: length of window shift
26 | %
27 | % [outputs]
28 | %     figHdl: figure handles (one figure per channel)
29 | %     freqAx: frequency axis (1 x nbin)
30 | %     timeAx: time axis (1 x nframe)
31 | 
32 | % Check errors and set default values
33 | [nFreq, nTime, nCh] = size(specgram);
34 | if ~isreal(specgram) % for complex spectrogram
35 |     specgram = real(abs(specgram).^2); % calculate power spectrogram
36 | end
37 | if (nargin < 2)
38 |     freqAx = 1:nFreq;
39 |     timeAx = 1:nTime;
40 | elseif (nargin < 3)
41 |     error('showSpect3d:notEnoughInputs', 'Too few input arguments.\nIf you input sampFreq, shiftSize is also required.');
42 | else
43 |     freqAx = linspace(0, sampFreq/2, nFreq);
44 |     timeAx = linspace(0, shiftSize/sampFreq*nTime, nTime);
45 | end
46 | 
47 | % Convert to decibel once and define a color map range shared by all channels
48 | logSpecgram = 10*log10(specgram);
49 | minVal = min(logSpecgram(:));
50 | maxVal = max(logSpecgram(:));
51 | colorRange = [(minVal - maxVal)/6, maxVal]; % moderately define color map range
52 | isValidRange = numel(colorRange) == 2 && colorRange(1) < colorRange(2); % caxis requires increasing limits; otherwise the automatic range is kept
53 | 
54 | % Draw spectrogram surface of each channel
55 | for iCh = 1:nCh
56 |     figHdl(iCh) = figure;
57 |     surf(timeAx, freqAx, logSpecgram(:,:,iCh), 'edgecolor', 'none');
58 |     axis tight; box on;
59 |     if isValidRange; caxis(colorRange); end
60 |     view(0, 90); % look at the surface from directly above
61 |     set(gca, 'FontName', 'Times', 'FontSize', 16);
62 |     if nCh ~= 1
63 |         title( sprintf('%dch spectrogram',iCh), 'FontName', 'Arial', 'FontSize', 16 );
64 |     end
65 |     if (nargin < 2)
66 |         xlabel('Time frame', 'FontName', 'Arial', 'FontSize', 16);
67 |         ylabel('Frequency bin', 'FontName', 'Arial', 'FontSize', 16);
68 |     else
69 |         xlabel('Time [s]', 'FontName', 'Arial', 'FontSize', 16);
70 |         ylabel('Frequency [Hz]', 'FontName', 'Arial', 'FontSize', 16);
71 |     end
72 | end
73 | end
74 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% EOF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


--------------------------------------------------------------------------------