├── avenergy.m
├── beat.m
├── beatavg.m
├── bts2time.m
├── caldiag.m
├── chorusdetection.asv
├── chorusdetection.m
├── chromagram_E.m
├── chromagram_IF.m
├── chromagram_P.m
├── chrombeatftrs.asv
├── chrombeatftrs.m
├── chromenhance.m
├── delete0.asv
├── delete0.m
├── distsc.m
├── fft2melmx.m
├── fftOneSide.m
├── hz2octs.m
├── ifgram.m
├── ifptrack.m
├── isenhan.m
├── lmin.m
├── localmax.m
├── locseg.asv
├── locseg.m
├── main.asv
├── main.m
├── matcentre.m
├── mfccbeatftrs.asv
├── mfccbeatftrs.m
├── octs2hz.m
├── readme.txt
├── sdm.m
├── tempo.m
└── tokenize.m


/avenergy.m:
--------------------------------------------------------------------------------
 1 | function [score] = avenergy(mono2, aven, fs, bts, seggroup, ind)
 2 | %AVENERGY Score a segment by its mean signal value relative to a reference
 3 | %   mono2    - signal vector that is averaged over the segment
 4 | %   aven     - reference level the segment mean is divided by
 5 | %   fs       - sampling rate, converts the values in bts to sample indices
 6 | %   bts      - beats, indexed by the entries of seggroup
 7 | %   seggroup - the group contains the interesting segments; columns 1
 8 | %              and 3 of row ind give the first and last beat index
 9 | %   ind      - index (row) in seggroup
10 | 
11 | firstSample = round(fs * bts(seggroup(ind, 1)));
12 | lastSample  = round(fs * bts(seggroup(ind, 3)));
13 | segment = mono2(firstSample:lastSample);
14 | score = mean(segment) / aven;
15 | 
16 | end
17 | 
18 | 


--------------------------------------------------------------------------------
/beat.m:
--------------------------------------------------------------------------------
  1 | function [b,onsetenv,D,cumscore] = beat(d,sr,startbpm,tightness,doplot)
  2 | % [b,onsetenv,D,cumscore] = beat(d,sr,startbpm,tightness,doplot)
  3 | %   b returns the times (in sec) of the beats in the waveform d, samplerate sr.
  4 | %   startbpm specifies the target tempo.  If it is a two-element
  5 | %   vector, it is taken as the mode of a tempo search window, with
  6 | %   the second element being the spread (in octaves) of the
  7 | %   search, and the best tempo is calculated (with tempo.m).
  8 | %   startbpm = 0 (the default) searches around 120 BPM, spread 0.7.
  9 | %   tightness controls how tightly the start tempo is enforced
 10 | %   within the beat (default 6, larger = more rigid); if it is a
 11 | %   two-element vector the second parameter is alpha, the strength
 12 | %   of transition costs relative to local match (0..1, default 0.8).
 13 | %   doplot enables diagnostic plots; if it has two elements, they
 14 | %   are the time range (in sec) for the diagnostic plots.
 15 | %   If d is nonnegative everywhere it is treated as an onset strength
 16 | %   envelope sampled at sr rather than as a waveform.
 17 | %   onsetenv returns the raw onset detection envelope
 18 | %   D returns the mel-spectrogram ([] when tempo.m was not run),
 19 | %   cumscore returns the per-frame cumulated dynamic-programming score.
 20 | %   b is empty if no usable end point is found for the backtrace.
 21 | % 2006-08-25 dpwe@ee.columbia.edu
 22 | % uses: localmax, tempo
 23 | 
 24 | %   Copyright (c) 2006 Columbia University.
 25 | % 
 26 | %   This file is part of LabROSA-coversongID
 27 | % 
 28 | %   LabROSA-coversongID is free software; you can redistribute it and/or modify
 29 | %   it under the terms of the GNU General Public License version 2 as
 30 | %   published by the Free Software Foundation.
 31 | % 
 32 | %   LabROSA-coversongID is distributed in the hope that it will be useful, but
 33 | %   WITHOUT ANY WARRANTY; without even the implied warranty of
 34 | %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 35 | %   General Public License for more details.
 36 | % 
 37 | %   You should have received a copy of the GNU General Public License
 38 | %   along with LabROSA-coversongID; if not, write to the Free Software
 39 | %   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 40 | %   02110-1301 USA
 41 | % 
 42 | %   See the file "COPYING" for the text of the license.
 43 | 
 44 | if nargin < 3;   startbpm = 0; end
 45 | if nargin < 4;   tightness = 0; end
 46 | if nargin < 5;   doplot = 0; end
 47 | 
 48 | if length(startbpm) == 2
 49 |   temposd = startbpm(2);
 50 |   startbpm = startbpm(1);
 51 | else
 52 |   temposd = 0;
 53 | end
 54 | if length(tightness) == 2
 55 |   alpha = tightness(2);
 56 |   tightness = tightness(1);
 57 | else
 58 |   alpha = 0.8;   % terminated with ';' so the default is not echoed
 59 | end
 60 | if tightness == 0;  tightness = 6; end
 61 | 
 62 | % D is only produced by tempo.m; predefine it so it is a valid output
 63 | % when that call is skipped (envelope input with a fixed startbpm)
 64 | D = [];
 65 | 
 66 | % Have we been given an envelope (nonnegative waveform)
 67 | if min(d) >= 0
 68 |   onsetenv = d;
 69 |   sgsrate = sr;
 70 |   disp(['beat: treating input as onset strength envelope']);
 71 | else
 72 |   onsetenv = [];
 73 | end
 74 | 
 75 | % debug/plotting options
 76 | plotlims = [];
 77 | if length(doplot) > 1
 78 |   % specify zoom-in limits too
 79 |   plotlims = doplot;
 80 |   doplot = 1;
 81 | end
 82 | if doplot > 0;  debug = 1; else debug = 0; end
 83 | 
 84 | b = [];
 85 | 
 86 | % Select tempo search either with startbpm = 0 (means use defaults)
 87 | % or startbpm > 0 but temposd > 0 too (means search around startbpm)
 88 | % Decide this now, before temposd is given its default below;
 89 | % otherwise a fixed startbpm would always be overwritten.
 90 | usesearch = (startbpm == 0 || temposd > 0);
 91 | 
 92 | % If onsetenv is empty, have to run tempo too to convert waveform
 93 | % to onsetenv, but we might not use the tempo it picks.
 94 | if usesearch || isempty(onsetenv)
 95 | 
 96 |   if startbpm == 0
 97 |     tempomean = 120;
 98 |   else
 99 |     tempomean = startbpm;
100 |   end
101 | 
102 |   if temposd == 0
103 |     temposd = 0.7;
104 |   end
105 |   
106 |   % Subfunction estimates global BPM; returns 'onset strength'
107 |   % waveform onsetenv
108 |   % If we were given an onsetenv as input, will use that
109 |   [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tempomean,temposd,onsetenv,debug);
110 |   
111 |   % tempo.m returns the top-2 BPM estimates; use faster one for
112 |   % beat tracking (only when a tempo search was actually requested)
113 |   if usesearch
114 |     startbpm = max(t([1 2]));
115 |   end
116 | 
117 |   if debug == 1
118 |     % plot the mel-specgram
119 |     tt = [1:length(onsetenv)]/sgsrate;
120 |     subplot(411)
121 |     imagesc(tt,[1 40],D); axis xy
122 |     subplot(412)
123 |     plot(tt,onsetenv);
124 |   end
125 | 
126 | end
127 | 
128 | % convert startbpm to startpd
129 | startpd = (60*sgsrate)/startbpm;
130 | %disp(['startpd=',num2str(startpd)]);
131 | 
132 | pd = startpd;
133 |   
134 | % Smooth beat events
135 | templt = exp(-0.5*(([-pd:pd]/(pd/32)).^2));
136 | localscore = conv(templt,onsetenv);
137 | localscore = localscore(round(length(templt)/2)+[1:length(onsetenv)]);
138 | 
139 | % DP version:
140 | % backlink(time) is index of best preceding time for this point
141 | % cumscore(time) is total cumulated score to this point
142 | 
143 | backlink = zeros(1,length(localscore));
144 | cumscore = zeros(1,length(localscore));
145 | 
146 | % search range for previous beat
147 | prange = round(-2*pd):-round(pd/2);
148 | 
149 | % Skewed window
150 | txwt = exp(-0.5*((tightness*log(prange/-pd)).^2));
151 | 
152 | % Loop-invariant threshold used to spot the first onset
153 | onsetthresh = 0.01*max(localscore);
154 | 
155 | starting = 1;
156 | for i = 1:length(localscore)
157 |   
158 |   timerange = i + prange;
159 |   
160 |   % Are we reaching back before time zero?
161 |   zpad = max(0, min(1-timerange(1),length(prange)));
162 | 
163 |   % Search over all possible predecessors and apply transition 
164 |   % weighting
165 |   scorecands = txwt .* [zeros(1,zpad),cumscore(timerange(zpad+1:end))];
166 |   % Find best predecessor beat
167 |   [vv,xx] = max(scorecands);
168 |   % Add on local score
169 |   cumscore(i) = alpha*vv + (1-alpha)*localscore(i);
170 | 
171 |   % special case to catch first onset
172 |   if starting == 1 && localscore(i) < onsetthresh
173 |     backlink(i) = -1;
174 |   else
175 |     backlink(i) = timerange(xx);
176 |     % prevent it from resetting, even through a stretch of silence
177 |     starting = 0;
178 |   end
179 |   
180 | end
181 | 
182 | %%%% Backtrace
183 | 
184 | % Cumulated score is stabilized to lie in constant range, 
185 | % so just look for one near the end that has a reasonable score
186 | ismax = localmax(cumscore);
187 | medscore = median(cumscore(ismax));
188 | bestendx = max(find(cumscore .* ismax > 0.5*medscore));
189 | 
190 | b = bestendx;
191 | 
192 | % b is empty when no frame passes the threshold; skip the backtrace then
193 | if ~isempty(b)
194 |   while backlink(b(end)) > 0
195 |     b = [b,backlink(b(end))];
196 |   end
197 | end
198 | 
199 | b = fliplr(b);
200 | 
201 | % return beat times in secs
202 | b = b / sgsrate;
203 | 
204 | % Debug visualization
205 | if doplot == 1
206 |   subplot(411)
207 |   hold on;
208 |   plot([b;b],[0;40]*ones(1,length(b)),'w');
209 |   hold off;
210 |   subplot(412)
211 |   hold on;
212 |   plot([b;b],[-5;20]*ones(1,length(b)),'g');
213 |   hold off;
214 | 
215 |   % redo 3rd pane as xcorr with templt
216 |   subplot(413)
217 |   tt = [1:length(localscore)]/sgsrate;
218 |   plot(tt,localscore);
219 |   hold on; plot([b;b],[min(localscore);max(localscore)]*ones(1,length(b)),'g'); hold off
220 |    
221 |   if length(plotlims) > 0
222 |     for i = 1:3;
223 |       subplot(4,1,i)
224 |       ax = axis;
225 |       ax([1 2]) = plotlims;
226 |       axis(ax);
227 |     end
228 |   end
229 |   
230 | end
231 | 


--------------------------------------------------------------------------------
/beatavg.m:
--------------------------------------------------------------------------------
 1 | function X = beatavg(Y,bts)
 2 | % X = beatavg(Y,bts)
 3 | %    Calculate average of columns of Y according to grid defined by
 4 | %    (real-valued) column indices in vector bts (row or column vector).
 5 | %    Indices are 0-based: the first column of Y is at index 0.
 6 | %    Column b of X sums the columns of Y whose index t satisfies
 7 | %    bts(b) <= t < bts(b+1) and divides by the beat length
 8 | %    bts(b+1)-bts(b); the last beat ends at the final column index.
 9 | %    A beat of zero or negative length gives an all-zero column
10 | %    (previously a zero-length beat produced NaNs).
11 | %    For folding spectrograms down into beat-sync features.
12 | % 2006-09-26 dpwe@ee.columbia.edu
13 | 
14 | % accept row or column vectors of beat positions
15 | bts = bts(:)';
16 | nbts = length(bts);
17 | % map beats to specgram slices
18 | ncols = size(Y,2);
19 | coltimes = [0:(ncols-1)];
20 | cols2beats = zeros(nbts, ncols);
21 | % closing edge for the last beat is the last column index
22 | btse = [bts,max(coltimes)];
23 | for b = 1:nbts
24 |   btlen = btse(b+1)-btse(b);
25 |   % skip degenerate beats: dividing by btlen = 0 would give 0*Inf = NaN
26 |   if btlen > 0
27 |     inbeat = (coltimes >= btse(b)) & (coltimes < btse(b+1));
28 |     cols2beats(b,:) = inbeat/btlen;
29 |   end
30 | end
31 | 
32 | % The actual desired output
33 | X = Y * cols2beats';
34 | 


--------------------------------------------------------------------------------
/bts2time.m:
--------------------------------------------------------------------------------
1 | function [timegroup] = bts2time(seggroup, bts)
2 | %BTS2TIME convert beat indices to times: timegroup = bts(seggroup)
3 | %   seggroup - array of indices into bts; bts - vector of beat times
4 | 
5 | timegroup = bts(seggroup);
6 | 
7 | end
8 | 
9 | 


--------------------------------------------------------------------------------
/caldiag.m:
--------------------------------------------------------------------------------
  1 | function [ bimar, index ] = caldiag(sdmar, num, debug, deplot)
  2 | %CALDIAG calculate the possible diagonal, return as the binarized matrix
  3 | %   sdmar  - feature self-distance matrix
  4 | %   num    - number of minima to keep (the largest ones are dropped)
  5 | %   debug  - nonzero: subtract a 50-point moving average from the
  6 | %            per-diagonal means before the minima search (default 0)
  7 | %   deplot - nonzero: plot the per-diagonal means with the chosen minima
  8 | %            and show the binarized matrix before and after enhancement
  9 | %            (default 0)
 10 | %   bimar  - len-by-len matrix: -1 off the chosen sub-diagonals; on them,
 11 | %            1 where sdmar exceeds the threshold and 0 otherwise, with
 12 | %            runs of 25 set to 1 where isenhan accepts them
 13 | %   index  - sub-diagonal offsets of the chosen minima (from lmin)
 14 | 
 15 | if nargin < 4
 16 |     deplot = 0;
 17 | end
 18 | 
 19 | if nargin <3
 20 |     debug = 0;
 21 | end
 22 | 
 23 | % dig(i) is the mean of the i-th sub-diagonal of sdmar
 24 | len = length(sdmar);
 25 | dig = zeros(len-1,1);
 26 | for i = 1:len-1
 27 |     dig(i) = sum(diag(sdmar, -i))/(len-i);
 28 | end
 29 | 
 30 | if debug ~= 0
 31 |     %low pass the dig to "detrend"
 32 |     dig_lp = filter(ones(50,1)/50, 1, dig);
 33 |     dig = dig-dig_lp;
 34 | end
 35 | 
 36 | % local minima of the diagonal means (lmin is defined outside this file)
 37 | [minima, index] = lmin(dig, 2);
 38 | 
 39 | % more minima than requested: keep removing the largest until num are left
 40 | if length(minima) > num
 41 |     while(1)
 42 |         add = find(minima == max(minima), length(minima)-num, 'first');
 43 |         minima(:, add) = [];
 44 |         index(:, add) = [];
 45 |         if(length(minima) == num)
 46 |             break;
 47 |         end
 48 |     end
 49 | end
 50 | 
 51 | if deplot ~= 0
 52 |     figure;
 53 |     plot(dig); grid; hold on;
 54 |     plot(index, dig(index), 'r+');
 55 | end
 56 | 
 57 | % pool the values of all selected sub-diagonals into one long vector
 58 | all_len = length(diag(sdmar,-index(1)));
 59 | longvec = diag(sdmar,-index(1))';
 60 | for i = 2:length(index)
 61 |     all_len = all_len+length(diag(sdmar,-index(i)));
 62 |     longvec = [longvec, diag(sdmar,-index(i))'];
 63 | end
 64 | 
 65 | % threshold = value 20% of the way through the sorted pool
 66 | longvec = sort(longvec);
 67 | threshold = longvec(round(0.2*all_len));
 68 | bimar = -ones(len,len);
 69 | 
 70 | % binarize the selected sub-diagonals: 1 above threshold, 0 otherwise
 71 | for i = 1:length(index)
 72 |     temp = diag(sdmar,-index(i));
 73 |     for j = 1:length(diag(sdmar,-index(i)))
 74 |         if temp(j) > threshold
 75 |             bimar(index(i)+j,j) = 1;
 76 |         else
 77 |             bimar(index(i)+j,j) = 0;
 78 |         end
 79 |     end
 80 | end
 81 | 
 82 | if deplot ~= 0
 83 |     figure; imshow(mat2gray(bimar));title('binarized matrix');
 84 | end
 85 | 
 86 | %enhance the binarized matrix
 87 | % walk each selected sub-diagonal; at every non-zero entry hand the next
 88 | % 25 values to isenhan and, if it accepts, set those 25 entries to 1 and
 89 | % jump past them.  temp is a snapshot, so it does not see these writes.
 90 | for i = 1:length(index)
 91 |     temp = diag(bimar,-index(i));
 92 |     j = 1;
 93 |     while length(temp) >= 25 || j <= length(temp)
 94 |         if temp(j) == 0
 95 |             j = j + 1;
 96 |             if j+25-1 > length(temp)
 97 |                 break;
 98 |             end
 99 |             continue;
100 |         end
101 |         if j+25-1 > length(temp)
102 |             break;
103 |         end
104 |         kernel = temp(j:j+25-1);
105 |         if isenhan(kernel)
106 |             for k = 0:24
107 |                 bimar(index(i)+j+k, j+k) = 1;
108 |             end
109 |             j = j+25-1;
110 |         end
111 |         j = j + 1;
112 |         if j+25-1 > length(temp)
113 |             break;
114 |         end        
115 |     end
116 | end
117 | 
118 | if deplot ~= 0
119 |     figure; imshow(mat2gray(bimar));title('binarized matrix - after enhancement');
120 | end
121 | 
122 | end
123 | 
124 | 
125 | 


--------------------------------------------------------------------------------
/chorusdetection.asv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chorusdetection.asv


--------------------------------------------------------------------------------
/chorusdetection.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chorusdetection.m


--------------------------------------------------------------------------------
/chromagram_E.m:
--------------------------------------------------------------------------------
 1 | function Y = chromagram_E(d,sr,fftlen,nbin,f_ctr,f_sd)
 2 | % Y = chromagram_E(d,sr,fftlen,nbin,f_ctr,f_sd)
 3 | %  "Chromagram" of the sound in d (sampling rate sr), computed from
 4 | %  STFT magnitudes mapped onto chroma bins by fft2chromamx.
 5 | %  fftlen - FFT size (default 2048); window fftlen/2, hop fftlen/4
 6 | %  nbin   - number of steps the octave is divided into (default 12)
 7 | %  f_ctr  - weighting center frequency in Hz (default 1000)
 8 | %  f_sd   - gaussian SD of the weighting in octaves (default 1)
 9 | % 2006-09-26 dpwe@ee.columbia.edu
10 | 
11 | if nargin < 3, fftlen = 2048; end
12 | if nargin < 4, nbin = 12; end
13 | if nargin < 5, f_ctr = 1000; end
14 | if nargin < 6, f_sd = 1; end
15 | 
16 | % magnitude spectrogram
17 | winlen = fftlen/2;
18 | hoplen = fftlen/4;  % always for this
19 | specmag = abs(specgram(d,fftlen,sr,winlen,(winlen-hoplen)));
20 | 
21 | % weighting center expressed in octaves above 27.5 Hz
22 | ctr_octs = log(f_ctr/27.5) / log(2);
23 | 
24 | % mapping matrix (440 is passed as the A440 argument), chopped to fftlen/2+1 columns
25 | chromamx = fft2chromamx(fftlen, nbin, sr, 440, ctr_octs, f_sd);
26 | chromamx = chromamx(:,1:(fftlen/2)+1);
27 | 
28 | Y = chromamx*specmag;
29 | 


--------------------------------------------------------------------------------
/chromagram_IF.m:
--------------------------------------------------------------------------------
 1 | function Y = chromagram_IF(d,sr,fftlen,nbin,f_ctr,f_sd)
 2 | % Y = chromagram_IF(d,sr,fftlen,nbin,f_ctr,f_sd)
 3 | %  Calculate a "chromagram" of the sound in d (at sampling rate sr)
 4 | %  from pitch tracks found by ifptrack using fftlen-point FFTs
 5 | %  (default 2048).
 6 | %  nbin is accepted for call compatibility but not used: the octave
 7 | %  is always divided into 12 steps.
 8 | %  f_ctr (in Hz, default 1000) and f_sd (in octaves, default 1) set the
 9 | %  four band edges handed to ifptrack: f_ctr shifted by -2, -1, +1
10 | %  and +2 times f_sd octaves.
11 | %  Use instantaneous frequency to keep only real harmonics.
12 | %  Y has 12 rows and one column per column of the ifptrack output.
13 | % 2006-09-26 dpwe@ee.columbia.edu
14 | 
15 | %   Copyright (c) 2006 Columbia University.
16 | % 
17 | %   This file is part of LabROSA-coversongID
18 | % 
19 | %   LabROSA-coversongID is free software; you can redistribute it and/or modify
20 | %   it under the terms of the GNU General Public License version 2 as
21 | %   published by the Free Software Foundation.
22 | % 
23 | %   LabROSA-coversongID is distributed in the hope that it will be useful, but
24 | %   WITHOUT ANY WARRANTY; without even the implied warranty of
25 | %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26 | %   General Public License for more details.
27 | % 
28 | %   You should have received a copy of the GNU General Public License
29 | %   along with LabROSA-coversongID; if not, write to the Free Software
30 | %   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
31 | %   02110-1301 USA
32 | % 
33 | %   See the file "COPYING" for the text of the license.
34 | 
35 | if nargin < 3;   fftlen = 2048; end
36 | % nbin (4th argument) is intentionally left unused, see header
37 | if nargin < 5;   f_ctr = 1000; end
38 | if nargin < 6;   f_sd = 1; end
39 | 
40 | fminl = octs2hz(hz2octs(f_ctr)-2*f_sd);
41 | fminu = octs2hz(hz2octs(f_ctr)-f_sd);
42 | fmaxl = octs2hz(hz2octs(f_ctr)+f_sd);
43 | fmaxu = octs2hz(hz2octs(f_ctr)+2*f_sd);
44 | 
45 | nchr = 12;
46 | 
47 | % Calculate spectrogram and IF gram pitch tracks...
48 | [p,m]=ifptrack(d,fftlen,sr,fminl,fminu,fmaxl,fmaxu); 
49 | 
50 | ncols = size(p,2);
51 | 
52 | % chroma-quantized IF sinusoids
53 | % (p==0 entries are bumped to 1 so the log inside hz2octs stays finite)
54 | Pocts = hz2octs(p+(p==0));
55 | Pocts(p(:)==0) = 0;
56 | % Figure best tuning alignment
57 | nzp = find(p(:)>0);
58 | [hn,hx] = hist(nchr*Pocts(nzp)-round(nchr*Pocts(nzp)),100);
59 | centsoff = hx(find(hn == max(hn)));
60 | % Adjust tunings to align better with chroma
61 | Pocts(nzp) = Pocts(nzp) - centsoff(1)/nchr;
62 | 
63 | % Quantize to chroma bins
64 | PoctsQ = Pocts;
65 | PoctsQ(nzp) = round(nchr*Pocts(nzp))/nchr;
66 | 
67 | % map IF pitches to chroma bins
68 | Pmapc = round(nchr*(PoctsQ - floor(PoctsQ)));
69 | Pmapc(p(:) == 0) = -1; 
70 | Pmapc(Pmapc(:) == nchr) = 0;
71 | 
72 | % Sum the values m of each column into their chroma bins
73 | Y = zeros(nchr,ncols);
74 | % the bin-number grid is the same for every column, so build it once
75 | binidx = repmat([0:(nchr-1)]',1,size(Pmapc,1));
76 | for t = 1:ncols
77 |   Y(:,t)=(binidx==repmat(Pmapc(:,t)',nchr,1))*m(:,t);
78 | end
79 | 


--------------------------------------------------------------------------------
/chromagram_P.m:
--------------------------------------------------------------------------------
 1 | function Y = chromagram_P(d,sr,fftlen,nbin,f_ctr,f_sd)
 2 | % Y = chromagram_P(d,sr,fftlen,nbin,f_ctr,f_sd)
 3 | %  "Chromagram" of the sound in d (sampling rate sr) that keeps only
 4 | %  the local maxima along frequency of each STFT magnitude column.
 5 | %  fftlen - FFT size (default 2048); window fftlen/2, hop fftlen/4
 6 | %  nbin   - number of steps the octave is divided into (default 12)
 7 | %  f_ctr  - weighting center frequency in Hz (default 1000)
 8 | %  f_sd   - gaussian SD of the weighting in octaves (default 1)
 9 | % 2006-09-26 dpwe@ee.columbia.edu
10 | 
11 | if nargin < 3, fftlen = 2048; end
12 | if nargin < 4, nbin = 12; end
13 | if nargin < 5, f_ctr = 1000; end
14 | if nargin < 6, f_sd = 1; end
15 | 
16 | winlen = fftlen/2;
17 | hoplen = fftlen/4;  % always for this
18 | specmag = abs(specgram(d,fftlen,sr,winlen,(winlen-hoplen)));
19 | nrows = size(specmag,1);
20 | 
21 | % weighting center expressed in octaves above 27.5 Hz
22 | ctr_octs = log(f_ctr/27.5) / log(2);
23 | 
24 | % mapping matrix (440 is passed as the A440 argument), chopped to fftlen/2+1 columns
25 | chromamx = fft2chromamx(fftlen, nbin, sr, 440, ctr_octs, f_sd);
26 | chromamx = chromamx(:,1:(fftlen/2)+1);
27 | 
28 | % Keep only local maxes in freq: strictly above the row before and
29 | % at least as large as the row after (edge rows compare with themselves)
30 | prevrow = specmag([1,[1:nrows-1]],:);
31 | nextrow = specmag([[2:nrows],nrows],:);
32 | ispeak = (specmag > prevrow) & (specmag >= nextrow);
33 | Y = chromamx*(specmag.*ispeak);
34 | 


--------------------------------------------------------------------------------
/chrombeatftrs.asv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chrombeatftrs.asv


--------------------------------------------------------------------------------
/chrombeatftrs.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chrombeatftrs.m


--------------------------------------------------------------------------------
/chromenhance.m:
--------------------------------------------------------------------------------
 1 | function [ chromhance_mar ] = chromenhance(chroma_mar, debug)
 2 | %CHROMENHANCE enhance the chroma feature
 3 | %   For each (r,c) with c <= r, the minimum of matcentre(chroma_mar,r,c)
 4 | %   is added to chroma_mar(r,c) when it sits in element 1 or 2 of that
 5 | %   result, otherwise the maximum is added; other entries stay 0.
 6 | %   debug ~= 0 (default 0) shows the result as an image.
 7 | if nargin < 2, debug = 0; end
 8 | [nr, nc] = size(chroma_mar);
 9 | chromhance_mar = zeros(nr, nc);
10 | for r = 1:length(chroma_mar)
11 |     for c = 1:r
12 |         means = matcentre(chroma_mar, r, c);
13 |         lowest = min(means);
14 |         if lowest == means(1) || lowest == means(2)
15 |             bonus = lowest;
16 |         else
17 |             bonus = max(means);
18 |         end
19 |         chromhance_mar(r,c) = chroma_mar(r,c) + bonus;
20 |     end
21 | end
22 | if debug ~= 0
23 |     figure; imshow(mat2gray(chromhance_mar)); title('chroma SDM - after enhancement');
24 | end
25 | end
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/delete0.asv:
--------------------------------------------------------------------------------
 1 | function [ftr] = delete0(ftr)
 2 | %DELETE0 delete the 0 vector at the end of matrix ftr
 3 | %   Trailing all-zero columns of ftr are removed; zero columns before
 4 | %   the last non-zero column are kept.  An all-zero (or empty) ftr gives
 5 | %   a matrix with zero columns instead of an index error.
 6 | 
 7 | n = size(ftr, 2);
 8 | i = n;
 9 | % the i > 0 test stops the scan when every column is zero
10 | while i > 0 && norm(ftr(:, i)) == 0
11 |     i = i-1;
12 | end
13 | ftr(:, i+1:n) = [];
14 | end
15 | 
16 | 


--------------------------------------------------------------------------------
/delete0.m:
--------------------------------------------------------------------------------
 1 | function [ftr] = delete0(ftr)
 2 | %DELETE0 delete the 0 vector at the end of matrix ftr
 3 | %   Trailing all-zero columns of ftr are removed; zero columns before
 4 | %   the last non-zero column are kept.  An all-zero (or empty) ftr gives
 5 | %   a matrix with zero columns instead of an index error.
 6 | 
 7 | n = size(ftr, 2);
 8 | i = n;
 9 | % the i > 0 test stops the scan when every column is zero
10 | while i > 0 && norm(ftr(:, i)) == 0
11 |     i = i-1;
12 | end
13 | ftr(:, i+1:n) = [];
14 | end
15 | 
16 | 


--------------------------------------------------------------------------------
/distsc.m:
--------------------------------------------------------------------------------
1 | function score = distsc(avedis, sdm, seggroup, ind)
2 | %DISTSC Average distance score: 1 - median(diagonal of segment block)/avedis
3 | %   Block = rows seggroup(ind,1):seggroup(ind,3), cols seggroup(ind,2):seggroup(ind,4) of sdm
4 | seg = seggroup(ind, :);
5 | block = sdm(seg(1):seg(3), seg(2):seg(4));
6 | score = 1 - median(diag(block))/avedis;
7 | end
8 | 
9 | 


--------------------------------------------------------------------------------
/fft2melmx.m:
--------------------------------------------------------------------------------
  1 | function [wts,binfrqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
  2 | % wts = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp)
  3 | %      Generate a matrix of weights to combine FFT bins into Mel
  4 | %      bins.  nfft defines the source FFT size at sampling rate sr
  5 | %      (default 8000).  Optional nfilts specifies the number of output
  6 | %      bands required (default 40), and width is the constant width of
  7 | %      each band relative to standard Mel (default 1).
  8 | %      While wts has nfft columns, the second half are all zero. 
  9 | %      Hence, Mel spectrum is fft2melmx(nfft,sr)*abs(fft(xincols,nfft));
 10 | %      minfrq is the frequency (in Hz) of the lowest band edge;
 11 | %      default is 0, but 133.33 is a common standard (to skip LF).
 12 | %      maxfrq is frequency in Hz of upper edge; default sr/2.
 13 | %      You can exactly duplicate the mel matrix in Slaney's mfcc.m
 14 | %      as fft2melmx(512, 8000, 40, 1, 133.33, 6855.5, 0);
 15 | %      htkmel=1 means use HTK's version of the mel curve, not Slaney's.
 16 | %      constamp=1 means make integration windows peak at 1, not sum to 1.
 17 | % 2004-09-05  dpwe@ee.columbia.edu  based on fft2barkmx
 18 | 
 19 | if nargin < 2;     sr = 8000;      end
 20 | if nargin < 3;     nfilts = 40;    end
 21 | if nargin < 4;     width = 1.0;    end
 22 | if nargin < 5;     minfrq = 0;     end  % default bottom edge at 0
 23 | if nargin < 6;     maxfrq = sr/2;  end  % default top edge at nyquist
 24 | if nargin < 7;     htkmel = 0;     end
 25 | if nargin < 8;     constamp = 0;   end
 26 | 
 27 | 
 28 | wts = zeros(nfilts, nfft);
 29 | 
 30 | % Center freqs of each FFT bin
 31 | fftfrqs = [0:(nfft/2)]/nfft*sr;
 32 | 
 33 | % 'Center freqs' of mel bands - uniformly spaced between limits
 34 | minmel = hz2mel(minfrq, htkmel);
 35 | maxmel = hz2mel(maxfrq, htkmel);
 36 | binfrqs = mel2hz(minmel+[0:(nfilts+1)]/(nfilts+1)*(maxmel-minmel), htkmel);
 37 | 
 38 | %binbin = round(binfrqs/sr*(nfft-1));  % only used by the disabled feacalc variant below
 39 | 
 40 | for i = 1:nfilts
 41 | %  fs = mel2hz(i + [-1 0 1], htkmel);
 42 |   fs = binfrqs(i+[0 1 2]);
 43 |   % scale by width
 44 |   fs = fs(2)+width*(fs - fs(2));
 45 |   % lower and upper slopes for all bins
 46 |   loslope = (fftfrqs - fs(1))/(fs(2) - fs(1));
 47 |   hislope = (fs(3) - fftfrqs)/(fs(3) - fs(2));
 48 |   % .. then intersect them with each other and zero
 49 | %  wts(i,:) = 2/(fs(3)-fs(1))*max(0,min(loslope, hislope));
 50 |   wts(i,1+[0:(nfft/2)]) = max(0,min(loslope, hislope));
 51 | 
 52 |   % actual algo and weighting in feacalc (more or less)
 53 | %  wts(i,:) = 0;
 54 | %  ww = binbin(i+2)-binbin(i);
 55 | %  usl = binbin(i+1)-binbin(i);
 56 | %  wts(i,1+binbin(i)+[1:usl]) = 2/ww * [1:usl]/usl;
 57 | %  dsl = binbin(i+2)-binbin(i+1);
 58 | %  wts(i,1+binbin(i+1)+[1:(dsl-1)]) = 2/ww * [(dsl-1):-1:1]/dsl;
 59 | % need to disable weighting below if you use this one
 60 | 
 61 | end
 62 | 
 63 | if (constamp == 0)
 64 |   % Slaney-style mel is scaled to be approx constant E per channel
 65 |   wts = diag(2./(binfrqs(2+[1:nfilts])-binfrqs(1:nfilts)))*wts;
 66 | end
 67 | 
 68 | % Make sure 2nd half of FFT is zero.  Column nfft/2+1 is the Nyquist bin
 69 | wts(:,(nfft/2+2):nfft) = 0;
 70 | % filled by the loop above, so zeroing starts one column after it
 71 | 
 72 | 
 73 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 74 | function f = mel2hz(z, htk)
 75 | %   f = mel2hz(z, htk)
 76 | %   Map 'mel scale' values z back to frequencies in Hz
 77 | %   htk = 1 selects the HTK formula; any other value (default 0)
 78 | %   selects the formula from Slaney's mfcc.m
 79 | % 2005-04-19 dpwe@ee.columbia.edu
 80 | 
 81 | if nargin < 2, htk = 0; end
 82 | 
 83 | if htk == 1
 84 |   f = 700*(10.^(z/2595)-1);
 85 |   return
 86 | end
 87 | 
 88 | % Slaney curve: linear below the knee at 1000 Hz, logarithmic above
 89 | lin_origin = 0; % 133.33333;
 90 | lin_step = 200/3; % 66.66667;
 91 | knee_hz = 1000;
 92 | knee_mel = (knee_hz - lin_origin)/lin_step;  % starting mel value for log region
 93 | % per-mel frequency ratio of the log region: 1000 Hz to 6400 Hz in 27 steps
 94 | ratio = exp(log(6.4)/27);
 95 | 
 96 | below = (z < knee_mel);
 97 | 
 98 | f = 0*z;
 99 | 
100 | % fill in parts separately
101 | f(below) = lin_origin + lin_step*z(below);
102 | f(~below) = knee_hz*exp(log(ratio)*(z(~below)-knee_mel));
103 | 
104 | 
105 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106 | function z = hz2mel(f,htk)
107 | %  z = hz2mel(f,htk)
108 | %  Map frequencies f (in Hz) onto the mel 'scale'.
109 | %  htk = 1 selects the mel axis defined in the HTKBook; any other
110 | %  value (default 0) selects Slaney's formula
111 | % 2005-04-19 dpwe@ee.columbia.edu
112 | 
113 | if nargin < 2, htk = 0; end
114 | 
115 | if htk == 1
116 |   z = 2595 * log10(1+f/700);
117 |   return
118 | end
119 | 
120 | % Mel fn to match Slaney's Auditory Toolbox mfcc.m:
121 | % linear below the knee at 1000 Hz, logarithmic from there on
122 | lin_origin = 0; % 133.33333;
123 | lin_step = 200/3; % 66.66667;
124 | knee_hz = 1000;
125 | knee_mel = (knee_hz - lin_origin)/lin_step;  % starting mel value for log region
126 | % per-mel frequency ratio of the log region: 1000 Hz to 6400 Hz in 27 steps
127 | ratio = exp(log(6.4)/27);
128 | 
129 | below = (f < knee_hz);
130 | 
131 | z = 0*f;
132 | 
133 | % fill in parts separately
134 | z(below) = (f(below) - lin_origin)/lin_step;
135 | z(~below) = knee_mel+(log(f(~below)/knee_hz))./log(ratio);
136 | 
137 | 


--------------------------------------------------------------------------------
/fftOneSide.m:
--------------------------------------------------------------------------------
 1 | function [magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, plotOpt)
 2 | % fftOneSide: One-sided FFT for real signals
 3 | %	Usage: [magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, plotOpt)
 4 | %	signal: vector to transform; fs: sampling rate (default 1)
 5 | %	plotOpt: nonzero plots signal, power and phase (default 0)
 6 | %	magSpec/phaseSpec: magnitude (mirrored bins doubled) and unwrapped phase
 7 | %	freq: bin frequencies from 0 up to at most fs/2; powerSpecInDb: 20*log10(magSpec)
 8 | %	For example:
 9 | %		[y, fs]=wavread('welcome.wav');
10 | %		signal=y(2047:2047+512+1);
11 | %		signal=signal.*hamming(length(signal));
12 | %		[magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, 1);
13 | 
14 | %	Roger Jang, 20060411, 20070506
15 | 
16 | if nargin<1, selfdemo; return; end
17 | if nargin<2, fs=1; end
18 | if nargin<3, plotOpt=0; end
19 | 
20 | N = length(signal);			% Signal length
21 | freqStep = fs/N;			% Frequency resolution
22 | time = (0:N-1)/fs;			% Time vector
23 | z = fft(signal);			% Spectrum
24 | freq = freqStep*(0:N/2)';		% Frequency vector
25 | z = z(1:length(freq));			% One side
26 | z(2:end-(1-mod(N,2)))=2*z(2:end-(1-mod(N,2)));	% Double mirrored bins; the Nyquist bin (even N only) has no mirror
27 | magSpec=abs(z);				% Magnitude spectrum
28 | phaseSpec=unwrap(angle(z));		% Phase spectrum
29 | powerSpecInDb=20*log10(magSpec+realmin);	% Power in db (base-10 log; realmin avoids log of 0)
30 | 
31 | if plotOpt
32 | 	% ====== Plot time-domain signals
33 | 	subplot(3,1,1);
34 | 	plot(time, signal, '.-');
35 | 	title(sprintf('Input signals (fs=%d)', fs));
36 | 	xlabel('Time (seconds)'); ylabel('Amplitude'); axis tight
37 | 	% ====== Plot spectral power
38 | 	subplot(3,1,2);
39 | 	plot(freq, powerSpecInDb, '.-'); grid on
40 | 	title('Power spectrum');
41 | 	xlabel('Frequency (Hz)'); ylabel('Power (db)'); axis tight
42 | 	% ====== Plot phase
43 | 	subplot(3,1,3);
44 | 	plot(freq, phaseSpec, '.-'); grid on
45 | 	title('Phase');
46 | 	xlabel('Frequency (Hz)'); ylabel('Phase (Radian)'); axis tight
47 | end
48 | 
49 | % ====== Self demo
50 | function selfdemo
51 | % Reads 'welcome.wav', cuts samples 2047..2047+512+1, applies a Hamming
52 | % window and calls this file's main function with plotting switched on.
53 | [wave, rate]=wavread('welcome.wav');
54 | frame=wave(2047:2047+512+1);
55 | frame=frame.*hamming(length(frame));
56 | %frame=[frame; zeros(512, 1)];
57 | [magSpec, phaseSpec, freq, powerSpecInDb]=feval(mfilename, frame, rate, 1);


--------------------------------------------------------------------------------
/hz2octs.m:
--------------------------------------------------------------------------------
 1 | function octs = hz2octs(freq, A440)
 2 | % octs = hz2octs(freq, A440)
 3 | % Express a frequency in Hz as a real number counting the octaves
 4 | % above A0, which lies four octaves below the reference A. So hz2octs(440) = 4.0
 5 | % Optional A440 specifies the Hz to be treated as middle A (default 440).
 6 | % 2006-06-29 dpwe@ee.columbia.edu for fft2chromamx
 7 | 
 8 | if nargin < 2, A440 = 440; end
 9 | 
10 | refA0 = A440/16;   % A4 = A440, so A0 is four octaves (a factor of 16) lower
11 | octs = log(freq./refA0)./log(2);
12 | 
13 | 


--------------------------------------------------------------------------------
/ifgram.m:
--------------------------------------------------------------------------------
  1 | function [F,D] = ifgram(X, N, W, H, SR)
  2 | % [F,D] = ifgram(X, N, W, H, SR)       Instantaneous frequency by phase deriv.
  3 | %    X is a 1-D signal.  It is analysed with N-point FFTs of W-point
  4 | %    raised-cosine windowed frames taken every H samples.  F returns
  5 | %    (N/2)+1 channels of instantaneous frequency, expressed in the units
  6 | %    of SR (cycles per sample for the default SR = 1), obtained as the
  7 | %    time-derivative of the phase of the complex spectrum as described by
  8 | %    Toshihiko Abe, Takao Kobayashi, and Satoshi Imai, "Robust Pitch
  9 | %    Estimation with Harmonics Enhancement in Noisy Environments Based on
 10 | %    Instantaneous Frequency", ICSLP 1996
 11 | %    http://www.kbys.ip.titech.ac.jp/research/pdf/icslp96-pitch.pdf
 12 | %    See also Abe's 2006 IEEE TASLP paper 14(4) 1292-1300.
 13 | %    Defaults: N = 256, W = N, H = W/2, SR = 1.
 14 | % 
 15 | %    Same arguments and some common code as dpwebox/stft.m.
 16 | %    The STFT computed along the way is returned in D; each column is
 17 | %    formed with the ' operator, i.e. as a conjugate transpose.
 18 | % after 1998may02 dpwe@icsi.berkeley.edu
 19 | % 2001-03-05 dpwe@ee.columbia.edu  revised version
 20 | % 2001-12-13 dpwe@ee.columbia.edu  Fixed to work when N != W
 21 | % $Header: $
 22 | 
 23 | %   Copyright (c) 2006 Columbia University.
 24 | % 
 25 | %   This file is part of LabROSA-coversongID
 26 | % 
 27 | %   LabROSA-coversongID is free software; you can redistribute it and/or modify
 28 | %   it under the terms of the GNU General Public License version 2 as
 29 | %   published by the Free Software Foundation.
 30 | % 
 31 | %   LabROSA-coversongID is distributed in the hope that it will be useful, but
 32 | %   WITHOUT ANY WARRANTY; without even the implied warranty of
 33 | %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 34 | %   General Public License for more details.
 35 | % 
 36 | %   You should have received a copy of the GNU General Public License
 37 | %   along with LabROSA-coversongID; if not, write to the Free Software
 38 | %   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 39 | %   02110-1301 USA
 40 | % 
 41 | %   See the file "COPYING" for the text of the license.
 42 | 
 43 | if nargin < 2;  N = 256; end
 44 | if nargin < 3;  W = N;   end
 45 | if nargin < 4;  H = W/2; end
 46 | if nargin < 5;  SR = 1;  end
 47 | 
 48 | siglen = length(X);
 49 | % Work on a row vector
 50 | if size(X,1) > 1
 51 |   X = X';
 52 | end
 53 | 
 54 | % Analysis window: raised cosine whose first sample is zero
 55 | theta = (0:(W-1))/W*2*pi;
 56 | win = 0.5*(1-cos(theta));
 57 | 
 58 | % Companion window used for the discrete differentiation
 59 | T = W/SR;
 60 | dwin = -pi / T * sin(theta);
 61 | 
 62 | % 1/sum(win) takes out the integration due to the window, the factor 2
 63 | % compensates for the negative frequencies
 64 | gain = 2/sum(win);
 65 | 
 66 | % Number of complete windows that fit into the signal
 67 | nhops = 1 + floor((siglen - W)/H);
 68 | 
 69 | nbins = 1 + N/2;
 70 | F = zeros(nbins, nhops);
 71 | D = zeros(nbins, nhops);
 72 | 
 73 | % Samples to add on each side when N > W (negative: samples to drop)
 74 | padlo = floor( (N-W)/2 );
 75 | padhi = N-W - padlo;
 76 | 
 77 | % Angular frequency of every FFT bin
 78 | omega = 2*pi*(0:(N-1))*SR/N;
 79 | 
 80 | for h = 1:nhops
 81 |   frame = X((h-1)*H + (1:W));
 82 |   % Apply both windows now, while the length still equals W
 83 |   wu = win.*frame;
 84 |   du = dwin.*frame;
 85 |   
 86 |   % Pad or truncate to N samples if N != W
 87 |   if N > W
 88 |     wu = [zeros(1,padlo),wu,zeros(1,padhi)];
 89 |     du = [zeros(1,padlo),du,zeros(1,padhi)];
 90 |   elseif N < W
 91 |     keep = -padlo+(1:N);
 92 |     wu = wu(keep);
 93 |     du = du(keep);
 94 |   end
 95 |   % Spectra of the differential-weighted and of the plainly windowed frame
 96 |   dspec = fft(fftshift(du));
 97 |   spec = fft(fftshift(wu));
 98 |   % Scale down to factor out length & window effects
 99 |   D(:,h) = spec(1:nbins)'*gain;
100 | 
101 |   % Instantaneous frequency from the phase of the differential spectrum
102 |   tot = dspec + 1i*(omega.*spec);
103 |   re = real(spec);
104 |   im = imag(spec);
105 |   dre = real(tot);
106 |   dimg = imag(tot);
107 |   % the (abs(spec)==0) term replaces a zero denominator by one
108 |   instf = (1/(2*pi))*(re.*dimg - im.*dre)./((re.*re + im.*im)+(abs(spec)==0));
109 |   % 1/2pi converts rad/s into cycles/s; the sampling rate is already
110 |   % factored in as a constant in dwin & omega, so the result is in Hz
111 |   
112 |   F(:,h) = instf(1:nbins)';
113 |     
114 | end;
115 | 
116 | 


--------------------------------------------------------------------------------
/ifptrack.m:
--------------------------------------------------------------------------------
  1 | function [p,m,S] = ifptrack(d,w,sr,fminl,fminu,fmaxl,fmaxu)
  2 | % [p,m,S] = ifptrack(d,w,sr,fminl,fminu,fmaxl,fmaxu)
  3 | %     Pitch track based on inst freq.
  4 | %     Look for adjacent bins with same inst freq.
  5 | %     d is the input waveform.  sr is its sample rate
  6 | %     w is the basic STFT DFT length (window is half, hop is 1/4)
  7 | %     S returns the underlying complex STFT.
  8 | %     fminl,fminu and fmaxl,fmaxu define ramps at the edges of sensitivity:
  9 | %     magnitudes are zeroed below fminl and above fmaxu and scaled
 10 | %     linearly in between (defaults 150, 300, 2000 and 4000).
 11 | %     p and m return, for every frame (column), the frequency and the
 12 | %     summed magnitude of each plateau found, stored at the centre bin of
 13 | %     the plateau; all other cells are zero.
 14 | % 2006-05-03 dpwe@ee.columbia.edu
 15 | 
 16 | %   Copyright (c) 2006 Columbia University.
 17 | % 
 18 | %   This file is part of LabROSA-coversongID
 19 | % 
 20 | %   LabROSA-coversongID is free software; you can redistribute it and/or modify
 21 | %   it under the terms of the GNU General Public License version 2 as
 22 | %   published by the Free Software Foundation.
 23 | % 
 24 | %   LabROSA-coversongID is distributed in the hope that it will be useful, but
 25 | %   WITHOUT ANY WARRANTY; without even the implied warranty of
 26 | %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 27 | %   General Public License for more details.
 28 | % 
 29 | %   You should have received a copy of the GNU General Public License
 30 | %   along with LabROSA-coversongID; if not, write to the Free Software
 31 | %   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 32 | %   02110-1301 USA
 33 | % 
 34 | %   See the file "COPYING" for the text of the license.
 35 | 
 36 | % downweight fundamentals below here
 37 | if nargin < 4; fminl = 150; end
 38 | if nargin < 5; fminu = 300; end
 39 | % highest frequency we look to
 40 | if nargin < 6; fmaxl = 2000; end
 41 | if nargin < 7; fmaxu = 4000; end
 42 | 
 43 | 
 44 | % Calculate the inst freq gram
 45 | [I,S] = ifgram(d,w,w/2,w/4,sr);
 46 | 
 47 | % Only look at bins up to fmaxu.  The bin is clamped to the rows that
 48 | % ifgram actually returned (fmaxu above sr/2 would otherwise index past
 49 | % the end of I) and to at least 1.
 50 | maxbin = min(size(I,1), max(1, round(fmaxu * (w/sr) )));
 51 | %maxbin = size(I,1)
 52 | 
 53 | % Find plateaus in ifgram - stretches where delta IF is < thr
 54 | ddif = [I(2:maxbin, :);I(maxbin,:)] - [I(1,:);I(1:(maxbin-1),:)];
 55 | 
 56 | % expected increment per bin = sr/w, threshold at 3/4 that
 57 | dgood = abs(ddif) < .75*sr/w;
 58 | 
 59 | % delete any single bins (both above and below are zero);
 60 | dgood = dgood .* ([dgood(2:maxbin,:);dgood(maxbin,:)] >  0 | [dgood(1,:);dgood(1:(maxbin-1),:)] > 0);
 61 | 
 62 | % check it out
 63 | %p = dgood;
 64 | 
 65 | % reconstruct just pitchy cells?
 66 | %r = istft(p.*S,w,w/2,w/4);
 67 | 
 68 | p = zeros(size(dgood));
 69 | m = zeros(size(dgood));
 70 | 
 71 | % For each frame, extract all harmonic freqs & magnitudes
 72 | for t = 1:size(I,2)
 73 |   ds = dgood(:,t)';
 74 |   lds = length(ds);
 75 |   % find nonzero regions in this vector: st holds the first bin of each
 76 |   % region and en the last one
 77 |   st = find(([0,ds(1:(lds-1))]==0) & (ds > 0));
 78 |   en = find((ds > 0) & ([ds(2:lds),0] == 0));
 79 |   npks = length(st);
 80 |   frqs = zeros(1,npks);
 81 |   mags = zeros(1,npks);
 82 |   for i = 1:npks
 83 |     bump = abs(S(st(i):en(i),t));
 84 |     % magnitude-weighted mean of the inst freq over the region; the
 85 |     % (sum(bump)==0) term avoids a division by zero
 86 |     frqs(i) = (bump'*I(st(i):en(i),t))/(sum(bump)+(sum(bump)==0));
 87 |     mags(i) = sum(bump);
 88 |     if frqs(i) > fmaxu
 89 |       mags(i) = 0;
 90 |       frqs(i) = 0;
 91 |     elseif frqs(i) > fmaxl
 92 |       % linear fade-out between fmaxl and fmaxu
 93 |       mags(i) = mags(i) * max(0, (fmaxu - frqs(i))/(fmaxu-fmaxl));
 94 |     end
 95 |     % drop magnitudes below fminl
 96 |     if frqs(i) < fminl
 97 |       mags(i) = 0;
 98 |       frqs(i) = 0;
 99 |     elseif frqs(i) < fminu
100 |       % linear fade-out between fminu and fminl
101 |       mags(i) = mags(i) * (frqs(i) - fminl)/(fminu-fminl);
102 |     end
103 |     if frqs(i) < 0 
104 |       mags(i) = 0;
105 |       frqs(i) = 0;
106 |     end
107 |     
108 |   end
109 | 
110 | % then just keep the largest at each frame (for now)
111 | %  [v,ix] = max(mags);
112 | %  p(t) = frqs(ix);
113 | %  m(t) = mags(ix);
114 |   % No, keep them all
115 |   %bin = st;
116 |   bin = round((st+en)/2);
117 |   p(bin,t) = frqs;
118 |   m(bin,t) = mags;
119 | end
120 | 
121 | %% Pull out the max in each column
122 | %[mm,ix] = max(m);
123 | %% idiom to retrieve different element from each column
124 | %[nr,nc] = size(p);
125 | %pp = p((nr*[0:(nc-1)])+ix);
126 | %mm = m((nr*[0:(nc-1)])+ix);
127 | % r = synthtrax(pp,mm,sr,w/4);
128 | 
129 | %p = pp;
130 | %m = mm;
131 | 
132 | 


--------------------------------------------------------------------------------
/isenhan.m:
--------------------------------------------------------------------------------
 1 | function [enhanflg] = isenhan( kernel )
 2 | %ISENHAN Determine whether kernel should be enhanced
 3 | %   enhanflg is 1 when at least 65% of the first length(kernel) entries
 4 | %   of kernel equal 1 and at least one of the last two of those entries
 5 | %   equals 1; otherwise it is 0.
 6 | 
 7 | len = length(kernel);
 8 | 
 9 | % share of entries that are exactly 1
10 | ratio = sum(kernel(1:len) == 1)/len;
11 | 
12 | enhanflg = 0;
13 | if ratio >= 0.65 && (kernel(len-1) == 1 || kernel(len) == 1)
14 |     enhanflg = 1;
15 | end
16 | end
17 | 
18 | 


--------------------------------------------------------------------------------
/lmin.m:
--------------------------------------------------------------------------------
 1 | function [lmval,indd]=lmin(xx,filt)
 2 | %LMIN 	function [lmval,indd]=lmin(x,filt)
 3 | %	Locate the strict local minima of vector X.  LMVAL returns the
 4 | %	minima values and INDD the corresponding indices.
 5 | %	FILT is the number of passes of a 3-point running average applied
 6 | %	before the search in order to get rid of small peaks.  Default
 7 | %	FILT = 0 (no filtering); 1 to 3 passes are usually sufficient.
 8 | %	When FILT > 0 every minimum is re-located in the unfiltered data,
 9 | %	within 1 or 2 samples of the position found in the filtered data.
10 | %	Examples:
11 | %	xx=0:0.01:35; y=sin(xx) + cos(xx ./3); 
12 | %	plot(xx,y); grid; hold on;
13 | %	[a b]=lmin(y,2)
14 | %	 plot(xx(b),a,'r+')
15 | %	see also LMAX, MAX, MIN
16 | 	
17 | %
18 | %**************************************************|
19 | % 	Serge Koptenko, Guigne International Ltd., |
20 | %	phone (709)895-3819, fax (709)895-3822     |
21 | %--------------06/03/97----------------------------|
22 | 
23 | smoothed = xx;
24 | npts = length(smoothed);
25 | avg3 = [1 1 1]/3;
26 | if nargin < 2
27 | 	filt = 0;
28 | else
29 | 	% the two end points are restored after every smoothing pass
30 | 	headval = smoothed(1);
31 | 	tailval = smoothed(npts);
32 | 	for pass = 1:filt
33 | 		padded = conv(avg3,smoothed);
34 | 		smoothed = padded(2:npts+1);
35 | 		smoothed(1) = headval;
36 | 		smoothed(npts) = tailval;
37 | 	end
38 | end
39 | 
40 | lmval = [];
41 | indd = [];
42 | 
43 | % scan from the second point; the last two points are never tested
44 | k = 2;
45 | while k < npts-1
46 | 	if smoothed(k) < smoothed(k-1)
47 | 		if smoothed(k) < smoothed(k+1)
48 | 			% definite min
49 | 			lmval(end+1) = smoothed(k);
50 | 			indd(end+1) = k;
51 | 		elseif smoothed(k) == smoothed(k+1) && smoothed(k) == smoothed(k+2)
52 | 			% 'long' flat spot: not reported (strict case), skip 2 points
53 | 			k = k + 2;
54 | 		elseif smoothed(k) == smoothed(k+1)
55 | 			% 'short' flat spot: not reported (strict case), skip one point
56 | 			k = k + 1;
57 | 		end
58 | 	end
59 | 	k = k + 1;
60 | end
61 | 
62 | % after filtering, look the minima up again in the original data
63 | if filt>0 & ~isempty(indd)
64 | 	% use the narrower window when an index is too close to the edge
65 | 	halfw = 2;
66 | 	if (indd(1) <= 3) || (indd(length(indd))+2 > length(xx))
67 | 		halfw = 1;
68 | 	end
69 | 
70 | 	for ii = 1:length(indd)
71 | 		[val(ii), offset] = min(xx(indd(ii)-halfw:indd(ii)+halfw));
72 | 		iind(ii) = indd(ii) + offset - halfw - 1;
73 | 	end
74 | 	indd = iind;
75 | 	lmval = val;
76 | end
77 | 
78 | 


--------------------------------------------------------------------------------
/localmax.m:
--------------------------------------------------------------------------------
 1 | function m = localmax(x)
 2 | % m = localmax(x)
 3 | % Flag the local maxima of x with logical 1.  An element is flagged when
 4 | % it is strictly greater than its predecessor and not smaller than its
 5 | % successor.  Vectors are scanned along their length, matrices down each
 6 | % column.  The first element is compared with itself and the last one
 7 | % with itself plus one, so for finite data neither is ever flagged.
 8 | 
 9 | % treat a row vector as a column for the duration of the scan
10 | rowin = (size(x,1) == 1);
11 | if rowin
12 |   x = x';
13 | end
14 | 
15 | n = size(x,1);
16 | before = [x(1,:);x(1:(n-1),:)];
17 | after = [x(2:n,:);1+x(n,:)];
18 | m = (x > before) & (x >= after);
19 | 
20 | if rowin
21 |   % back to the orientation of the input
22 |   m = m';
23 | end
24 | 


--------------------------------------------------------------------------------
/locseg.asv:
--------------------------------------------------------------------------------
  1 | function [chorus, seggroup, scoretab] = locseg(bimar, index, bts, sdmar, mono, fs, debug)
  2 | %LOCSEG Locate interesting segments (which are likely to contain the
  3 | %chorus). A heuristic scoring method is adopted to find the most likely
  4 | %segment.
  5 | %   bimar - binarized matrix
  6 | %   index - index for the diagonals (diag(bimar,-index(i)) is scanned)
  7 | %   bts - beat times, used for measuring time
  8 | %   sdmar - matrix handed to distsc for the distance score
  9 | %   mono - signal whose square is handed to avenergy for the energy score
 10 | %   fs - sampling rate handed to avenergy
 11 | %   debug - 0 for nothing, 1 for remove close segments, 2 for adding score 6,
 12 | %   3 for adding score 3, 4 for add both score 3 and score 6
 13 | %
 14 | %   seggroup - one row [row1 col1 row2 col2] per run of ones that spans
 15 | %              at least 4 s on both axes
 16 | %   scoretab - score of every row of seggroup
 17 | %   chorus - row(s) of seggroup holding the highest score; all three
 18 | %            outputs are empty when no segment qualifies
 19 | 
 20 | if nargin < 7
 21 |     debug = 0;
 22 | end
 23 | 
 24 | %find all the segments longer than 4s
 25 | chorus = zeros(1,4);
 26 | seggroup = zeros(0,4);
 27 | for i = 1:length(index)
 28 |     temp = diag(bimar, -index(i));
 29 |     %every diagonal starts outside a segment, so that a run touching the
 30 |     %end of one diagonal cannot be closed by the next one
 31 |     segflg = 0;
 32 |     for j = 1:length(temp)
 33 |         if temp(j) == 1 && segflg == 0
 34 |             %the beginning of one segment
 35 |             chorus(1) = index(i)+j;
 36 |             chorus(2) = j;
 37 |             segflg = 1;
 38 |         elseif temp(j) == 0 && segflg == 1
 39 |             %the end of one segment
 40 |             chorus(3) = index(i)+j;
 41 |             chorus(4) = j;
 42 |             %determine whether this segment is longer than 4s
 43 |             if bts(chorus(3))-bts(chorus(1)) >= 4 && bts(chorus(4))-bts(chorus(2)) >= 4
 44 |                 seggroup = [seggroup;chorus];
 45 |             end
 46 |             segflg = 0;
 47 |         end
 48 |     end
 49 | end
 50 | count = size(seggroup,1);
 51 | 
 52 | %without any segment there is nothing to score
 53 | if count == 0
 54 |     chorus = zeros(0,4);
 55 |     scoretab = zeros(0,1);
 56 |     return;
 57 | end
 58 | 
 59 | if debug == 1
 60 |     %for each diagonal segment found in the binarized matrix, the method
 61 |     %looks for diagonal segments which are located close to it.
 62 |     %(clostab is only collected here; nothing is removed yet)
 63 |     clostab = zeros(count, count+2);
 64 |     for i = 1:count
 65 |         closrec = 3;
 66 |         for j = 1:count
 67 |             if i == j
 68 |                 continue;
 69 |             end
 70 |             if seggroup(j,1)>=seggroup(i,1)-5 && seggroup(j,3)<=seggroup(i,3)+20 && abs(seggroup(j,2)-seggroup(i,2))<=20 && seggroup(j,4)<=seggroup(i,4)+5
 71 |                 clostab(i,1) = clostab(i,1)+1;
 72 |                 clostab(j,2) = clostab(j,2)+1;
 73 |                 clostab(i,closrec) = j;
 74 |                 closrec = closrec+1;
 75 |             end
 76 |         end
 77 |     end
 78 | end
 79 | 
 80 | %scoring scheme
 81 | scoretab = zeros(count,1);
 82 | 
 83 | %prework for 4th score
 84 | mono2 = (mono.^2);
 85 | aven = mean(mono2);
 86 | avedis = mean(mean(sdmar));
 87 | 
 88 | %prework for 5th score
 89 | if debug == 2 || debug == 4
 90 |     occurnum = zeros(count, 1);
 91 |     for i = 1:count
 92 |         for j = 1:count
 93 |             if j == i
 94 |                 continue;
 95 |             elseif abs(seggroup(i,2)-seggroup(j,2))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) && abs(seggroup(i,4)-seggroup(j,4))<=0.2*abs(seggroup(j,2)-seggroup(j,4))
 96 |                 occurnum(i) = occurnum(i)+1;
 97 |             end
 98 |         end
 99 |     end
100 |     %largest count, used to normalise score 6
101 |     maxoccur = max(occurnum);
102 | end
103 | 
104 | %prework for 2nd score
105 | if debug == 3 || debug == 4
106 |     %find the segment group - 3 segment with one locating under and one
107 |     %locating right
108 |     %every matching triple is appended; the list keeps 0 rows when there
109 |     %is none
110 |     group = zeros(0,3);
111 |     for i = 1:count
112 |         for j = 1:count
113 |             if j == i
114 |                 continue;
115 |             elseif seggroup(j,1)>=seggroup(i,3) && ~(seggroup(i,4)<=seggroup(j,2)||seggroup(i,2)>=seggroup(j,4))
116 |                 for k = 1:count
117 |                     if k == i || k == j
118 |                         continue;
119 |                     elseif ~(seggroup(j,3)<=seggroup(k,1)||seggroup(j,1)>=seggroup(k,3))
120 |                         group = [group;i,j,k];
121 |                     end
122 |                 end
123 |             end
124 |         end
125 |     end
126 |     m = size(group,1);
127 |     %sc3: column 1 is the scored segment, column 2 its group score
128 |     sc3 = zeros(m,2);
129 |     sc3(:,1) = group(:,2);
130 |     for n = 1:m
131 |         xb = seggroup(group(n,2),4)-seggroup(group(n,2),2);
132 |         xu = seggroup(group(n,1),4)-seggroup(group(n,1),2);
133 |         xr = seggroup(group(n,3),4)-seggroup(group(n,3),2);
134 |         theta1 = 1-2*abs(seggroup(group(n,1),4)-seggroup(group(n,2),4))/(xb+xu);
135 |         if seggroup(group(n,2),2)<seggroup(group(n,1),2)
136 |             theta2 = 1-(seggroup(group(n,1),2)-seggroup(group(n,2),2))/xb;
137 |         elseif seggroup(group(n,2),2)>=seggroup(group(n,1),4)
138 |             theta2 = 1-(seggroup(group(n,2),2)-seggroup(group(n,1),4))/xb;
139 |         else 
140 |             theta2 = 1;
141 |         end
142 |         theta3 = 1-abs(xr-xb)/xb;
143 |         theta4 = 1-2*min(abs(seggroup(group(n,2),1)-seggroup(group(n,3),1)),abs(seggroup(group(n,2),3)-seggroup(group(n,3),3)))/(xb+xr);
144 |         sc3(n,2) = (theta1+theta2+theta3+theta4)/4;
145 |     end
146 | end
147 | 
148 | %reference positions for the position score
149 | quarter = round(length(bts)/4);
150 | threequarter = round(3*length(bts)/4);
151 | 
152 | for i = 1:count
153 |     %1st - position score
154 |     s1 = 1-abs(seggroup(i,2)+0.5*(seggroup(i,3)-seggroup(i,1))-quarter)/quarter;
155 |     s2 = 1-abs(seggroup(i,1)+0.5*(seggroup(i,3)-seggroup(i,1))-threequarter)/quarter;
156 |     %2nd - relation to other repetitions
157 |     s3 = 0;
158 |     if debug == 3 || debug == 4
159 |         related = sc3(sc3(:,1)==i,2);
160 |         if ~isempty(related)
161 |             s3 = max(related);
162 |         end
163 |     end
164 |     
165 |     %3rd - average energy
166 |     s4 = avenergy(mono2, aven, fs, bts, seggroup, i);
167 |     %4th - average distance
168 |     s5 = distsc(avedis, sdmar, seggroup, i);
169 |     %5th - number of times the repetition occurs
170 |     %(stays 0 when no segment has a repetition, which avoids 0/0)
171 |     s6 = 0;
172 |     if (debug == 2 || debug == 4) && maxoccur > 0
173 |         s6 = occurnum(i)/maxoccur;
174 |     end
175 |     %the scores are real numbers, hence %.2f rather than %.2d
176 |     fprintf('The %d th segment:\n', i);
177 |     fprintf('s1:%.2f, s2:%.2f, s3:%.2f, s4:%.2f, s5:%.2f, s6:%.2f,',s1,s2,s3,s4,s5,s6);
178 |     scoretab(i) = 0.5*(s1+s2+s4+s6)+s3+s5;
179 |     fprintf('s:%.2f\n',scoretab(i));    
180 | end
181 | 
182 | %the segment with the most score be considered for chorus
183 | chorus = seggroup(scoretab == max(scoretab),:);
184 | end
185 | 
186 | 


--------------------------------------------------------------------------------
/locseg.m:
--------------------------------------------------------------------------------
  1 | function [chorus, seggroup, scoretab] = locseg(bimar, index, bts, sdmar, mono, fs, debug)
  2 | %LOCSEG Locate interesting segments (which are likely to contain the
  3 | %chorus). A heuristic scoring method is adopted to find the most likely
  4 | %segment.
  5 | %   bimar - binarized matrix
  6 | %   index - index for the diagonals (diag(bimar,-index(i)) is scanned)
  7 | %   bts - beat times, used for measuring time
  8 | %   sdmar - matrix handed to distsc for the distance score
  9 | %   mono - signal whose square is handed to avenergy for the energy score
 10 | %   fs - sampling rate handed to avenergy
 11 | %   debug - 0 for nothing, 1 for remove close segments, 2 for adding score 6,
 12 | %   3 for adding score 3, 4 for add both score 3 and score 6
 13 | %
 14 | %   seggroup - one row [row1 col1 row2 col2] per run of ones that spans
 15 | %              at least 4 s on both axes
 16 | %   scoretab - score of every row of seggroup
 17 | %   chorus - row(s) of seggroup holding the highest score; all three
 18 | %            outputs are empty when no segment qualifies
 19 | 
 20 | if nargin < 7
 21 |     debug = 0;
 22 | end
 23 | 
 24 | %find all the segments longer than 4s
 25 | chorus = zeros(1,4);
 26 | seggroup = zeros(0,4);
 27 | for i = 1:length(index)
 28 |     temp = diag(bimar, -index(i));
 29 |     %every diagonal starts outside a segment, so that a run touching the
 30 |     %end of one diagonal cannot be closed by the next one
 31 |     segflg = 0;
 32 |     for j = 1:length(temp)
 33 |         if temp(j) == 1 && segflg == 0
 34 |             %the beginning of one segment
 35 |             chorus(1) = index(i)+j;
 36 |             chorus(2) = j;
 37 |             segflg = 1;
 38 |         elseif temp(j) == 0 && segflg == 1
 39 |             %the end of one segment
 40 |             chorus(3) = index(i)+j;
 41 |             chorus(4) = j;
 42 |             %determine whether this segment is longer than 4s
 43 |             if bts(chorus(3))-bts(chorus(1)) >= 4 && bts(chorus(4))-bts(chorus(2)) >= 4
 44 |                 seggroup = [seggroup;chorus];
 45 |             end
 46 |             segflg = 0;
 47 |         end
 48 |     end
 49 | end
 50 | count = size(seggroup,1);
 51 | 
 52 | %without any segment there is nothing to score
 53 | if count == 0
 54 |     chorus = zeros(0,4);
 55 |     scoretab = zeros(0,1);
 56 |     return;
 57 | end
 58 | 
 59 | if debug == 1
 60 |     %for each diagonal segment found in the binarized matrix, the method
 61 |     %looks for diagonal segments which are located close to it.
 62 |     %(clostab is only collected here; nothing is removed yet)
 63 |     clostab = zeros(count, count+2);
 64 |     for i = 1:count
 65 |         closrec = 3;
 66 |         for j = 1:count
 67 |             if i == j
 68 |                 continue;
 69 |             end
 70 |             if seggroup(j,1)>=seggroup(i,1)-5 && seggroup(j,3)<=seggroup(i,3)+20 && abs(seggroup(j,2)-seggroup(i,2))<=20 && seggroup(j,4)<=seggroup(i,4)+5
 71 |                 clostab(i,1) = clostab(i,1)+1;
 72 |                 clostab(j,2) = clostab(j,2)+1;
 73 |                 clostab(i,closrec) = j;
 74 |                 closrec = closrec+1;
 75 |             end
 76 |         end
 77 |     end
 78 | end
 79 | 
 80 | %scoring scheme
 81 | scoretab = zeros(count,1);
 82 | 
 83 | %prework for 4th score
 84 | mono2 = (mono.^2);
 85 | aven = mean(mono2);
 86 | avedis = mean(mean(sdmar));
 87 | 
 88 | %prework for 5th score
 89 | if debug == 2 || debug == 4
 90 |     occurnum = zeros(count, 1);
 91 |     for i = 1:count
 92 |         for j = 1:count
 93 |             if j == i
 94 |                 continue;
 95 |             elseif abs(seggroup(i,2)-seggroup(j,2))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) && abs(seggroup(i,4)-seggroup(j,4))<=0.2*abs(seggroup(j,2)-seggroup(j,4))
 96 |                 occurnum(i) = occurnum(i)+1;
 97 |             end
 98 |         end
 99 |     end
100 |     %largest count, used to normalise score 6
101 |     maxoccur = max(occurnum);
102 | end
103 | 
104 | %prework for 2nd score
105 | if debug == 3 || debug == 4
106 |     %find the segment group - 3 segment with one locating under and one
107 |     %locating right
108 |     %every matching triple is appended; the list keeps 0 rows when there
109 |     %is none
110 |     group = zeros(0,3);
111 |     for i = 1:count
112 |         for j = 1:count
113 |             if j == i
114 |                 continue;
115 |             elseif seggroup(j,1)>=seggroup(i,3) && ~(seggroup(i,4)<=seggroup(j,2)||seggroup(i,2)>=seggroup(j,4))
116 |                 for k = 1:count
117 |                     if k == i || k == j
118 |                         continue;
119 |                     elseif ~(seggroup(j,3)<=seggroup(k,1)||seggroup(j,1)>=seggroup(k,3))
120 |                         group = [group;i,j,k];
121 |                     end
122 |                 end
123 |             end
124 |         end
125 |     end
126 |     m = size(group,1);
127 |     %sc3: column 1 is the scored segment, column 2 its group score
128 |     sc3 = zeros(m,2);
129 |     sc3(:,1) = group(:,2);
130 |     for n = 1:m
131 |         xb = seggroup(group(n,2),4)-seggroup(group(n,2),2);
132 |         xu = seggroup(group(n,1),4)-seggroup(group(n,1),2);
133 |         xr = seggroup(group(n,3),4)-seggroup(group(n,3),2);
134 |         theta1 = 1-2*abs(seggroup(group(n,1),4)-seggroup(group(n,2),4))/(xb+xu);
135 |         if seggroup(group(n,2),2)<seggroup(group(n,1),2)
136 |             theta2 = 1-(seggroup(group(n,1),2)-seggroup(group(n,2),2))/xb;
137 |         elseif seggroup(group(n,2),2)>=seggroup(group(n,1),4)
138 |             theta2 = 1-(seggroup(group(n,2),2)-seggroup(group(n,1),4))/xb;
139 |         else 
140 |             theta2 = 1;
141 |         end
142 |         theta3 = 1-abs(xr-xb)/xb;
143 |         theta4 = 1-2*min(abs(seggroup(group(n,2),1)-seggroup(group(n,3),1)),abs(seggroup(group(n,2),3)-seggroup(group(n,3),3)))/(xb+xr);
144 |         sc3(n,2) = (theta1+theta2+theta3+theta4)/4;
145 |     end
146 | end
147 | 
148 | %reference positions for the position score
149 | quarter = round(length(bts)/4);
150 | threequarter = round(3*length(bts)/4);
151 | 
152 | for i = 1:count
153 |     %1st - position score
154 |     s1 = 1-abs(seggroup(i,2)+0.5*(seggroup(i,3)-seggroup(i,1))-quarter)/quarter;
155 |     s2 = 1-abs(seggroup(i,1)+0.5*(seggroup(i,3)-seggroup(i,1))-threequarter)/quarter;
156 |     %2nd - relation to other repetitions
157 |     s3 = 0;
158 |     if debug == 3 || debug == 4
159 |         related = sc3(sc3(:,1)==i,2);
160 |         if ~isempty(related)
161 |             s3 = max(related);
162 |         end
163 |     end
164 |     
165 |     %3rd - average energy
166 |     s4 = avenergy(mono2, aven, fs, bts, seggroup, i);
167 |     %4th - average distance
168 |     s5 = distsc(avedis, sdmar, seggroup, i);
169 |     %5th - number of times the repetition occurs
170 |     %(stays 0 when no segment has a repetition, which avoids 0/0)
171 |     s6 = 0;
172 |     if (debug == 2 || debug == 4) && maxoccur > 0
173 |         s6 = occurnum(i)/maxoccur;
174 |     end
175 |     %fprintf('The %d th segment:\n', i);
176 |     %fprintf('s1:%.2d, s2:%.2d, s3:%.2d, s4:%.2d, s5:%.2d, s6:%.2d,',s1,s2,s3,s4,s5,s6);
177 |     scoretab(i) = 0.5*(s1+s2+s4+s6)+s3+s5;
178 |     %fprintf('s:%.2d\n',scoretab(i));    
179 | end
180 | 
181 | %the segment with the most score be considered for chorus
182 | chorus = seggroup(scoretab == max(scoretab),:);
183 | end
184 | 
185 | 


--------------------------------------------------------------------------------
/main.asv:
--------------------------------------------------------------------------------
 1 | %output the detection result
 2 | 
 3 | %run the detection first so that a failure leaves no file handle open
 4 | ctime = chorusdetection('C:\Users\Cheerz\Desktop\07. Viva La Vida.wav');
 5 | 
 6 | fout = fopen('Result.txt', 'w');
 7 | if fout == -1
 8 |     error('Cannot open Result.txt for writing');
 9 | end
10 | 
11 | %output the result to result.txt
12 | %(this autosaved draft contained an unfinished "fprintf(fout, '%s'," call
13 | %here; it is left out because its argument was never written)
14 | for i = 1:4
15 |     fprintf(fout, '%f', ctime(i));
16 |     fprintf(fout, '%s', ' ');
17 | end
18 | 
19 | fclose(fout);
20 | 


--------------------------------------------------------------------------------
/main.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/main.m


--------------------------------------------------------------------------------
/matcentre.m:
--------------------------------------------------------------------------------
 1 | function [dirmean] = matcentre(chroma_mar, i, j)
 2 | %MATCENTRE the intermediate process of chroma enhancement
 3 | %   Copies the 5x5 neighbourhood of chroma_mar centred on (i,j) into a
 4 | %   kernel (cells that fall outside the matrix stay 0) and returns six
 5 | %   directional local means of that kernel as a 6x1 vector.
 6 | 
 7 | kernel = zeros(5,5);
 8 | dirmean = zeros(6,1);
 9 | 
10 | % clip against the true row and column counts, so that a non-square
11 | % matrix is handled too (length() only reports the larger dimension)
12 | nrow = size(chroma_mar,1);
13 | ncol = size(chroma_mar,2);
14 | rows = max(1,i-2):min(nrow,i+2);
15 | cols = max(1,j-2):min(ncol,j+2);
16 | 
17 | % element (i,j) lands on the kernel centre (3,3)
18 | kernel(rows-i+3, cols-j+3) = chroma_mar(rows, cols);
19 | 
20 | %Six directional local mean values are calculated along the upper-left, 
21 | %lower-right, right, left, upper, and lower dimensions of the kernel
22 | 
23 | dirmean(1) = mean([kernel(1,1),kernel(2,2)]);
24 | dirmean(2) = mean([kernel(4,4),kernel(5,5)]);
25 | dirmean(3) = mean([kernel(3,4),kernel(3,5)]);
26 | dirmean(4) = mean([kernel(3,1),kernel(3,2)]);
27 | dirmean(5) = mean([kernel(1,3),kernel(2,3)]);
28 | dirmean(6) = mean([kernel(4,3),kernel(5,3)]);
29 | 
30 | end
31 | 
32 | 


--------------------------------------------------------------------------------
/mfccbeatftrs.asv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/mfccbeatftrs.asv


--------------------------------------------------------------------------------
/mfccbeatftrs.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/mfccbeatftrs.m


--------------------------------------------------------------------------------
/octs2hz.m:
--------------------------------------------------------------------------------
 1 | function hz = octs2hz(octs,A440)
 2 | % hz = octs2hz(octs,A440)
 3 | % Convert a real-valued octave number, counted upward from A0, into a
 4 | % frequency in Hz.  So octs2hz(4.0) = 440 with the default tuning.
 5 | % Optional A440 specifies the Hz to be treated as A4 (default 440).
 6 | % 2006-06-29 dpwe@ee.columbia.edu for fft2chromamx
 7 | 
 8 | if nargin < 2
 9 |   A440 = 440;
10 | end
11 | 
12 | % A4 = A440 = 440 Hz, so octave 0 (A0) is at A440/16 Hz
13 | a0 = A440/16;
14 | hz = a0.*(2.^octs);
15 | 
16 | 


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
1 | A music segmentation algorithm that I proposed and implemented as my undergraduate project.
2 | The basic workflow is:
3 | 1. A song is loaded into the system.
4 | 2. The system calculates the chroma (harmonic) and MFCC (timbre) features of the audio input.
5 | 3. It finds the segmentation labels by using a similarity matrix.
6 | 4. It then outputs the segmented time information of the song.
7 | 


--------------------------------------------------------------------------------
/sdm.m:
--------------------------------------------------------------------------------
 1 | function [sdmar] = sdm(ftr, debug)
 2 | %SDM calculate the self-distance matrix of the feature vectors in ftr
 3 | %   ftr - one feature vector per column
 4 | %   debug - nonzero to display the matrix as a grey-scale image (default 0)
 5 | %   sdmar - sdmar(i,j) is the Euclidean distance between columns i and j
 6 | 
 7 | if nargin < 2; debug = 0;end
 8 | 
 9 | [~, vecnum] = size(ftr);
10 | sdmar = zeros(vecnum, vecnum);
11 | 
12 | % the matrix is symmetric with a zero diagonal, so only the upper
13 | % triangle is computed and then mirrored
14 | for i = 1:vecnum
15 |     for j = i+1:vecnum
16 |         delta = ftr(:,i)-ftr(:,j);
17 |         % square every difference before summing; summing first would
18 |         % let differences of opposite sign cancel each other
19 |         sdmar(i,j) = sqrt(sum(abs(delta).^2));
20 |         sdmar(j,i) = sdmar(i,j);
21 |     end
22 | end
23 | 
24 | if debug ~= 0
25 |     figure; imshow(mat2gray(sdmar));
26 | end
27 | end
28 | 


--------------------------------------------------------------------------------
/tempo.m:
--------------------------------------------------------------------------------
  1 | function [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tmean,tsd,onsetenv,debug)
  2 | % [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tmean,tsd,onsetenv,debug)
  3 | %    Estimate the overall tempo of a track for the MIREX McKinney
  4 | %    contest.
  5 | %
  6 | %    Inputs
  7 | %      d        - input audio, sampled at sr; resampled to 8 kHz
  8 | %                 internally when sr differs.  Ignored if onsetenv
  9 | %                 is supplied non-empty.
 10 | %      sr       - sampling rate of d.
 11 | %      tmean    - centre (in bpm) of the BPM weighting window;
 12 | %                 default 120.
 13 | %      tsd      - scale applied to the octave distance from tmean
 14 | %                 inside the Gaussian weighting, so larger values give
 15 | %                 a narrower window; default 3.0.
 16 | %      onsetenv - an already-calculated onset envelope; default [].
 17 | %      debug    - when > 0, prints the estimates and plots the weighted
 18 | %                 autocorrelation in subplot(414); default 0.
 19 | %
 20 | %    Outputs
 21 | %      t        - t(1) is the lower BPM estimate, t(2) is the faster,
 22 | %                 t(3) is the relative weight for t(1) compared to t(2).
 23 | %      xcr      - the windowed autocorrelation from which the BPM peaks
 24 | %                 were picked.
 25 | %      D        - the dB mel-freq spectrogram (0 if onsetenv was given).
 26 | %      onsetenv - the "onset strength waveform", used for beat tracking.
 27 | %      sgsrate  - the sampling rate of onsetenv and D.
 28 | %
 29 | % 2006-08-25 dpwe@ee.columbia.edu
 30 | % uses: localmax, fft2melmx
 31 | 
 32 | %   Copyright (c) 2006 Columbia University.
 33 | % 
 34 | %   This file is part of LabROSA-coversongID
 35 | % 
 36 | %   LabROSA-coversongID is free software; you can redistribute it and/or modify
 37 | %   it under the terms of the GNU General Public License version 2 as
 38 | %   published by the Free Software Foundation.
 39 | % 
 40 | %   LabROSA-coversongID is distributed in the hope that it will be useful, but
 41 | %   WITHOUT ANY WARRANTY; without even the implied warranty of
 42 | %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 43 | %   General Public License for more details.
 44 | % 
 45 | %   You should have received a copy of the GNU General Public License
 46 | %   along with LabROSA-coversongID; if not, write to the Free Software
 47 | %   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 48 | %   02110-1301 USA
 49 | % 
 50 | %   See the file "COPYING" for the text of the license.
 51 | 
 52 | % Fill in defaults for any trailing arguments the caller left out
 53 | if nargin < 3, tmean = 120; end
 54 | if nargin < 4, tsd = 3.0; end
 55 | if nargin < 5, onsetenv = []; end
 56 | if nargin < 6, debug = 0; end
 57 | 
 58 | % Fixed analysis parameters
 59 | sro = 8000;                 % working sample rate (Hz)
 60 | swin = 256;                 % specgram window: 256 bin @ 8kHz = 32 ms
 61 | shop = 32;                  % specgram hop: 4 ms
 62 | nmel = 40;                  % mel channels
 63 | sgsrate = sro/shop;         % sample rate for specgram frames
 64 | acmax = round(4*sgsrate);   % autoco out to 4 s
 65 | lags = 0:acmax;             % non-negative lags, in frames
 66 | 
 67 | D = 0;
 68 | 
 69 | if isempty(onsetenv)
 70 |   % no onsetenv provided - derive it from the audio
 71 | 
 72 |   % resample to 8 kHz
 73 |   if sr ~= sro
 74 |     cf = gcd(sro,sr);
 75 |     d = resample(d, sro/cf, sr/cf);
 76 |     sr = sro;
 77 |   end
 78 | 
 79 |   stft = specgram(d, swin, sr, swin, swin-shop);
 80 | 
 81 |   % Construct db-magnitude-mel-spectrogram
 82 |   melw = fft2melmx(swin, sr, nmel);
 83 |   nbins = swin/2+1;
 84 |   D = 20*log10(max(1e-10, melw(:,1:nbins)*abs(stft)));
 85 | 
 86 |   % Only look at the top 80 dB
 87 |   D = max(D, max(max(D))-80);
 88 | 
 89 |   % The raw onset decision waveform: positive frame-to-frame increases,
 90 |   % averaged down each column
 91 |   rise = max(0, diff(D')');
 92 |   mm = mean(rise);
 93 | 
 94 |   % dc-removed mm
 95 |   onsetenv = filter([1 -1], [1 -.99], mm);
 96 | 
 97 | end  % of onsetenv calc block
 98 | 
 99 | % Find rough global period
100 | % Only use the 1st 90 sec to estimate global pd (avoid glitches?)
101 | maxdur = 90; % sec
102 | maxcol = min(round(maxdur*sgsrate), length(onsetenv));
103 | seg = onsetenv(1:maxcol);
104 | 
105 | acfull = xcorr(seg, seg, acmax);
106 | 
107 | % keep lags 0..acmax of the autocorrelation
108 | rawxcr = acfull(acmax+1+lags);
109 | 
110 | % window it around default bpm
111 | lagbpm = 60*sgsrate./(lags+0.1);
112 | xcrwin = exp(-.5*((log(lagbpm/tmean)/log(2)*tsd).^2));
113 | xcr = rawxcr.*xcrwin;
114 | 
115 | % find local max in the windowed ac
116 | xpks = localmax(xcr);
117 | % will not include any peaks in first down slope (before goes below
118 | % zero for the first time)
119 | firstneg = find(xcr<0, 1);
120 | xpks(1:firstneg) = 0;
121 | % largest local max away from zero
122 | maxpk = max(xcr(xpks));
123 | 
124 | % primary period: lag of the largest peak after windowing; when several
125 | % tie, take the shortest
126 | wpk = xpks.*xcr;
127 | tied = find(wpk == max(wpk));
128 | startpd = tied(1) - 1;
129 | 
130 | % Choose best peak out of .33 .5 2 3 x this period
131 | candpds = round([.33 .5 2 3]*startpd);
132 | candpds = candpds(candpds < acmax);
133 | [vv, xx] = max(xcr(1+candpds));
134 | startpd2 = candpds(xx);
135 | 
136 | % weight of the primary peak relative to the secondary one
137 | vvm = xcr(1+startpd);
138 | pratio = vvm/(vvm+vv);
139 | 
140 | bpm1 = 60/(startpd/sgsrate);
141 | bpm2 = 60/(startpd2/sgsrate);
142 | 
143 | % ensure results are lowest-first
144 | if bpm2 < bpm1
145 |   t = [bpm2 bpm1 1-pratio];
146 | else
147 |   t = [bpm1 bpm2 pratio];
148 | end
149 | 
150 | if debug > 0
151 | 
152 |   % Report results and plot weighted autocorrelation with picked peaks
153 |   disp(['Global bt pd = ',num2str(t(1)),' @ ',num2str(t(3)),' / ',num2str(t(2)),' bpm']);
154 | 
155 |   subplot(414)
156 |   plot(lags,xcr,'-b', ...
157 |        lags,xcrwin*maxpk,'-r', ...
158 |        [startpd startpd], [min(xcr) max(xcr)], '-g', ...
159 |        [startpd2 startpd2], [min(xcr) max(xcr)], '-c');
160 |   grid;
161 | 
162 | end
163 | 
164 | % Read in all the tempo settings
165 | % for i = 1:20; f = fopen(['mirex-beattrack/train/train',num2str(i),'-tempo.txt']); r(i,:) = fscanf(f, '%f\n'); fclose(f); end
166 | 


--------------------------------------------------------------------------------
/tokenize.m:
--------------------------------------------------------------------------------
1 | function a = tokenize(s,t)
2 | % Break space-separated string into cell array of strings.
3 | % Optional second arg t gives alternate separator (default ' ').
4 | % 2004-09-18 dpwe@ee.columbia.edu
5 | if nargin < 2;  t = ' '; end   % separator defaults to a single space
6 | a = [];                        % result starts out empty
7 | p = 1;                         % NOTE(review): presumably a scan position in s - confirm
8 | n = 1;                         % NOTE(review): presumably the next output index - confirm
9 | l = length(s);                 % NOTE(review): listing stops here; no splitting code is visible, so a is returned as [] - confirm the file is complete


--------------------------------------------------------------------------------