├── avenergy.m ├── beat.m ├── beatavg.m ├── bts2time.m ├── caldiag.m ├── chorusdetection.asv ├── chorusdetection.m ├── chromagram_E.m ├── chromagram_IF.m ├── chromagram_P.m ├── chrombeatftrs.asv ├── chrombeatftrs.m ├── chromenhance.m ├── delete0.asv ├── delete0.m ├── distsc.m ├── fft2melmx.m ├── fftOneSide.m ├── hz2octs.m ├── ifgram.m ├── ifptrack.m ├── isenhan.m ├── lmin.m ├── localmax.m ├── locseg.asv ├── locseg.m ├── main.asv ├── main.m ├── matcentre.m ├── mfccbeatftrs.asv ├── mfccbeatftrs.m ├── octs2hz.m ├── readme.txt ├── sdm.m ├── tempo.m └── tokenize.m /avenergy.m: -------------------------------------------------------------------------------- 1 | function [score] = avenergy(mono2, aven, fs, bts, seggroup, ind) 2 | %AVENERGY Calculate the average energy of one segment as a score (segment mean divided by aven) 3 | % mono2 - music signal samples that are averaged (presumably squared amplitudes - TODO confirm with caller) 4 | % aven - value the segment mean is divided by (presumably the whole-track average - TODO confirm with caller) 5 | % fs - sampling rate 6 | % bts - beat times in seconds (multiplied by fs to obtain sample indices) 7 | % seggroup - the group contains the interesting segments; columns 1 and 3 of a row hold the beat indices of the segment start and end 8 | % ind - index (row) in seggroup 9 | en = mean(mono2(round(fs*bts(seggroup(ind,1))):round(fs*bts(seggroup(ind,3))))); 10 | score = en/aven; 11 | 12 | end 13 | 14 | -------------------------------------------------------------------------------- /beat.m: -------------------------------------------------------------------------------- 1 | function [b,onsetenv,D,cumscore] = beat(d,sr,startbpm,tightness,doplot) 2 | % [b,onsetenv,D,cumscore] = beat(d,sr,startbpm,tightness,doplot) 3 | % b returns the times (in sec) of the beats in the waveform d, samplerate sr. 4 | % startbpm specifies the target tempo. If it is a two-element 5 | % vector, it is taken as the mode of a tempo search window, with 6 | % the second element being the spread (in octaves) of the 7 | % search, and the best tempo is calculated (with tempo.m).
8 | % tightness controls how tightly the start tempo is enforced 9 | % within the beat (default 6, larger = more rigid); if it is a 10 | % two-element vector the second parameter is alpha, the strength 11 | % of transition costs relative to local match (0..1, default 0.8). 12 | % doplot enables diagnostic plots; if it has two elements, they 13 | % are the time range (in sec) for the diagnostic plots. 14 | % onsetenv returns the raw onset detection envelope 15 | % D returns the mel-spectrogram, 16 | % cumscore returns the per-frame cumulated dynamic-programming score. 17 | % 2006-08-25 dpwe@ee.columbia.edu 18 | % uses: localmax, tempo 19 | 20 | % Copyright (c) 2006 Columbia University. 21 | % 22 | % This file is part of LabROSA-coversongID 23 | % 24 | % LabROSA-coversongID is free software; you can redistribute it and/or modify 25 | % it under the terms of the GNU General Public License version 2 as 26 | % published by the Free Software Foundation. 27 | % 28 | % LabROSA-coversongID is distributed in the hope that it will be useful, 29 | % but 30 | % WITHOUT ANY WARRANTY; without even the implied warranty of 31 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 32 | % General Public License for more details. 33 | % 34 | % You should have received a copy of the GNU General Public License 35 | % along with LabROSA-coversongID; if not, write to the Free Software 36 | % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 37 | % 02110-1301 USA 38 | % 39 | % See the file "COPYING" for the text of the license.
40 | % Argument defaults: a value of 0 for startbpm / tightness means "use the built-in default" 41 | if nargin < 3; startbpm = 0; end 42 | if nargin < 4; tightness = 0; end 43 | if nargin < 5; doplot = 0; end 44 | 45 | if length(startbpm) == 2 46 | temposd = startbpm(2); 47 | startbpm = startbpm(1); 48 | else 49 | temposd = 0; 50 | end 51 | if length(tightness) == 2 52 | alpha = tightness(2); 53 | tightness = tightness(1); 54 | else 55 | alpha = 0.8; % default transition weight; terminated with ';' so the value is not echoed to the console on every call 56 | end 57 | if tightness == 0; tightness = 6; end 58 | 59 | % Have we been given an envelope (nonnegative waveform) 60 | if min(d) >= 0 61 | onsetenv = d; 62 | sgsrate = sr; 63 | disp(['beat: treating input as onset strength envelope']); 64 | else 65 | onsetenv = []; 66 | end 67 | 68 | % debug/plotting options 69 | plotlims = []; 70 | if length(doplot) > 1 71 | % specify zoom-in limits too 72 | plotlims = doplot; 73 | doplot = 1; 74 | end 75 | if doplot > 0; debug = 1; else debug = 0; end 76 | 77 | b = []; D = []; % D is only filled in by tempo(); pre-assign it so the output is defined when tempo() is skipped (envelope input with a fixed startbpm) 78 | 79 | % Select tempo search either with startbpm = 0 (means use defaults) 80 | % or startbpm > 0 but temposd > 0 too (means search around startbpm) 81 | % If onsetenv is empty, have to run tempo too to convert waveform 82 | % to onsetenv, but we might not use the tempo it picks.
83 | if startbpm == 0 || temposd > 0 || isempty(onsetenv) 84 | 85 | if startbpm == 0 86 | tempomean = 120; 87 | else 88 | tempomean = startbpm; 89 | end 90 | 91 | if temposd == 0 92 | temposd = 0.7; 93 | end 94 | 95 | % Subfunction estimates global BPM; returns 'onset strength' 96 | % waveform onsetenv 97 | % If we were given an onsetenv as input, will use that 98 | [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tempomean,temposd,onsetenv,debug); 99 | 100 | % tempo.m returns the top-2 BPM estimates; use faster one for 101 | % beat tracking 102 | if (startbpm == 0 | temposd > 0) 103 | startbpm = max(t([1 2])); 104 | end 105 | 106 | if debug == 1 107 | % plot the mel-specgram 108 | tt = [1:length(onsetenv)]/sgsrate; 109 | subplot(411) 110 | imagesc(tt,[1 40],D); axis xy 111 | subplot(412) 112 | plot(tt,onsetenv); 113 | end 114 | 115 | end 116 | 117 | % convert startbpm to startpd 118 | startpd = (60*sgsrate)/startbpm; 119 | %disp(['startpd=',num2str(startpd)]); 120 | 121 | pd = startpd; 122 | 123 | % Smooth beat events 124 | templt = exp(-0.5*(([-pd:pd]/(pd/32)).^2)); 125 | localscore = conv(templt,onsetenv); 126 | localscore = localscore(round(length(templt)/2)+[1:length(onsetenv)]); 127 | 128 | % DP version: 129 | % backlink(time) is index of best preceding time for this point 130 | % cumscore(time) is total cumulated score to this point 131 | 132 | backlink = zeros(1,length(localscore)); 133 | cumscore = zeros(1,length(localscore)); 134 | 135 | % search range for previous beat 136 | prange = round(-2*pd):-round(pd/2); 137 | 138 | % Skewed window 139 | txwt = exp(-0.5*((tightness*log(prange/-pd)).^2)); 140 | 141 | starting = 1; 142 | for i = 1:length(localscore) 143 | 144 | timerange = i + prange; 145 | 146 | % Are we reaching back before time zero? 
147 | zpad = max(0, min(1-timerange(1),length(prange))); 148 | 149 | % Search over all possible predecessors and apply transition 150 | % weighting 151 | scorecands = txwt .* [zeros(1,zpad),cumscore(timerange(zpad+1:end))]; 152 | % Find best predecessor beat 153 | [vv,xx] = max(scorecands); 154 | % Add on local score 155 | cumscore(i) = alpha*vv + (1-alpha)*localscore(i); 156 | 157 | % special case to catch first onset 158 | % if starting == 1 & localscore(i) > 100*abs(vv) 159 | if starting == 1 & localscore(i) < 0.01*max(localscore); 160 | backlink(i) = -1; 161 | else 162 | backlink(i) = timerange(xx); 163 | % prevent it from resetting, even through a stretch of silence 164 | starting = 0; 165 | end 166 | 167 | end 168 | 169 | %%%% Backtrace 170 | 171 | % Cumulated score is stabilized to lie in constant range, 172 | % so just look for one near the end that has a reasonable score 173 | medscore = median(cumscore(localmax(cumscore))); 174 | bestendx = max(find(cumscore .* localmax(cumscore) > 0.5*medscore)); 175 | 176 | b = bestendx; 177 | 178 | while backlink(b(end)) > 0 179 | b = [b,backlink(b(end))]; 180 | end 181 | 182 | b = fliplr(b); 183 | 184 | % return beat times in secs 185 | b = b / sgsrate; 186 | 187 | % Debug visualization 188 | if doplot == 1 189 | subplot(411) 190 | hold on; 191 | plot([b;b],[0;40]*ones(1,length(b)),'w'); 192 | hold off; 193 | subplot(412) 194 | hold on; 195 | plot([b;b],[-5;20]*ones(1,length(b)),'g'); 196 | hold off; 197 | 198 | % redo 3rd pane as xcorr with templt 199 | subplot(413) 200 | tt = [1:length(localscore)]/sgsrate; 201 | plot(tt,localscore); 202 | hold on; plot([b;b],[min(localscore);max(localscore)]*ones(1,length(b)),'g'); hold off 203 | 204 | if length(plotlims) > 0 205 | for i = 1:3; 206 | subplot(4,1,i) 207 | ax = axis; 208 | ax([1 2]) = plotlims; 209 | axis(ax); 210 | end 211 | end 212 | 213 | end 214 | -------------------------------------------------------------------------------- /beatavg.m: 
-------------------------------------------------------------------------------- 1 | function X = beatavg(Y,bts) 2 | % X = beatavg(Y,bts) 3 | % Calculate average of columns of Y according to the grid defined by the 4 | % (real-valued) column indices in vector bts (row or column vector). 5 | % For folding spectrograms down into beat-sync features. 6 | % 2006-09-26 dpwe@ee.columbia.edu 7 | 8 | % beat-based segments 9 | %bts = beattrack(d,sr); 10 | nbts = length(bts); 11 | bttime = mean(diff(bts)); 12 | % map beats to specgram slices 13 | ncols = size(Y,2); 14 | coltimes = [0:(ncols-1)]; 15 | cols2beats = zeros(nbts, ncols); 16 | btse = [bts(:)',max(coltimes)]; % force a row so a column-vector bts also works; the last column index closes the final segment 17 | for b = 1:nbts 18 | cols2beats(b,:) = ((coltimes >= btse(b)) & (coltimes < btse(b+1)))/max(btse(b+1)-btse(b),eps); % max(...,eps): a zero-length final segment yields an all-zero row instead of NaN (0*Inf) 19 | end 20 | 21 | % The actual desired output 22 | X = Y * cols2beats'; 23 | -------------------------------------------------------------------------------- /bts2time.m: -------------------------------------------------------------------------------- 1 | function [timegroup] = bts2time(seggroup, bts) 2 | %BTS2TIME convert the beat indices in seggroup to times by looking them up in bts 3 | 4 | [m, n] = size(seggroup); 5 | timegroup = bts(seggroup); 6 | 7 | end 8 | 9 | -------------------------------------------------------------------------------- /caldiag.m: -------------------------------------------------------------------------------- 1 | function [ bimar, index ] = caldiag(sdmar, num, debug, deplot) 2 | %CALDIAG calculate the possible diagonal, return as the binarized matrix 3 | % sdmar - feature self-distance matrix 4 | % num - number of minima 5 | % debug - nonzero to detrend the diagonal means with a 50-point moving average (default 0); deplot - nonzero to plot diagnostics (default 0) 6 | 7 | if nargin < 4 8 | deplot = 0; 9 | end 10 | 11 | if nargin <3 12 | debug = 0; 13 | end 14 | 15 | len = length(sdmar); 16 | dig = zeros(len-1,1); 17 | for i = 1:len-1 18 | dig(i) = sum(diag(sdmar, -i))/(len-i); 19 | end 20 | 21 | if debug ~= 0 22 | %low pass the dig to "detrend" 23 | dig_lp = filter(ones(50,1)/50, 1, dig); 24 | dig = dig-dig_lp; 25 | end 26 | 27 | [minima, index]
= lmin(dig, 2); 28 | 29 | if length(minima) > num 30 | while(1) 31 | add = find(minima == max(minima), length(minima)-num, 'first'); 32 | minima(:, add) = []; 33 | index(:, add) = []; 34 | if(length(minima) == num) 35 | break; 36 | end 37 | end 38 | end 39 | 40 | if deplot ~= 0 41 | figure; 42 | plot(dig); grid; hold on; 43 | plot(index, dig(index), 'r+'); 44 | end 45 | 46 | all_len = length(diag(sdmar,-index(1))); 47 | longvec = diag(sdmar,-index(1))'; 48 | for i = 2:length(index) 49 | all_len = all_len+length(diag(sdmar,-index(i))); 50 | longvec = [longvec, diag(sdmar,-index(i))']; 51 | end 52 | 53 | longvec = sort(longvec); 54 | threshold = longvec(round(0.2*all_len)); 55 | bimar = -ones(len,len); 56 | 57 | for i = 1:length(index) 58 | temp = diag(sdmar,-index(i)); 59 | for j = 1:length(diag(sdmar,-index(i))) 60 | if temp(j) > threshold 61 | bimar(index(i)+j,j) = 1; 62 | else 63 | bimar(index(i)+j,j) = 0; 64 | end 65 | end 66 | end 67 | 68 | if deplot ~= 0 69 | figure; imshow(mat2gray(bimar));title('binarized matrix'); 70 | end 71 | 72 | %enhance the binarized matrix 73 | for i = 1:length(index) 74 | temp = diag(bimar,-index(i)); 75 | j = 1; 76 | while length(temp) >= 25 || j <= length(temp) 77 | if temp(j) == 0 78 | j = j + 1; 79 | if j+25-1 > length(temp) 80 | break; 81 | end 82 | continue; 83 | end 84 | if j+25-1 > length(temp) 85 | break; 86 | end 87 | kernel = temp(j:j+25-1); 88 | if isenhan(kernel) 89 | for k = 0:24 90 | bimar(index(i)+j+k, j+k) = 1; 91 | end 92 | j = j+25-1; 93 | end 94 | j = j + 1; 95 | if j+25-1 > length(temp) 96 | break; 97 | end 98 | end 99 | end 100 | 101 | if deplot ~= 0 102 | figure; imshow(mat2gray(bimar));title('binarized matrix - after enhancement'); 103 | end 104 | 105 | end 106 | 107 | 108 | -------------------------------------------------------------------------------- /chorusdetection.asv: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chorusdetection.asv -------------------------------------------------------------------------------- /chorusdetection.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chorusdetection.m -------------------------------------------------------------------------------- /chromagram_E.m: -------------------------------------------------------------------------------- 1 | function Y = chromagram_E(d,sr,fftlen,nbin,f_ctr,f_sd) 2 | % Y = chromagram_E(d,sr,fftlen,nbin) 3 | % Calculate a "chromagram" of the sound in d (at sampling rate sr) 4 | % Use windows of fftlen points, hopped by ffthop points 5 | % Divide the octave into nbin steps 6 | % Weight with center frequency f_ctr (in Hz) and gaussian SD f_sd (in octaves) 7 | % 2006-09-26 dpwe@ee.columbia.edu 8 | 9 | if nargin < 3; fftlen = 2048; end 10 | if nargin < 4; nbin = 12; end 11 | if nargin < 5; f_ctr = 1000; end 12 | if nargin < 6; f_sd = 1; end 13 | 14 | fftwin = fftlen/2; 15 | ffthop = fftlen/4; % always for this 16 | 17 | D = abs(specgram(d,fftlen,sr,fftwin,(fftwin-ffthop))); 18 | 19 | A0 = 27.5; % Hz 20 | A440 = 440; % Hz 21 | 22 | f_ctr_log = log(f_ctr/A0) / log(2); 23 | 24 | CM = fft2chromamx(fftlen, nbin, sr, A440, f_ctr_log, f_sd); 25 | % Chop extra dims 26 | CM = CM(:,1:(fftlen/2)+1); 27 | 28 | Y = CM*D; 29 | -------------------------------------------------------------------------------- /chromagram_IF.m: -------------------------------------------------------------------------------- 1 | function Y = chromagram_IF(d,sr,fftlen,nbin,f_ctr,f_sd) 2 | % Y = chromagram_IF(d,sr,fftlen,nbin,f_ctr,f_sd) 3 | % Calculate a "chromagram" of the sound in d (at sampling rate sr) 4 | % Use windows of fftlen points, hopped by ffthop points 5 | % Divide the octave into nbin steps 6 | % Weight 
with center frequency f_ctr (in Hz) and gaussian SD f_sd 7 | % (in octaves) 8 | % Use instantaneous frequency to keep only real harmonics. 9 | % 2006-09-26 dpwe@ee.columbia.edu 10 | 11 | % Copyright (c) 2006 Columbia University. 12 | % 13 | % This file is part of LabROSA-coversongID 14 | % 15 | % LabROSA-coversongID is free software; you can redistribute it and/or modify 16 | % it under the terms of the GNU General Public License version 2 as 17 | % published by the Free Software Foundation. 18 | % 19 | % LabROSA-coversongID is distributed in the hope that it will be useful, but 20 | % WITHOUT ANY WARRANTY; without even the implied warranty of 21 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | % General Public License for more details. 23 | % 24 | % You should have received a copy of the GNU General Public License 25 | % along with LabROSA-coversongID; if not, write to the Free Software 26 | % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | % 02110-1301 USA 28 | % 29 | % See the file "COPYING" for the text of the license. 30 | 31 | if nargin < 3; fftlen = 2048; end 32 | if nargin < 4; nbin = 12; end 33 | if nargin < 5; f_ctr = 1000; end 34 | if nargin < 6; f_sd = 1; end 35 | 36 | A0 = 27.5; % Hz 37 | A440 = 440; % Hz 38 | f_ctr_log = log(f_ctr/A0) / log(2); 39 | 40 | fminl = octs2hz(hz2octs(f_ctr)-2*f_sd); 41 | fminu = octs2hz(hz2octs(f_ctr)-f_sd); 42 | fmaxl = octs2hz(hz2octs(f_ctr)+f_sd); 43 | fmaxu = octs2hz(hz2octs(f_ctr)+2*f_sd); 44 | 45 | ffthop = fftlen/4; 46 | nchr = 12; 47 | 48 | % Calculate spectrogram and IF gram pitch tracks... 
49 | [p,m]=ifptrack(d,fftlen,sr,fminl,fminu,fmaxl,fmaxu); 50 | 51 | [nbins,ncols] = size(p); 52 | 53 | %disp(['ncols = ',num2str(ncols)]); 54 | 55 | % chroma-quantized IF sinusoids 56 | Pocts = hz2octs(p+(p==0)); 57 | Pocts(p(:)==0) = 0; 58 | % Figure best tuning alignment 59 | nzp = find(p(:)>0); 60 | %hist(nchr*Pmapo(nzp)-round(nchr*Pmapo(nzp)),100) 61 | [hn,hx] = hist(nchr*Pocts(nzp)-round(nchr*Pocts(nzp)),100); 62 | centsoff = hx(find(hn == max(hn))); 63 | % Adjust tunings to align better with chroma (first histogram peak is used if several tie) 64 | Pocts(nzp) = Pocts(nzp) - centsoff(1)/nchr; 65 | 66 | % Quantize to chroma bins 67 | PoctsQ = Pocts; 68 | PoctsQ(nzp) = round(nchr*Pocts(nzp))/nchr; 69 | 70 | % map IF pitches to chroma bins (-1 marks cells with no pitch) 71 | Pmapc = round(nchr*(PoctsQ - floor(PoctsQ))); 72 | Pmapc(p(:) == 0) = -1; 73 | Pmapc(Pmapc(:) == nchr) = 0; 74 | % NOTE(review): the output has nchr rows; the nbin argument does not appear to be used in this function - confirm whether nchr should follow nbin 75 | Y = zeros(nchr,ncols); 76 | for t = 1:ncols; 77 | Y(:,t)=(repmat([0:(nchr-1)]',1,size(Pmapc,1))==repmat(Pmapc(:,t)',nchr,1))*m(:,t); 78 | end 79 | -------------------------------------------------------------------------------- /chromagram_P.m: -------------------------------------------------------------------------------- 1 | function Y = chromagram_P(d,sr,fftlen,nbin,f_ctr,f_sd) 2 | % Y = chromagram_P(d,sr,fftlen,nbin,f_ctr,f_sd) 3 | % Calculate a "chromagram" of the sound in d (at sampling rate sr) 4 | % Use windows of fftlen points, hopped by ffthop points 5 | % Divide the octave into nbin steps 6 | % Weight with center frequency f_ctr (in Hz) and gaussian SD f_sd (in octaves) 7 | % 2006-09-26 dpwe@ee.columbia.edu 8 | % Defaults: fftlen 2048, nbin 12, f_ctr 1000 Hz, f_sd 1 octave 9 | if nargin < 3; fftlen = 2048; end 10 | if nargin < 4; nbin = 12; end 11 | if nargin < 5; f_ctr = 1000; end 12 | if nargin < 6; f_sd = 1; end 13 | 14 | fftwin = fftlen/2; 15 | ffthop = fftlen/4; % always for this 16 | 17 | D = abs(specgram(d,fftlen,sr,fftwin,(fftwin-ffthop))); 18 | 19 | [nr,nc] = size(D); 20 | 21 | A0 = 27.5; % Hz 22 | A440 = 440; % Hz 23 | 24 | f_ctr_log = log(f_ctr/A0) / log(2); 25 | 26 | CM = fft2chromamx(fftlen, nbin,
sr, A440, f_ctr_log, f_sd); 27 | % Chop extra dims 28 | CM = CM(:,1:(fftlen/2)+1); 29 | 30 | % Keep only local maxes in freq 31 | Dm = (D > D([1,[1:nr-1]],:)) & (D >= D([[2:nr],nr],:)); 32 | Y = CM*(D.*Dm); 33 | -------------------------------------------------------------------------------- /chrombeatftrs.asv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chrombeatftrs.asv -------------------------------------------------------------------------------- /chrombeatftrs.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/chrombeatftrs.m -------------------------------------------------------------------------------- /chromenhance.m: -------------------------------------------------------------------------------- 1 | function [ chromhance_mar ] = chromenhance(chroma_mar, debug) 2 | %CHROMENHANCE enhance the chroma feature 3 | 4 | if nargin < 2 5 | debug = 0; 6 | end 7 | 8 | [m,n] = size(chroma_mar); 9 | chromhance_mar = zeros(m,n); 10 | for i = 1:length(chroma_mar) 11 | for j = 1:i 12 | dirmean = matcentre(chroma_mar, i, j); 13 | if min(dirmean) == dirmean(1) || min(dirmean) == dirmean(2) 14 | chromhance_mar(i,j) = chroma_mar(i,j)+min(dirmean); 15 | else 16 | chromhance_mar(i,j) = chroma_mar(i,j)+max(dirmean); 17 | end 18 | end 19 | end 20 | 21 | if debug ~= 0; 22 | figure; imshow(mat2gray(chromhance_mar)); title('chroma SDM - after enhancement'); 23 | end 24 | 25 | end 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /delete0.asv: -------------------------------------------------------------------------------- 1 | function [ftr] = delete0(ftr) 2 | %DELETE0 delete the 0 vector at the end of matrix ftr 3 | 4 | [m, n] = size(ftr); 5 | i = 
n; 6 | while i > 0 && norm(ftr(:, i)) == 0 7 | i = i-1; 8 | end 9 | 10 | for j = 0:n-i-1 11 | ftr(:, n-j) = []; 12 | end 13 | 14 | end 15 | 16 | -------------------------------------------------------------------------------- /delete0.m: -------------------------------------------------------------------------------- 1 | function [ftr] = delete0(ftr) 2 | %DELETE0 delete the trailing all-zero columns at the end of matrix ftr 3 | % An all-zero (or empty) ftr is returned with zero columns. 4 | [m, n] = size(ftr); 5 | i = n; 6 | while i > 0 && norm(ftr(:, i)) == 0 % i > 0 guard: stop at the first column instead of indexing column 0 when every column is zero 7 | i = i-1; 8 | end 9 | % i is now the last non-zero column; drop columns i+1..n from the right 10 | for j = 0:n-i-1 11 | ftr(:, n-j) = []; 12 | end 13 | 14 | end 15 | 16 | -------------------------------------------------------------------------------- /distsc.m: -------------------------------------------------------------------------------- 1 | function score = distsc(avedis, sdm, seggroup, ind) 2 | %DISTSC Distance score: 1 - (median of the diagonal of the segment's SDM block)/avedis 3 | 4 | med = median(diag(sdm(seggroup(ind,1):seggroup(ind,3),seggroup(ind,2):seggroup(ind,4)))); 5 | score = 1-med/avedis; 6 | 7 | end 8 | 9 | -------------------------------------------------------------------------------- /fft2melmx.m: -------------------------------------------------------------------------------- 1 | function [wts,binfrqs] = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp) 2 | % wts = fft2melmx(nfft, sr, nfilts, width, minfrq, maxfrq, htkmel, constamp) 3 | % Generate a matrix of weights to combine FFT bins into Mel 4 | % bins. nfft defines the source FFT size at sampling rate sr. 5 | % Optional nfilts specifies the number of output bands required 6 | % (default 40), and width is the constant width of each 7 | % band relative to standard Mel (default 1). 8 | % While wts has nfft columns, the second half are all zero. 9 | % Hence, Mel spectrum is fft2melmx(nfft,sr)*abs(fft(xincols,nfft)); 10 | % minfrq is the frequency (in Hz) of the lowest band edge; 11 | % default is 0, but 133.33 is a common standard (to skip LF). 12 | % maxfrq is frequency in Hz of upper edge; default sr/2.
13 | % You can exactly duplicate the mel matrix in Slaney's mfcc.m 14 | % as fft2melmx(512, 8000, 40, 1, 133.33, 6855.5, 0); 15 | % htkmel=1 means use HTK's version of the mel curve, not Slaney's. 16 | % constamp=1 means make integration windows peak at 1, not sum to 1. 17 | % 2004-09-05 dpwe@ee.columbia.edu based on fft2barkmx 18 | 19 | if nargin < 2; sr = 8000; end 20 | if nargin < 3; nfilts = 40; end 21 | if nargin < 4; width = 1.0; end 22 | if nargin < 5; minfrq = 0; end % default bottom edge at 0 23 | if nargin < 6; maxfrq = sr/2; end % default top edge at nyquist 24 | if nargin < 7; htkmel = 0; end 25 | if nargin < 8; constamp = 0; end 26 | 27 | 28 | wts = zeros(nfilts, nfft); 29 | 30 | % Center freqs of each FFT bin 31 | fftfrqs = [0:(nfft/2)]/nfft*sr; 32 | 33 | % 'Center freqs' of mel bands - uniformly spaced between limits 34 | minmel = hz2mel(minfrq, htkmel); 35 | maxmel = hz2mel(maxfrq, htkmel); 36 | binfrqs = mel2hz(minmel+[0:(nfilts+1)]/(nfilts+1)*(maxmel-minmel), htkmel); 37 | 38 | binbin = round(binfrqs/sr*(nfft-1)); 39 | 40 | for i = 1:nfilts 41 | % fs = mel2hz(i + [-1 0 1], htkmel); 42 | fs = binfrqs(i+[0 1 2]); 43 | % scale by width 44 | fs = fs(2)+width*(fs - fs(2)); 45 | % lower and upper slopes for all bins 46 | loslope = (fftfrqs - fs(1))/(fs(2) - fs(1)); 47 | hislope = (fs(3) - fftfrqs)/(fs(3) - fs(2)); 48 | % .. 
then intersect them with each other and zero 49 | % wts(i,:) = 2/(fs(3)-fs(1))*max(0,min(loslope, hislope)); 50 | wts(i,1+[0:(nfft/2)]) = max(0,min(loslope, hislope)); 51 | 52 | % actual algo and weighting in feacalc (more or less) 53 | % wts(i,:) = 0; 54 | % ww = binbin(i+2)-binbin(i); 55 | % usl = binbin(i+1)-binbin(i); 56 | % wts(i,1+binbin(i)+[1:usl]) = 2/ww * [1:usl]/usl; 57 | % dsl = binbin(i+2)-binbin(i+1); 58 | % wts(i,1+binbin(i+1)+[1:(dsl-1)]) = 2/ww * [(dsl-1):-1:1]/dsl; 59 | % need to disable weighting below if you use this one 60 | 61 | end 62 | 63 | if (constamp == 0) 64 | % Slaney-style mel is scaled to be approx constant E per channel 65 | wts = diag(2./(binfrqs(2+[1:nfilts])-binfrqs(1:nfilts)))*wts; 66 | end 67 | 68 | % Make sure 2nd (mirrored) half of FFT is zero; columns 1..nfft/2+1 hold bins 0..Nyquist, so clearing starts at nfft/2+2 to keep the Nyquist weight 69 | wts(:,(nfft/2+2):nfft) = 0; 70 | % seems like a good idea to avoid aliasing 71 | 72 | 73 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 74 | function f = mel2hz(z, htk) 75 | % f = mel2hz(z, htk) 76 | % Convert 'mel scale' frequencies into Hz 77 | % Optional htk = 1 means use the HTK formula 78 | % else use the formula from Slaney's mfcc.m 79 | % 2005-04-19 dpwe@ee.columbia.edu 80 | 81 | if nargin < 2 82 | htk = 0; 83 | end 84 | 85 | if htk == 1 86 | f = 700*(10.^(z/2595)-1); 87 | else 88 | 89 | f_0 = 0; % 133.33333; 90 | f_sp = 200/3; % 66.66667; 91 | brkfrq = 1000; 92 | brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region 93 | logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447) 94 | 95 | linpts = (z < brkpt); 96 | 97 | f = 0*z; 98 | 99 | % fill in parts separately: linear below the break point, logarithmic above it 100 | f(linpts) = f_0 + f_sp*z(linpts); 101 | f(~linpts) = brkfrq*exp(log(logstep)*(z(~linpts)-brkpt)); 102 | 103 | end 104 | 105 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 106 | function z = hz2mel(f,htk) 107 | % z = hz2mel(f,htk) 108 | % Convert frequencies f (in Hz) to mel 'scale'. 109 | % Optional htk = 1 uses the mel axis defined in the HTKBook 110 | % otherwise use Slaney's formula 111 | % 2005-04-19 dpwe@ee.columbia.edu 112 | 113 | if nargin < 2 114 | htk = 0; 115 | end 116 | 117 | if htk == 1 118 | z = 2595 * log10(1+f/700); 119 | else 120 | % Mel fn to match Slaney's Auditory Toolbox mfcc.m 121 | 122 | f_0 = 0; % 133.33333; 123 | f_sp = 200/3; % 66.66667; 124 | brkfrq = 1000; 125 | brkpt = (brkfrq - f_0)/f_sp; % starting mel value for log region 126 | logstep = exp(log(6.4)/27); % the magic 1.0711703 which is the ratio needed to get from 1000 Hz to 6400 Hz in 27 steps, and is *almost* the ratio between 1000 Hz and the preceding linear filter center at 933.33333 Hz (actually 1000/933.33333 = 1.07142857142857 and exp(log(6.4)/27) = 1.07117028749447) 127 | 128 | linpts = (f < brkfrq); 129 | 130 | z = 0*f; 131 | 132 | % fill in parts separately 133 | z(linpts) = (f(linpts) - f_0)/f_sp; 134 | z(~linpts) = brkpt+(log(f(~linpts)/brkfrq))./log(logstep); 135 | 136 | end 137 | -------------------------------------------------------------------------------- /fftOneSide.m: -------------------------------------------------------------------------------- 1 | function [magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, plotOpt) 2 | % fftOneSide: One-sided FFT for real signals 3 | % Usage: [magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, plotOpt) 4 | % 5 | % For example: 6 | % [y, fs]=wavread('welcome.wav'); 7 | % frameSize=512; 8 | % startIndex=2047; 9 | % signal=y(startIndex:startIndex+frameSize+1); 10 | % signal=signal.*hamming(length(signal)); 11 | % plotOpt=1; 12 | % [magSpec, phaseSpec, freq, powerSpecInDb]=fftOneSide(signal, fs, plotOpt); 13 | 14 | % Roger Jang, 20060411, 20070506 15 | 16 | if nargin<1, selfdemo; return; end 17 | if nargin<2, 
fs=1; end 18 | if nargin<3, plotOpt=0; end 19 | 20 | N = length(signal); % Signal length 21 | freqStep = fs/N; % Frequency resolution 22 | time = (0:N-1)/fs; % Time vector 23 | z = fft(signal); % Spectrum 24 | freq = freqStep*(0:N/2)'; % Frequency vector 25 | z = z(1:length(freq)); % One side 26 | z(2:end-1)=2*z(2:end-1); % Assuming N is even, symmetric data is multiplied by 2 27 | magSpec=abs(z); % Magnitude spectrum 28 | phaseSpec=unwrap(angle(z)); % Phase spectrum 29 | powerSpecInDb=20*log10(magSpec+realmin); % Power in dB: decibels use the base-10 log; realmin avoids log of zero 30 | 31 | if plotOpt 32 | % ====== Plot time-domain signals 33 | subplot(3,1,1); 34 | plot(time, signal, '.-'); 35 | title(sprintf('Input signals (fs=%d)', fs)); 36 | xlabel('Time (seconds)'); ylabel('Amplitude'); axis tight 37 | % ====== Plot spectral power 38 | subplot(3,1,2); 39 | plot(freq, powerSpecInDb, '.-'); grid on 40 | title('Power spectrum'); 41 | xlabel('Frequency (Hz)'); ylabel('Power (db)'); axis tight 42 | % ====== Plot phase 43 | subplot(3,1,3); 44 | plot(freq, phaseSpec, '.-'); grid on 45 | title('Phase'); 46 | xlabel('Frequency (Hz)'); ylabel('Phase (Radian)'); axis tight 47 | end 48 | 49 | % ====== Self demo 50 | function selfdemo 51 | [y, fs]=wavread('welcome.wav'); 52 | frameSize=512; 53 | startIndex=2047; 54 | signal=y(startIndex:startIndex+frameSize+1); 55 | signal=signal.*hamming(length(signal)); 56 | %signal=[signal; zeros(frameSize, 1)]; 57 | [magSpec, phaseSpec, freq, powerSpecInDb]=feval(mfilename, signal, fs, 1); -------------------------------------------------------------------------------- /hz2octs.m: -------------------------------------------------------------------------------- 1 | function octs = hz2octs(freq, A440) 2 | % octs = hz2octs(freq, A440) 3 | % Convert a frequency in Hz into a real number counting 4 | % the octaves above A0. So hz2octs(440) = 4.0 5 | % Optional A440 specifies the Hz to be treated as middle A (default 440).
6 | % 2006-06-29 dpwe@ee.columbia.edu for fft2chromamx 7 | 8 | if nargin < 2; A440 = 440; end 9 | 10 | % A4 = A440 = 440 Hz, so A0 = 440/16 Hz 11 | octs = log(freq./(A440/16))./log(2); 12 | 13 | -------------------------------------------------------------------------------- /ifgram.m: -------------------------------------------------------------------------------- 1 | function [F,D] = ifgram(X, N, W, H, SR) 2 | % [F,D] = ifgram(X, N, W, H, SR) Instantaneous frequency by phase deriv. 3 | % X is a 1-D signal. Process with N-point FFTs applying a W-point 4 | % window, stepping by H points; return (N/2)+1 channels with the 5 | % instantaneous frequency (as a proportion of the sampling rate) 6 | % obtained as the time-derivative of the phase of the complex spectrum 7 | % as described by Toshihiko Abe, Takao Kobayashi, and Satoshi Imai 8 | % "Robust Pitch Estimation with Harmonics Enhancement in Noisy 9 | % Environments Based on Instantaneous Frequency" ICSLP 1996 10 | % http://www.kbys.ip.titech.ac.jp/research/pdf/icslp96-pitch.pdf 11 | % See also Abe's 2006 IEEE TASLP paper 14(4) 1292-1300. 12 | % 13 | % Same arguments and some common code as dpwebox/stft.m. 14 | % Calculates regular STFT as side effect - returned in D. 15 | % after 1998may02 dpwe@icsi.berkeley.edu 16 | % 2001-03-05 dpwe@ee.columbia.edu revised version 17 | % 2001-12-13 dpwe@ee.columbia.edu Fixed to work when N != W 18 | % $Header: $ 19 | 20 | % Copyright (c) 2006 Columbia University. 21 | % 22 | % This file is part of LabROSA-coversongID 23 | % 24 | % LabROSA-coversongID is free software; you can redistribute it and/or modify 25 | % it under the terms of the GNU General Public License version 2 as 26 | % published by the Free Software Foundation. 27 | % 28 | % LabROSA-coversongID is distributed in the hope that it will be useful, but 29 | % WITHOUT ANY WARRANTY; without even the implied warranty of 30 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 31 | % General Public License for more details. 32 | % 33 | % You should have received a copy of the GNU General Public License 34 | % along with LabROSA-coversongID; if not, write to the Free Software 35 | % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 36 | % 02110-1301 USA 37 | % 38 | % See the file "COPYING" for the text of the license. 39 | 40 | if nargin < 2; N = 256; end 41 | if nargin < 3; W = N; end 42 | if nargin < 4; H = W/2; end 43 | if nargin < 5; SR = 1; end 44 | 45 | s = length(X); 46 | % Make sure it's a single row 47 | if size(X,1) > 1 48 | X = X'; 49 | end 50 | 51 | %win = [0,hanning(W-1)']; 52 | win = 0.5*(1-cos([0:(W-1)]/W*2*pi)); 53 | 54 | % Window for discrete differentiation 55 | T = W/SR; 56 | dwin = -pi / T * sin([0:(W-1)]/W*2*pi); 57 | 58 | % sum(win) takes out integration due to window, 2 compensates for neg frq 59 | norm = 2/sum(win); 60 | 61 | % How many complete windows? 62 | nhops = 1 + floor((s - W)/H); 63 | 64 | F = zeros(1 + N/2, nhops); 65 | D = zeros(1 + N/2, nhops); 66 | 67 | nmw1 = floor( (N-W)/2 ); 68 | nmw2 = N-W - nmw1; 69 | 70 | ww = 2*pi*[0:(N-1)]*SR/N; 71 | 72 | for h = 1:nhops 73 | u = X((h-1)*H + [1:W]); 74 | % if(h==0) 75 | % plot(u) 76 | % end 77 | % Apply windows now, while the length is right 78 | wu = win.*u; 79 | du = dwin.*u; 80 | 81 | % Pad or truncate samples if N != W 82 | if N > W 83 | wu = [zeros(1,nmw1),wu,zeros(1,nmw2)]; 84 | du = [zeros(1,nmw1),du,zeros(1,nmw2)]; 85 | end 86 | if N < W 87 | wu = wu(-nmw1+[1:N]); 88 | du = du(-nmw1+[1:N]); 89 | end 90 | % FFTs of straight samples plus differential-weighted ones 91 | t1 = fft(fftshift(du)); 92 | t2 = fft(fftshift(wu)); 93 | % Scale down to factor out length & window effects 94 | D(:,h) = t2(1:(1 + N/2))'*norm; 95 | 96 | % Calculate instantaneous frequency from phase of differential spectrum 97 | t = t1 + j*(ww.*t2); 98 | a = real(t2); 99 | b = imag(t2); 100 | da = real(t); 101 | db = imag(t); 102 | instf = (1/(2*pi))*(a.*db - 
b.*da)./((a.*a + b.*b)+(abs(t2)==0)); 103 | % 1/2pi converts rad/s into cycles/s 104 | % sampling rate already factored in as constant in dwin & ww 105 | % so result is in Hz 106 | 107 | F(:,h) = instf(1:(1 + N/2))'; 108 | 109 | end; 110 | 111 | -------------------------------------------------------------------------------- /ifptrack.m: -------------------------------------------------------------------------------- 1 | function [p,m,S] = ifptrack(d,w,sr,fminl,fminu,fmaxl,fmaxu) 2 | % [p,m,S] = ifptrack(d,w,sr,fminl,fminu,fmaxl,fmaxu) 3 | % Pitch track based on inst freq. 4 | % Look for adjacent bins with same inst freq. 5 | % d is the input waveform. sr is its sample rate 6 | % w is the basic STFT DFT length (window is half, hop is 1/4) 7 | % S returns the underlying complex STFT. 8 | % fmin,fmax define ramps at edge of sensitivity 9 | % 2006-05-03 dpwe@ee.columbia.edu 10 | 11 | % Copyright (c) 2006 Columbia University. 12 | % 13 | % This file is part of LabROSA-coversongID 14 | % 15 | % LabROSA-coversongID is free software; you can redistribute it and/or modify 16 | % it under the terms of the GNU General Public License version 2 as 17 | % published by the Free Software Foundation. 18 | % 19 | % LabROSA-coversongID is distributed in the hope that it will be useful, but 20 | % WITHOUT ANY WARRANTY; without even the implied warranty of 21 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | % General Public License for more details. 23 | % 24 | % You should have received a copy of the GNU General Public License 25 | % along with LabROSA-coversongID; if not, write to the Free Software 26 | % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | % 02110-1301 USA 28 | % 29 | % See the file "COPYING" for the text of the license. 
30 | 31 | % downweight fundamentals below here 32 | if nargin < 4; fminl = 150; end 33 | if nargin < 5; fminu = 300; end 34 | % highest frequency we look to 35 | if nargin < 6; fmaxl = 2000; end 36 | if nargin < 7; fmaxu = 4000; end 37 | 38 | 39 | % Calculate the inst freq gram 40 | [I,S] = ifgram(d,w,w/2,w/4,sr); 41 | 42 | % Only look at bins up to 2 kHz 43 | maxbin = round(fmaxu * (w/sr) ); 44 | %maxbin = size(I,1) 45 | minbin = round(fminl * (w/sr) ); 46 | 47 | % Find plateaus in ifgram - stretches where delta IF is < thr 48 | ddif = [I(2:maxbin, :);I(maxbin,:)] - [I(1,:);I(1:(maxbin-1),:)]; 49 | 50 | % expected increment per bin = sr/w, threshold at 3/4 that 51 | dgood = abs(ddif) < .75*sr/w; 52 | 53 | % delete any single bins (both above and below are zero); 54 | dgood = dgood .* ([dgood(2:maxbin,:);dgood(maxbin,:)] > 0 | [dgood(1,:);dgood(1:(maxbin-1),:)] > 0); 55 | 56 | % check it out 57 | %p = dgood; 58 | 59 | % reconstruct just pitchy cells? 60 | %r = istft(p.*S,w,w/2,w/4); 61 | 62 | p = 0*dgood; 63 | m = 0*dgood; 64 | 65 | % For each frame, extract all harmonic freqs & magnitudes 66 | for t = 1:size(I,2) 67 | ds = dgood(:,t)'; 68 | lds = length(ds); 69 | % find nonzero regions in this vector 70 | st = find(([0,ds(1:(lds-1))]==0) & (ds > 0)); 71 | en = find((ds > 0) & ([ds(2:lds),0] == 0)); 72 | npks = length(st); 73 | frqs = zeros(1,npks); 74 | mags = zeros(1,npks); 75 | for i = 1:length(st) 76 | bump = abs(S(st(i):en(i),t)); 77 | frqs(i) = (bump'*I(st(i):en(i),t))/(sum(bump)+(sum(bump)==0)); 78 | mags(i) = sum(bump); 79 | if frqs(i) > fmaxu 80 | mags(i) = 0; 81 | frqs(i) = 0; 82 | elseif frqs(i) > fmaxl 83 | mags(i) = mags(i) * max(0, (fmaxu - frqs(i))/(fmaxu-fmaxl)); 84 | end 85 | % downweight magnitudes below? 
200 Hz 86 | if frqs(i) < fminl 87 | mags(i) = 0; 88 | frqs(i) = 0; 89 | elseif frqs(i) < fminu 90 | % 1 octave fade-out 91 | mags(i) = mags(i) * (frqs(i) - fminl)/(fminu-fminl); 92 | end 93 | if frqs(i) < 0 94 | mags(i) = 0; 95 | frqs(i) = 0; 96 | end 97 | 98 | end 99 | 100 | % then just keep the largest at each frame (for now) 101 | % [v,ix] = max(mags); 102 | % p(t) = frqs(ix); 103 | % m(t) = mags(ix); 104 | % No, keep them all 105 | %bin = st; 106 | bin = round((st+en)/2); 107 | p(bin,t) = frqs; 108 | m(bin,t) = mags; 109 | end 110 | 111 | %% Pull out the max in each column 112 | %[mm,ix] = max(m); 113 | %% idiom to retrieve different element from each column 114 | %[nr,nc] = size(p); 115 | %pp = p((nr*[0:(nc-1)])+ix); 116 | %mm = m((nr*[0:(nc-1)])+ix); 117 | % r = synthtrax(pp,mm,sr,w/4); 118 | 119 | %p = pp; 120 | %m = mm; 121 | 122 | -------------------------------------------------------------------------------- /isenhan.m: -------------------------------------------------------------------------------- 1 | function [enhanflg] = isenhan( kernel ) 2 | %ISENHAN Determine whether kernel should be enhanced 3 | % enhanflg is 1 when at least 65% of the elements equal 1 and one of the last two elements equals 1, otherwise 0 4 | 5 | len = length(kernel); 6 | count = 0; 7 | 8 | for i = 1:len 9 | if kernel(i) == 1 10 | count = count+1; 11 | end 12 | end 13 | 14 | if count/len >= 0.65 && (kernel(max(len-1,1)) == 1 || kernel(len) == 1) % max() keeps the index valid for a single-element kernel 15 | enhanflg = 1; 16 | else 17 | enhanflg = 0; 18 | end 19 | end 20 | 21 | -------------------------------------------------------------------------------- /lmin.m: -------------------------------------------------------------------------------- 1 | function [lmval,indd]=lmin(xx,filt) 2 | %LMIN function [lmval,indd]=lmin(x,filt) 3 | % Find local minima in vector X, where LMVAL is the output 4 | % vector with minima values, INDD is the corresponding indeces 5 | % FILT is the number of passes of the small running average filter 6 | % in order to get rid of small peaks. Default value FILT =0 (no 7 | % filtering).
FILT in the range from 1 to 3 is usially sufficient to 8 | % remove most of a small peaks 9 | % Examples: 10 | % xx=0:0.01:35; y=sin(xx) + cos(xx ./3); 11 | % plot(xx,y); grid; hold on; 12 | % [a b]=lmin(y,2) 13 | % plot(xx(a),y(a),'r+') 14 | % see also LMAX, MAX, MIN 15 | 16 | % 17 | %**************************************************| 18 | % Serge Koptenko, Guigne International Ltd., | 19 | % phone (709)895-3819, fax (709)895-3822 | 20 | %--------------06/03/97----------------------------| 21 | 22 | x=xx; 23 | len_x = length(x); 24 | fltr=[1 1 1]/3; 25 | if nargin <2, filt=0; 26 | else 27 | x1=x(1); x2=x(len_x); 28 | 29 | for jj=1:filt, 30 | c=conv(fltr,x); 31 | x=c(2:len_x+1); 32 | x(1)=x1; 33 | x(len_x)=x2; 34 | end 35 | end 36 | 37 | lmval=[]; 38 | indd=[]; 39 | i=2; % start at second data point in time series 40 | 41 | while i < len_x-1, 42 | if x(i) < x(i-1) 43 | if x(i) < x(i+1) % definite min 44 | lmval =[lmval x(i)]; 45 | indd = [ indd i]; 46 | 47 | elseif x(i)==x(i+1)&x(i)==x(i+2) % 'long' flat spot 48 | %lmval =[lmval x(i)]; %1 comment these two lines for strict case 49 | %indd = [ indd i]; %2 when only definite min included 50 | i = i + 2; % skip 2 points 51 | 52 | elseif x(i)==x(i+1) % 'short' flat spot 53 | %lmval =[lmval x(i)]; %1 comment these two lines for strict case 54 | %indd = [ indd i]; %2 when only definite min included 55 | i = i + 1; % skip one point 56 | end 57 | end 58 | i = i + 1; 59 | end 60 | 61 | if filt>0 & ~isempty(indd), 62 | if (indd(1)<= 3)|(indd(length(indd))+2>length(xx)), 63 | rng=1; %check if index too close to the edge 64 | else rng=2; 65 | end 66 | 67 | for ii=1:length(indd), 68 | [val(ii) iind(ii)] = min(xx(indd(ii) -rng:indd(ii) +rng)); 69 | iind(ii)=indd(ii) + iind(ii) -rng-1; 70 | end 71 | indd=iind; lmval=val; 72 | else 73 | end 74 | 75 | -------------------------------------------------------------------------------- /localmax.m: -------------------------------------------------------------------------------- 1 | 
function m = localmax(x) 2 | % return 1 where there are local maxima in x (columnwise). 3 | % don't include first point, maybe last point 4 | 5 | [nr,nc] = size(x); 6 | 7 | if nr == 1 8 | lx = nc; 9 | elseif nc == 1 10 | lx = nr; 11 | x = x'; 12 | else 13 | lx = nr; 14 | end 15 | 16 | if (nr == 1) || (nc == 1) 17 | 18 | m = (x > [x(1),x(1:(lx-1))]) & (x >= [x(2:lx),1+x(lx)]); 19 | 20 | if nc == 1 21 | % retranspose 22 | m = m'; 23 | end 24 | 25 | else 26 | % matrix 27 | lx = nr; 28 | m = (x > [x(1,:);x(1:(lx-1),:)]) & (x >= [x(2:lx,:);1+x(lx,:)]); 29 | 30 | end 31 | -------------------------------------------------------------------------------- /locseg.asv: -------------------------------------------------------------------------------- 1 | function [chorus, seggroup, scoretab] = locseg(bimar, index, bts, sdmar, mono, fs, debug) 2 | %LOCSEG Locate interesting segmengs(which is likely to contain the 3 | %chorus). A heuristic scoring method is adoptted to find the most likely 4 | %segment. 
5 | % bimar - binarized matrix 6 | % index - index for the diagonals 7 | % bts - beat for measuring time 8 | % debug - 0 for nothing, 1 for remove close segments, 2 for adding score 6, 9 | % 3 for adding score 3, 4 for add both score 3 and score 6 10 | 11 | if nargin < 7 12 | debug = 0; 13 | end 14 | 15 | chorus = zeros(1,4); 16 | %find all the segments longer than 4s 17 | count = 0; 18 | segflg = 0; 19 | for i = 1:length(index) 20 | temp = diag(bimar, -index(i)); 21 | for j = 1:length(temp) 22 | %the beginning of one segment 23 | if temp(j) == 1 && segflg == 0 24 | chorus(1) = index(i)+j; 25 | chorus(2) = j; 26 | segflg = 1; 27 | continue; 28 | end 29 | %the end of one segment 30 | if temp(j) == 0 && segflg == 1 31 | chorus(3) = index(i)+j; 32 | chorus(4) = j; 33 | %determine whether this segment is longer than 4s 34 | if bts(chorus(3))-bts(chorus(1)) >= 4 && bts(chorus(4))-bts(chorus(2)) >= 4 35 | if count == 0 36 | seggroup = chorus; 37 | else 38 | seggroup = [seggroup;chorus]; 39 | end 40 | count = count+1; 41 | end 42 | segflg = 0; 43 | continue; 44 | end 45 | end 46 | end 47 | 48 | if debug == 1 49 | %for each diagonal segment found in the binarized matrix, the method 50 | %looks for diagonal segments which are located close to it. 
51 | clostab = zeros(count, count+2); 52 | for i = 1:count 53 | closrec = 3; 54 | for j = 1:count 55 | if i == j 56 | continue; 57 | end 58 | if seggroup(j,1)>=seggroup(i,1)-5 && seggroup(j,3)<=seggroup(i,3)+20 && abs(seggroup(j,2)-seggroup(i,2))<=20 && seggroup(j,4)<=seggroup(i,4)+5 59 | clostab(i,1) = clostab(i,1)+1; 60 | clostab(j,2) = clostab(j,2)+1; 61 | clostab(i,closrec) = j; 62 | closrec = closrec+1; 63 | end 64 | end 65 | end 66 | %Remove the extra segments 67 | %current not considering 68 | end 69 | 70 | %scoring scheme 71 | scoretab = zeros(count,1); 72 | 73 | %prework for 4th score 74 | mono2 = (mono.^2); 75 | aven = mean(mono2); 76 | avedis = mean(mean(sdmar)); 77 | 78 | %prework for 5th score 79 | if debug == 2 || debug == 4 80 | occurnum = zeros(count, 1); 81 | for i = 1:count 82 | for j = 1:count 83 | if j == i 84 | continue; 85 | elseif abs(seggroup(i,2)-seggroup(j,2))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) && abs(seggroup(i,4)-seggroup(j,4))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) 86 | occurnum(i) = occurnum(i)+1; 87 | end 88 | end 89 | end 90 | end 91 | 92 | %prework for 2nd score 93 | if debug == 3 || debug == 4 94 | %find the segment group - 3 segment with one locating under and one 95 | %locating right 96 | groupcount = 0; 97 | group = zeros(1,3); 98 | for i = 1:count 99 | for j = 1:count 100 | if j == i 101 | continue; 102 | elseif seggroup(j,1)>=seggroup(i,3) && ~(seggroup(i,4)<=seggroup(j,2)||seggroup(i,2)>=seggroup(j,4)) 103 | for k = 1:count 104 | if k == i || k == j 105 | continue; 106 | elseif ~(seggroup(j,3)<=seggroup(k,1)||seggroup(j,1)>=seggroup(k,3)) 107 | if groupcount == 0 108 | group = [i,j,k]; 109 | else 110 | group = [group;i,j,k]; 111 | end 112 | end 113 | end 114 | end 115 | end 116 | end 117 | [m,~] = size(group); 118 | sc3 = zeros(m,2); 119 | sc3(:,1) = group(:,2); 120 | for n = 1:m 121 | xb = seggroup(group(n,2),4)-seggroup(group(n,2),2); 122 | xu = seggroup(group(n,1),4)-seggroup(group(n,1),2); 123 | xr = 
seggroup(group(n,3),4)-seggroup(group(n,3),2); 124 | theta1 = 1-2*abs(seggroup(group(n,1),4)-seggroup(group(n,2),4))/(xb+xu); 125 | if seggroup(group(n,2),2)=seggroup(group(n,1),4) 128 | theta2 = 1-(seggroup(group(n,2),2)-seggroup(group(n,1),4))/xb; 129 | else 130 | theta2 = 1; 131 | end 132 | theta3 = 1-abs(xr-xb)/xb; 133 | theta4 = 1-2*min(abs(seggroup(group(n,2),1)-seggroup(group(n,3),1)),abs(seggroup(group(n,2),3)-seggroup(group(n,3),3)))/(xb+xr); 134 | theta = (theta1+theta2+theta3+theta4)/4; 135 | sc3(n,2) = theta; 136 | end 137 | end 138 | 139 | for i = 1:count 140 | %1st - position score 141 | s1 = 1-abs(seggroup(i,2)+0.5*(seggroup(i,3)-seggroup(i,1))-round(length(bts)/4))/(round(length(bts)/4)); 142 | s2 = 1-abs(seggroup(i,1)+0.5*(seggroup(i,3)-seggroup(i,1))-round(3*length(bts)/4))/(round(length(bts)/4)); 143 | %2nd - relation to other repetitions 144 | if debug == 3 || debug == 4 145 | if isempty(find(sc3(:,1)==i)) 146 | s3 = 0; 147 | else 148 | s3 = max(sc3(find(sc3(:,1)==i),2)); 149 | end 150 | else 151 | s3 = 0; 152 | end 153 | 154 | %3rd - average energy 155 | s4 = avenergy(mono2, aven, fs, bts, seggroup, i); 156 | %4th - average distance 157 | s5 = distsc(avedis, sdmar, seggroup, i); 158 | %5th - number of times the repetition occurs 159 | if debug == 2 || debug == 4 160 | s6 = occurnum(i)/max(occurnum); 161 | else 162 | s6 = 0; 163 | end 164 | fprintf('The %d th segment:\n', i); 165 | fprintf('s1:%.2d, s2:%.2d, s3:%.2d, s4:%.2d, s5:%.2d, s6:%.2d,',s1,s2,s3,s4,s5,s6); 166 | scoretab(i) = 0.5*(s1+s2+s4+s6)+s3+s5; 167 | fprintf('s:%.2d\n',scoretab(i)); 168 | end 169 | 170 | %the segment with the most score be considered for chorus 171 | chorus = seggroup(scoretab == max(scoretab),:); 172 | end 173 | 174 | -------------------------------------------------------------------------------- /locseg.m: -------------------------------------------------------------------------------- 1 | function [chorus, seggroup, scoretab] = locseg(bimar, index, bts, sdmar, 
mono, fs, debug) 2 | %LOCSEG Locate interesting segmengs(which is likely to contain the 3 | %chorus). A heuristic scoring method is adoptted to find the most likely 4 | %segment. 5 | % bimar - binarized matrix 6 | % index - index for the diagonals 7 | % bts - beat for measuring time 8 | % debug - 0 for nothing, 1 for remove close segments, 2 for adding score 6, 9 | % 3 for adding score 3, 4 for add both score 3 and score 6 10 | 11 | if nargin < 7 12 | debug = 0; 13 | end 14 | 15 | chorus = zeros(1,4); 16 | %find all the segments longer than 4s 17 | count = 0; 18 | segflg = 0; 19 | for i = 1:length(index) 20 | temp = diag(bimar, -index(i)); 21 | for j = 1:length(temp) 22 | %the beginning of one segment 23 | if temp(j) == 1 && segflg == 0 24 | chorus(1) = index(i)+j; 25 | chorus(2) = j; 26 | segflg = 1; 27 | continue; 28 | end 29 | %the end of one segment 30 | if temp(j) == 0 && segflg == 1 31 | chorus(3) = index(i)+j; 32 | chorus(4) = j; 33 | %determine whether this segment is longer than 4s 34 | if bts(chorus(3))-bts(chorus(1)) >= 4 && bts(chorus(4))-bts(chorus(2)) >= 4 35 | if count == 0 36 | seggroup = chorus; 37 | else 38 | seggroup = [seggroup;chorus]; 39 | end 40 | count = count+1; 41 | end 42 | segflg = 0; 43 | continue; 44 | end 45 | end 46 | end 47 | 48 | if debug == 1 49 | %for each diagonal segment found in the binarized matrix, the method 50 | %looks for diagonal segments which are located close to it. 
51 | clostab = zeros(count, count+2); 52 | for i = 1:count 53 | closrec = 3; 54 | for j = 1:count 55 | if i == j 56 | continue; 57 | end 58 | if seggroup(j,1)>=seggroup(i,1)-5 && seggroup(j,3)<=seggroup(i,3)+20 && abs(seggroup(j,2)-seggroup(i,2))<=20 && seggroup(j,4)<=seggroup(i,4)+5 59 | clostab(i,1) = clostab(i,1)+1; 60 | clostab(j,2) = clostab(j,2)+1; 61 | clostab(i,closrec) = j; 62 | closrec = closrec+1; 63 | end 64 | end 65 | end 66 | %Remove the extra segments 67 | %current not considering 68 | end 69 | 70 | %scoring scheme 71 | scoretab = zeros(count,1); 72 | 73 | %prework for 4th score 74 | mono2 = (mono.^2); 75 | aven = mean(mono2); 76 | avedis = mean(mean(sdmar)); 77 | 78 | %prework for 5th score 79 | if debug == 2 || debug == 4 80 | occurnum = zeros(count, 1); 81 | for i = 1:count 82 | for j = 1:count 83 | if j == i 84 | continue; 85 | elseif abs(seggroup(i,2)-seggroup(j,2))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) && abs(seggroup(i,4)-seggroup(j,4))<=0.2*abs(seggroup(j,2)-seggroup(j,4)) 86 | occurnum(i) = occurnum(i)+1; 87 | end 88 | end 89 | end 90 | end 91 | 92 | %prework for 2nd score 93 | if debug == 3 || debug == 4 94 | %find the segment group - 3 segment with one locating under and one 95 | %locating right 96 | groupcount = 0; 97 | group = zeros(1,3); 98 | for i = 1:count 99 | for j = 1:count 100 | if j == i 101 | continue; 102 | elseif seggroup(j,1)>=seggroup(i,3) && ~(seggroup(i,4)<=seggroup(j,2)||seggroup(i,2)>=seggroup(j,4)) 103 | for k = 1:count 104 | if k == i || k == j 105 | continue; 106 | elseif ~(seggroup(j,3)<=seggroup(k,1)||seggroup(j,1)>=seggroup(k,3)) 107 | if groupcount == 0 108 | group = [i,j,k]; 109 | else 110 | group = [group;i,j,k]; 111 | end 112 | end 113 | end 114 | end 115 | end 116 | end 117 | [m,~] = size(group); 118 | sc3 = zeros(m,2); 119 | sc3(:,1) = group(:,2); 120 | for n = 1:m 121 | xb = seggroup(group(n,2),4)-seggroup(group(n,2),2); 122 | xu = seggroup(group(n,1),4)-seggroup(group(n,1),2); 123 | xr = 
seggroup(group(n,3),4)-seggroup(group(n,3),2); 124 | theta1 = 1-2*abs(seggroup(group(n,1),4)-seggroup(group(n,2),4))/(xb+xu); 125 | if seggroup(group(n,2),2)=seggroup(group(n,1),4) 128 | theta2 = 1-(seggroup(group(n,2),2)-seggroup(group(n,1),4))/xb; 129 | else 130 | theta2 = 1; 131 | end 132 | theta3 = 1-abs(xr-xb)/xb; 133 | theta4 = 1-2*min(abs(seggroup(group(n,2),1)-seggroup(group(n,3),1)),abs(seggroup(group(n,2),3)-seggroup(group(n,3),3)))/(xb+xr); 134 | theta = (theta1+theta2+theta3+theta4)/4; 135 | sc3(n,2) = theta; 136 | end 137 | end 138 | 139 | for i = 1:count 140 | %1st - position score 141 | s1 = 1-abs(seggroup(i,2)+0.5*(seggroup(i,3)-seggroup(i,1))-round(length(bts)/4))/(round(length(bts)/4)); 142 | s2 = 1-abs(seggroup(i,1)+0.5*(seggroup(i,3)-seggroup(i,1))-round(3*length(bts)/4))/(round(length(bts)/4)); 143 | %2nd - relation to other repetitions 144 | if debug == 3 || debug == 4 145 | if isempty(find(sc3(:,1)==i)) 146 | s3 = 0; 147 | else 148 | s3 = max(sc3(find(sc3(:,1)==i),2)); 149 | end 150 | else 151 | s3 = 0; 152 | end 153 | 154 | %3rd - average energy 155 | s4 = avenergy(mono2, aven, fs, bts, seggroup, i); 156 | %4th - average distance 157 | s5 = distsc(avedis, sdmar, seggroup, i); 158 | %5th - number of times the repetition occurs 159 | if debug == 2 || debug == 4 160 | s6 = occurnum(i)/max(occurnum); 161 | else 162 | s6 = 0; 163 | end 164 | %fprintf('The %d th segment:\n', i); 165 | %fprintf('s1:%.2d, s2:%.2d, s3:%.2d, s4:%.2d, s5:%.2d, s6:%.2d,',s1,s2,s3,s4,s5,s6); 166 | scoretab(i) = 0.5*(s1+s2+s4+s6)+s3+s5; 167 | %fprintf('s:%.2d\n',scoretab(i)); 168 | end 169 | 170 | %the segment with the most score be considered for chorus 171 | chorus = seggroup(scoretab == max(scoretab),:); 172 | end 173 | 174 | -------------------------------------------------------------------------------- /main.asv: -------------------------------------------------------------------------------- 1 | %output the detection result 2 | fout = fopen('Result.txt', 'w'); 3 | 
4 | ctime = chorusdetection('C:\Users\Cheerz\Desktop\07. Viva La Vida.wav'); 5 | 6 | %output the result to result.txt 7 | fprintf(fout, '%s', 8 | for i = 1:4 9 | fprintf(fout, '%f', ctime(i)); 10 | fprintf(fout, '%s', ' '); 11 | end 12 | 13 | -------------------------------------------------------------------------------- /main.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/main.m -------------------------------------------------------------------------------- /matcentre.m: -------------------------------------------------------------------------------- 1 | function [dirmean] = matcentre(chroma_mar, i, j) 2 | %MATCENTRE the intermediate process of chroma enhancement: six directional means of the zero-padded 5x5 neighbourhood of chroma_mar(i,j) 3 | 4 | kernel = zeros(5,5); 5 | dirmean = zeros(6,1); 6 | [nrows, ncols] = size(chroma_mar); % bound rows and columns separately so a non-square matrix is never indexed out of range 7 | 8 | for m = -2:2 9 | for n = -2:2 10 | if i+m<=0 || i+m>nrows || j+n<=0 || j+n>ncols 11 | continue; 12 | end 13 | kernel(m+3,n+3) = chroma_mar(i+m,j+n); 14 | end 15 | end 16 | 17 | %Six directional local mean values are calculated along the upper-left, 18 | %lower-right, right, left, upper, and lower directions of the kernel 19 | 20 | dirmean(1) = mean([kernel(1,1),kernel(2,2)]); 21 | dirmean(2) = mean([kernel(4,4),kernel(5,5)]); 22 | dirmean(3) = mean([kernel(3,4),kernel(3,5)]); 23 | dirmean(4) = mean([kernel(3,1),kernel(3,2)]); 24 | dirmean(5) = mean([kernel(1,3),kernel(2,3)]); 25 | dirmean(6) = mean([kernel(4,3),kernel(5,3)]); 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /mfccbeatftrs.asv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/mfccbeatftrs.asv -------------------------------------------------------------------------------- /mfccbeatftrs.m: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/torogmw/MusicSegmentation/a96764e2d2dd4b97d6dc6357a55d165a31ef90cf/mfccbeatftrs.m -------------------------------------------------------------------------------- /octs2hz.m: -------------------------------------------------------------------------------- 1 | function hz = octs2hz(octs,A440) 2 | % hz = octs2hz(octs,A440) 3 | % Convert a real-number octave count (octaves above A0) 4 | % into a frequency in Hz; this is the inverse of hz2octs. 5 | % With the default A440, octs2hz(4.0) = 440. 6 | % Optional A440 specifies the Hz to be treated as middle A (default 440). 7 | % 2006-06-29 dpwe@ee.columbia.edu for fft2chromamx 8 | 9 | if nargin < 2; A440 = 440; end 10 | 11 | % A4 = A440 = 440 Hz, so A0 = 440/16 Hz 12 | 13 | hz = (A440/16).*(2.^octs); 14 | 15 | 16 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | a music segmentation algorithm that I proposed and implemented as my undergraduate project. 2 | The basic function is: 3 | 1. a song is loaded to the system, 4 | 2. the system will calculate the chroma(harmonic) and MFCC(timbre) features of the audio input 5 | 3. find the segmentation label by using similarity matrix. 6 | 4. 
Then output the segmented time information of this song 7 | -------------------------------------------------------------------------------- /sdm.m: -------------------------------------------------------------------------------- 1 | function [sdmar] = sdm(ftr, debug) 2 | %SDM calculate the self-distance matrix of the feature matrix ftr: sdmar(i,j) 3 | %is the Euclidean distance between columns i and j; nonzero debug shows sdmar as an image 4 | 5 | if nargin < 2; debug = 0;end 6 | 7 | [~, vecnum] = size(ftr); 8 | sdmar = zeros(vecnum, vecnum); 9 | 10 | for i = 1:vecnum; 11 | for j = 1:vecnum; 12 | sdmar(i,j) = sqrt(sum((ftr(:,i)-ftr(:,j)).^2)); % square each difference before summing 13 | end 14 | end 15 | 16 | if debug ~= 0; 17 | figure; imshow(mat2gray(sdmar)); 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /tempo.m: -------------------------------------------------------------------------------- 1 | function [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tmean,tsd,onsetenv,debug) 2 | % [t,xcr,D,onsetenv,sgsrate] = tempo(d,sr,tmean,tsd,onsetenv,debug) 3 | % Estimate the overall tempo of a track for the MIREX McKinney 4 | % contest. 5 | % d is the input audio at sampling rate sr. tmean is the mode 6 | % for BPM weighting (in bpm) and tsd is its spread (in octaves). 7 | % onsetenv is an already-calculated onset envelope (so d is 8 | % ignored). debug causes a debugging plot. 9 | % Output t(1) is the lower BPM estimate, t(2) is the faster, 10 | % t(3) is the relative weight for t(1) compared to t(2). 11 | % xcr is the windowed autocorrelation from which the BPM peaks were picked. 12 | % D is the mel-freq spectrogram 13 | % onsetenv is the "onset strength waveform", used for beat tracking 14 | % sgsrate is the sampling rate of onsetenv and D. 15 | % 16 | % 2006-08-25 dpwe@ee.columbia.edu 17 | % uses: localmax, fft2melmx 18 | 19 | % Copyright (c) 2006 Columbia University. 
20 | % 21 | % This file is part of LabROSA-coversongID 22 | % 23 | % LabROSA-coversongID is free software; you can redistribute it and/or modify 24 | % it under the terms of the GNU General Public License version 2 as 25 | % published by the Free Software Foundation. 26 | % 27 | % LabROSA-coversongID is distributed in the hope that it will be useful, but 28 | % WITHOUT ANY WARRANTY; without even the implied warranty of 29 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 30 | % General Public License for more details. 31 | % 32 | % You should have received a copy of the GNU General Public License 33 | % along with LabROSA-coversongID; if not, write to the Free Software 34 | % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 35 | % 02110-1301 USA 36 | % 37 | % See the file "COPYING" for the text of the license. 38 | 39 | if nargin < 3; tmean = 120; end 40 | if nargin < 4; tsd = 3.0; end 41 | if nargin < 5; onsetenv = []; end 42 | if nargin < 6; debug = 0; end 43 | 44 | sro = 8000; 45 | % specgram: 256 bin @ 8kHz = 32 ms / 4 ms hop 46 | swin = 256; 47 | shop = 32; 48 | % mel channels 49 | nmel = 40; 50 | % sample rate for specgram frames (granularity for rest of processing) 51 | sgsrate = sro/shop; 52 | % autoco out to 4 s 53 | acmax = round(4*sgsrate); 54 | 55 | D = 0; 56 | 57 | if isempty(onsetenv) 58 | % no onsetenv provided - have to calculate it 59 | 60 | % resample to 8 kHz 61 | if (sr ~= sro) 62 | gg = gcd(sro,sr); 63 | d = resample(d,sro/gg,sr/gg); 64 | sr = sro; 65 | end 66 | 67 | D = specgram(d,swin,sr,swin,swin-shop); 68 | 69 | % Construct db-magnitude-mel-spectrogram 70 | mlmx = fft2melmx(swin,sr,nmel); 71 | D = 20*log10(max(1e-10,mlmx(:,1:(swin/2+1))*abs(D))); 72 | 73 | % Only look at the top 80 dB 74 | D = max(D, max(max(D))-80); 75 | 76 | % The raw onset decision waveform 77 | mm = (mean(max(0,diff(D')'))); 78 | eelen = length(mm); 79 | 80 | % dc-removed mm 81 | onsetenv = filter([1 -1], [1 -.99],mm); 82 | 83 | end % of 
onsetenv calc block 84 | 85 | % Find rough global period 86 | % Only use the 1st 90 sec to estimate global pd (avoid glitches?) 87 | 88 | maxdur = 90; % sec 89 | maxcol = min(round(maxdur*sgsrate),length(onsetenv)); 90 | 91 | xcr = xcorr(onsetenv(1:maxcol),onsetenv(1:maxcol),acmax); 92 | 93 | % find local max in the global ac 94 | rawxcr = xcr(acmax+1+[0:acmax]); 95 | 96 | % window it around default bpm 97 | xcrwin = exp(-.5*((log((60*sgsrate./([0:acmax]+0.1)/tmean))/log(2)*tsd).^2)); 98 | xcr = rawxcr.*xcrwin; 99 | 100 | xpks = localmax(xcr); 101 | % will not include any peaks in first down slope (before goes below 102 | % zero for the first time) 103 | xpks(1:min(find(xcr<0))) = 0; 104 | % largest local max away from zero 105 | maxpk = max(xcr(xpks)); 106 | 107 | % ?? then period is shortest period with a peak that approaches the max 108 | %maxpkthr = 0.4; 109 | %startpd = -1 + min(find( (xpks.*xcr) > maxpkthr*maxpk ) ); 110 | %startpd = -1 + (find( (xpks.*xcr) > maxpkthr*maxpk ) ); 111 | 112 | % no, just largest peak after windowing 113 | startpd = -1 + find((xpks.*xcr) == max(xpks.*xcr)); 114 | 115 | % ??Choose acceptable peak closest to 120 bpm 116 | %[vv,spix] = min(abs(60./(startpd/sgsrate) - 120)); 117 | %startpd = startpd(spix); 118 | % No, just choose shortest acceptable peak 119 | startpd = startpd(1); 120 | 121 | t = 60/(startpd/sgsrate); 122 | 123 | % Choose best peak out of .33 .5 2 3 x this period 124 | candpds = round([.33 .5 2 3]*startpd); 125 | candpds = candpds(candpds < acmax); 126 | 127 | [vv,xx] = max(xcr(1+candpds)); 128 | 129 | startpd2 = candpds(xx); 130 | vvm = xcr(1+startpd); 131 | pratio = vvm/(vvm+vv); 132 | 133 | t = [60/(startpd/sgsrate) 60/(startpd2/sgsrate) pratio]; 134 | 135 | % ensure results are lowest-first 136 | if t(2) < t(1) 137 | t([1 2]) = t([2 1]); 138 | t(3) = 1-t(3); 139 | end 140 | 141 | if debug > 0 142 | 143 | % Report results and plot weighted autocorrelation with picked peaks 144 | disp(['Global bt pd = 
',num2str(t(1)),' @ ',num2str(t(3)),' / ',num2str(t(2)),' bpm']); 145 | 146 | subplot(414) 147 | plot([0:acmax],xcr,'-b', ... 148 | [0:acmax],xcrwin*maxpk,'-r', ... 149 | [startpd startpd], [min(xcr) max(xcr)], '-g', ... 150 | [startpd2 startpd2], [min(xcr) max(xcr)], '-c'); 151 | grid; 152 | 153 | end 154 | 155 | % Read in all the tempo settings 156 | % for i = 1:20; f = fopen(['mirex-beattrack/train/train',num2str(i),'-tempo.txt']); r(i,:) = fscanf(f, '%f\n'); fclose(f); end 157 | -------------------------------------------------------------------------------- /tokenize.m: -------------------------------------------------------------------------------- 1 | function a = tokenize(s,t) 2 | % Break space-separated string into cell array of strings. 3 | % Optional second arg gives alternate separator (default ' ') 4 | % 2004-09-18 dpwe@ee.columbia.edu 5 | if nargin < 2; t = ' '; end 6 | a = []; 7 | p = 1; 8 | n = 1; 9 | l = length(s); --------------------------------------------------------------------------------