├── .DS_Store ├── .gitignore ├── LICENSE.md ├── README ├── SingingMeansCovars.mat ├── alignmentVisualiser.m ├── example.mid ├── example.txt ├── example.wav ├── exampleFixed.txt ├── examplePerformance.mid ├── exampleScript.m ├── fillpriormat_gauss.m ├── filltransmat.m ├── findMids.m ├── findPeaks.m ├── findSteady.m ├── genMeansCovars.m ├── genPolyTrans.m ├── getCentVals.m ├── getLoudnessEstimates.m ├── getOnsOffs.m ├── getPitchVibratoData.m ├── getPitchVibratoDynamicsData.m ├── getTimingData.m ├── getVals.m ├── hzcents.m ├── noteDct.m ├── perceivedPitch.m ├── plotFineAlign.m ├── polyExample.mid ├── polyExample.wav ├── polySingingMeansCovars.mat ├── readme.txt ├── runAlignment.m ├── runDTWAlignment.m ├── runHMMAlignment.m ├── runPolyAlignment.m ├── runPolyAlignment.m~ ├── selectStates.m ├── smoothNote.m └── visualiser.m /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .m~ 2 | .DS_STORE 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2011–2021, Johanna Devaney and Michael Mandel 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Example Usage 2 | - from the included script ensembleScript.m 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % exampleScript.m 5 | % 6 | % Description: 7 | % Example of how to use the HMM alignment algorithm 8 | % 9 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 10 | % http://www.ampact.org 11 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 13 | 14 | % audio file to be aligned 15 | audiofile=('example.wav'); 16 | 17 | % MIDI file to be aligned 18 | midifile=('example.mid'); 19 | 20 | % number of notes to align 21 | numNotes=6; 22 | 23 | % vector of order of states (according to lyrics) in stateOrd and 24 | % corresponding note numbers in noteNum 25 | % 1 indicates a rest at the beginning of ending of the note 26 | % 2 indicates a transient at the beginning or ending of the note 27 | % 3 indicates a steady state section 28 | % the following encoding is for six syllables "A-ve Ma-ri-(i)-a" 29 | % syllable A-ve Ma-ri-(i)-a 30 | % state type 13 23 23 23 3 31 31 | % note number 11 22 33 44 5 66 32 | stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; 33 | noteNum = [1 1 2 2 3 3 4 4 5 6 6]; 34 | 35 | % load singing means and covariances for the HMM alignment 36 | load SingingMeansCovars.mat 37 | means=sqrtmeans; 38 | covars=sqrtcovars; 39 | 40 | % specify that the means and covariances in the HMM won't be learned 
41 | learnparams=0; 42 | 43 | % run the alignment 44 | [allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, stateOrd, noteNum, means, covars, learnparams); 45 | 46 | % visualise the alignment 47 | alignmentVisualiser(selectstate,midifile,spec,1); 48 | 49 | % get onset and offset times 50 | times=getOnsOffs(selectstate); 51 | 52 | % write the onset and offset times to an audacity-readable file 53 | dlmwrite('example.txt',[times.ons' times.offs'], 'delimiter', '\t'); 54 | 55 | % you can load 'example.txt' into audacity and correct any errors in the 56 | % alignment, i.e., the offset error on the last note, and then reload the 57 | % corrected labels into matlab 58 | fixedLabels=load('exampleFixed.txt'); 59 | times.ons=fixedLabels(:,1)'; 60 | times.offs=fixedLabels(:,2)'; 61 | 62 | % map timing information to the quantized MIDI file 63 | nmatNew=getTimingData(midifile, times); 64 | writemidi(nmatNew,'examplePerformance.mid') 65 | 66 | % get cent values for each note 67 | cents=getCentVals(times,yinres); 68 | 69 | % calculate intervals size, perceived pitch, vibrato rate, and vibrato depth 70 | [vibratoDepth, vibratoRate, intervalSize, perceivedPitch]=getPitchVibratoData(cents,yinres.sr); 71 | 72 | % get loudness values for each note using the Genesis Loudness Toolbox 73 | [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times); 74 | 75 | % get DCT values for each note 76 | for i = 1 : length(cents) 77 | 78 | % find the peaks and troughs in the F0 trace for each note 79 | [mins{i} maxes{i}] = findPeaks(cents{i}, 100, yinres.sr/32, 60); 80 | 81 | % find the midpoints between mins and maxes in the F0 trace for each 82 | % note 83 | [x_mids{i} y_mids{i}] = findMids(cents{i}, mins{i}, maxes{i}, 100, yinres.sr/32); 84 | 85 | % generate a smoothed trajectory of a note by connecting the 86 | % midpoints between peaks and troughs. 
87 | smoothedF0s{i}=smoothNote(cents{i}, x_mids{i}, y_mids{i}); 88 | 89 | % find the steady-state portion of a note 90 | steady{i}(1:2)=findSteady(cents{i}, mins{i}, maxes{i}, x_mids{i}, y_mids{i}, 1); 91 | 92 | % compute the DCT of a signal and approximate it with the first 3 coefficients 93 | [dctVals{i}, approx{i}]=noteDct(smoothedF0s{i}(steady{i}(1):steady{i}(2)),3,yinres.sr/32); 94 | 95 | end 96 | 97 | ---------------- 98 | 99 | AMPACT Function Descriptions 100 | 101 | runAlignment.m: Calls the DTW alignment function and refines the results with the HMM alignment algorithm, with both a basic and modified state spaces (based on the lyrics). 102 | 103 | getVals.m: Gets values for DTW alignment and YIN analysis of specified audio signal and MIDI file 104 | 105 | runDTWAlignment.m: Performs a dynamic time warping alignment between specified audio and MIDI files. 106 | 107 | runHMMAlignment.m: Refines DTW alignment values with a three-state HMM, identifying silence,transient, and steady state parts of the signal. The HMM uses the DTW alignment as a prior. 108 | 109 | filltransmat.m: Makes a transition matrix from a seed transition matrix. 110 | 111 | fillpriormat_gauss.m: Creates a prior matrix based on the DTW alignment (supplied by the input variables ons and offs). 112 | 113 | selectStates.m: Refines the HMM parameters according to the modified state sequence vector passed into the function. 114 | 115 | alignmentVisualiser.m: Plots a gross DTW alignment overlaid with the fine alignment resulting from the HMM aligner on the output of YIN. 116 | 117 | getTimingData: Create a note matrix with performance timings. 118 | 119 | getCentVals: Get cent values (in relation to A, 440 Hz) for each note. 120 | 121 | getPitchVibratoData: Calculate vibrato depth, vibrato rate, perceived pitch, and interval size for the notes in the inputted cell array cents. 
122 | 123 | getLoudnessEstimates: Get loudness estimate based on Glasberg and Moore (2002) for time-varying sounds using the Loudness Toolbox. 124 | 125 | findPeaks: Find peaks and troughs in a signal. 126 | 127 | findMids: Find the midpoints between mins and maxes in a signal. 128 | 129 | smoothNote: Generate a smoothed trajectory of a note by connecting the midpoints between peaks and troughs. 130 | 131 | noteDct: Compute the DCT of a signal and approximate it with a specified number of coefficients. 132 | 133 | ---------------- 134 | 135 | AMPACT Dependencies 136 | 137 | You will need to have the following toolkits installed and in your path 138 | de Cheveigné, A. 2002. YIN MATLAB implementation Available from: http://audition.ens.fr/adc/sw/yin.zip 139 | Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 140 | Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 141 | Genesis Acoustics. 2010. Loudness Toolbox for Matlab. Available from http://www.genesis-acoustics.com/index.php?page=32 142 | Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 143 | Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/miditoolbox/ 144 | 145 | ---------------- 146 | 147 | Papers on algorithms developed for AMPACT 148 | 149 | Devaney, J., M. I. Mandel, and I. Fujinaga. 2011. Characterizing Singing Voice Fundamental Frequency Trajectories. Proceedings of the 2011 Workshop on Applications of Signal Processing to Audio and Acoustics. 150 | Devaney, J., M. I. Mandel, D. P. W. Ellis, and I. Fujinaga. 2010. Automatically extracting performance data from recordings of trained singers. Psychomusicology: Music, Mind & Brain. 21(1–2): in press. 151 | Devaney, J. 2011. 
An empirical study of the influence of musical context on intonation practices in solo singers and SATB ensembles. Ph. D. Dissertation. McGill University. 152 | Devaney, J., M. I. Mandel, and D. P. W. Ellis. 2009. Improving MIDI-audio alignment with acoutics features. In Proceedings of the 2009 Workshop on Applications of Signal Processing to Audio and Acoustics. 153 | 154 | ---------------- 155 | 156 | Papers on algorithms by other authors used by AMPACT 157 | 158 | de Cheveigné, A., and H. Kawahara. 2002. YIN, a fundamental frequency estimator for speech and music. Journal of the Acoustical Society of America 111 (4): 1917–30. 159 | Orio, N., and D. Schwarz. 2001. Alignment of monophonic and polyphonic music to a score. In Proceedings of the International Computer Music Conference, 155–8. 160 | -------------------------------------------------------------------------------- /SingingMeansCovars.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/SingingMeansCovars.mat -------------------------------------------------------------------------------- /alignmentVisualiser.m: -------------------------------------------------------------------------------- 1 | function alignmentVisualiser(trace,mid,spec,fig) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % alignmentVisualiser(trace,sig,sr,mid,highlight) 5 | % 6 | % Description: 7 | % Plots a gross DTW alignment overlaid with the fine alignment 8 | % resulting from the HMM aligner on the output of YIN. Trace(1,:) 9 | % is the list of states in the HMM, and trace(2,:) is the number of YIN 10 | % frames for which that state is occupied. Highlight is a list of 11 | % notes for which the steady state will be highlighted. 
12 | % 13 | % Inputs: 14 | % trace - 3-D matrix of a list of states (trace(1,:)), the times 15 | % they end at (trace(2,:)), and the state indices (trace(3,:)) 16 | % mid - midi file 17 | % spec - spectogram of audio file (from alignmidiwav.m) 18 | % 19 | % Dependencies: 20 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 21 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 22 | % /miditoolbox/ 23 | % 24 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 25 | % http://www.ampact.org 26 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 27 | % (mim@mr-pc.org), all rights reserved. 28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | if ~exist('fig', 'var'), fig=1; end 31 | 32 | % Fix for ending zeros that mess up the plot 33 | if trace(2,end)==0 34 | trace=trace(:,1:end-1); 35 | end 36 | if trace(2, end-1)==0 37 | trace(2,end-1)=trace(2,end-2); 38 | end 39 | 40 | % hop size between frames 41 | stftHop = 0.025; 42 | 43 | % read midi file 44 | nmat=readmidi(mid); 45 | 46 | % plot spectogram of audio file 47 | figure(fig) 48 | imagesc(20*log10(spec)); 49 | title(['Spectrogram with Aligned MIDI Notes Overlaid']); 50 | xlabel(['Time (.05s)']); 51 | ylabel(['Midinote']); 52 | axis xy; 53 | caxis(max(caxis)+[-50 0]) 54 | colormap(1-gray) 55 | 56 | % zoom in fundamental frequencies 57 | notes = nmat(:,4)'; 58 | notes = (2.^((notes-105)/12))*440; 59 | notes(end+1) = notes(end); 60 | nlim = length(notes); 61 | 62 | % plot alignment 63 | plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop); 64 | if size(trace,1) >= 3 65 | notenums = trace(3,2:end); 66 | else 67 | nlim = length(notes); 68 | notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim]; 69 | end 70 | 71 | 72 | function plotFineAlign(stateType, occupancy, notes, stftHop) 73 | 74 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 75 | % plotFineAlign(stateType, occupancy, 
notes, stftHop, highlight) 76 | % 77 | % Description: 78 | % Plot the HMM alignment based on the output of YIN. StateType is the 79 | % list of states in the HMM, and occupancy is the number of YIN frames 80 | % for which that state is occupied. Notes is a list of midi note numbers 81 | % that are played, should be one note for each [3] in stateType. If the 82 | % highlight vector is supplied, it should contain indices of the states 83 | % to highlight by plotting an extra line at the bottom of the window. 84 | % 85 | % Inputs: 86 | % stateType - vector with a list of states 87 | % occupancy - vector indicating the time (in seconds) at which the states 88 | % in stateType end 89 | % notes - vector of notes from MIDI file 90 | % stftHop - the hop size between frames in the spectrogram 91 | % 92 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 93 | % http://www.ampact.org 94 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 95 | % (mim@mr-pc.org), all rights reserved. 96 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 97 | 98 | % Plot the 4 states: silence in red, beginning transient in green, 99 | % steady state in blue, ending transient in green. 
100 | 101 | styles = {{'r+-', 'LineWidth', 2}, 102 | {'g+-', 'LineWidth', 2}, 103 | {'b+-', 'LineWidth', 2}}; 104 | 105 | cs = occupancy /stftHop; 106 | segments = [cs(1:end-1); cs(2:end)]'; 107 | 108 | hold on 109 | 110 | stateNote = max(1, cumsum(stateType == 3)+1); 111 | for i=1:size(segments,1) 112 | plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) 113 | end 114 | 115 | hold off 116 | -------------------------------------------------------------------------------- /example.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/example.mid -------------------------------------------------------------------------------- /example.txt: -------------------------------------------------------------------------------- 1 | 0.98177 4.3443 2 | 4.4161 4.8849 3 | 4.9328 5.588 4 | 5.6374 7.7751 5 | 7.7751 9.7009 6 | 9.7009 12.936 7 | -------------------------------------------------------------------------------- /example.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/example.wav -------------------------------------------------------------------------------- /exampleFixed.txt: -------------------------------------------------------------------------------- 1 | 0.981770 4.344300 4.416100 4.884900 4.932800 5.588000 5.637400 9.148700 9.148700 9.724100 9.724100 11.732525 -------------------------------------------------------------------------------- /examplePerformance.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/examplePerformance.mid -------------------------------------------------------------------------------- /exampleScript.m: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/exampleScript.m -------------------------------------------------------------------------------- /fillpriormat_gauss.m: -------------------------------------------------------------------------------- 1 | function prior = fillpriormat_gauss(Nobs,ons,offs,Nstates) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % prior = fillpriormat_gauss(Nobs,ons,offs,Nstates) 5 | % 6 | % Description: 7 | % Creates a prior matrix based on the DTW alignment (supplied by the input 8 | % variables ons and offs. A rectangular window with half a Gaussian on 9 | % each side over the onsets and offsets estimated by the DTW alignment. 10 | % 11 | % Inputs: 12 | % Nobs - number of observations 13 | % ons - vector of onset times predicted by DTW alignment 14 | % offs - vector of offset times predicted by DTW alignment 15 | % Nstates - number of states in the hidden Markov model 16 | % 17 | % Outputs: 18 | % prior - prior matrix based on DTW alignment 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org - Johanna Devaney, 2011 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 23 | % (mim@mr-pc.org), all rights reserved. 24 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | if ~exist('Nstates', 'var'), Nstates = 5; end 27 | 28 | Nnotes = length(ons); 29 | prior = sparse(Nnotes*(Nstates-1)+1,Nobs); 30 | frames = 1:Nobs; 31 | 32 | for i=1:Nnotes 33 | row = (i-1)*(Nstates-1); 34 | insert = Nstates-5; 35 | 36 | % Silence 37 | prior(row+1,:) = flatTopGaussian(frames, gh(ons,i-1,offs,i-1,frames,.5), ... 
38 | g(offs,i-1,frames), g(ons,i,frames), gh(ons,i,offs,i,frames,.5)); 39 | 40 | prior(row+2:row+2+insert-1,:) = repmat(prior(row+1,:),insert,1); 41 | 42 | % Transient, steady state, transient 43 | prior(row+2+insert,:) = ... 44 | flatTopGaussian(frames, g(offs,i-1,frames), ... 45 | gh(offs,i-1,ons,i,frames,.75), gh(ons,i,offs,i,frames,.25), g(offs,i,frames)); 46 | prior(row+3+insert,:) = ... 47 | flatTopGaussian(frames, g(offs,i-1,frames), ... 48 | g(ons,i,frames), g(offs,i,frames), g(ons,i+1,frames)); 49 | prior(row+4+insert,:) = ... 50 | flatTopGaussian(frames, g(ons,i,frames), ... 51 | gh(ons,i,offs,i,frames,.75), gh(offs,i,ons,i+1,frames,.25), g(ons,i+1,frames)); 52 | 53 | end 54 | 55 | % The last silence 56 | i = i+1; 57 | prior(row+5+insert,:) = flatTopGaussIdx(frames, ons,i-1, offs,i-1, ... 58 | offs,i, ons,i+1); 59 | 60 | function x = gh(v1, i1, v2, i2, domain, frac) 61 | 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | % x = gh(v1, i1, v2, i2, domain, frac) 64 | % 65 | % Description: 66 | % Get an element that is frac fraction of the way between v1(i1) and 67 | % v2(i2), but check bounds on both vectors. Frac of 0 returns v1(i1), 68 | % frac of 1 returns v2(i2), frac of 1/2 (the default) returns half way 69 | % between them. 70 | % 71 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 72 | % http://www.ampact.org 73 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 74 | % (mim@mr-pc.org), all rights reserved. 
75 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | 77 | if ~exist('frac', 'var'), frac = 0.5; end 78 | 79 | x1 = g(v1, i1, domain); 80 | x2 = g(v2, i2, domain); 81 | x = floor(frac*x1 + (1-frac)*x2); 82 | 83 | function w = flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) 84 | 85 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 86 | % flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) 87 | % 88 | % Description: 89 | % Create a window function that is zeros, going up to 1s with the left 90 | % half of a gaussian, then ones, then going back down to zeros with 91 | % the right half of another gaussian. b1(bi1) is the x coordinate 2 92 | % stddevs out from the mean, which is at t1(ti1). t2(ti2) is the x 93 | % coordinate of the mean of the second gaussian and b2(bi2) is 2 94 | % stddevs out from that. The points should be in that order. Vectors 95 | % are indexed intelligently, so you don't have to worry about 96 | % overflows or underflows. X is the set of points over which this is 97 | % to be calculated. 98 | % 99 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 100 | % http://www.ampact.org 101 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 102 | % (mim@mr-pc.org), all rights reserved. 103 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 104 | 105 | b1 = g(b1, bi1, x); 106 | t1 = g(t1, ti1, x); 107 | t2 = g(t2, ti2, x); 108 | b2 = g(b2, bi2, x); 109 | w = flatTopGaussian(x, b1, t1, t2, b2); 110 | 111 | 112 | 113 | function x = g(vec, idx, domain) 114 | 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | % x = g(vec, idx, domain) 117 | % 118 | % Description: 119 | % Get an element from vec, checking bounds. Domain is the set of points 120 | % that vec is a subset of. 
121 | % 122 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 123 | % http://www.ampact.org 124 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 125 | % (mim@mr-pc.org), all rights reserved. 126 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 127 | 128 | if idx < 1 129 | x = 1; 130 | elseif idx > length(vec) 131 | x = domain(end); 132 | else 133 | x = vec(idx); 134 | end 135 | 136 | 137 | 138 | function w = flatTopGaussian(x, b1, t1, t2, b2) 139 | 140 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 141 | % flatTopGaussian(x, b1, t1, t2, b2) 142 | % 143 | % Description: 144 | % Create a window function that is zeros, going up to 1s with the left 145 | % half of a gaussian, then ones, then going back down to zeros with the 146 | % right half of another gaussian. b1 is the x coordinate 2 stddevs out 147 | % from the mean, which is at t1. t2 is the x coordinate of the mean of 148 | % the second gaussian and b2 is 2 stddevs out from that. The points 149 | % should be in that order. X is the set of points over which this is 150 | % to be calculated. 151 | % 152 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 153 | % http://www.ampact.org 154 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 155 | % (mim@mr-pc.org), all rights reserved. 
156 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 157 | 158 | if any([b1 t1 t2] > [t1 t2 b2]) 159 | warning('Endpoints are not in order: %f %f %f %f', b1, t1, t2, b2) 160 | end 161 | 162 | left = normpdf(x, t1, (t1-b1)/2+1); 163 | middle = ones(1,t2-t1-1); 164 | right = normpdf(x, t2, (b2-t2)/2+1); 165 | 166 | left = left ./ max(left); 167 | right = right ./ max(right); 168 | 169 | takeOneOut = (t1 == t2); 170 | w = [left(1:t1) middle right(t2+takeOneOut:end)]; 171 | -------------------------------------------------------------------------------- /filltransmat.m: -------------------------------------------------------------------------------- 1 | function trans = filltransmat(transseed, notes) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % trans = filltransmat (transseed, notes) 5 | % 6 | % Description: 7 | % Makes a transition matrix from a seed transition matrix. The seed 8 | % matrix is composed of the states: steady state, transient, silence, 9 | % transient, steady state, but the full transition matrix starts and 10 | % ends with silence, so the seed with be chopped up on the ends. 11 | % Notes is the number of times to repeat the seed. Transseed's first 12 | % and last states should be equivalent, as they will be overlapped 13 | % with each other. 14 | % 15 | % Inputs: 16 | % transseed - transition matrix seed 17 | % notes - number of notes being aligned 18 | % 19 | % Outputs: 20 | % trans - transition matrix 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 25 | % (mim@mr-pc.org), all rights reserved. 
26 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | 28 | % Set up transition matrix 29 | N = size(transseed,1); 30 | trans = zeros(notes*(N-1)+1,notes*(N-1)+1); 31 | Non2 = ceil(N/2); 32 | 33 | % Fill in first and last parts of the big matrix with the 34 | % appropriate fragments of the seed 35 | trans(1:Non2, 1:Non2) = transseed(Non2:end, Non2:end); 36 | trans(end-Non2+1:end, end-Non2+1:end) = transseed(1:Non2, 1:Non2); 37 | 38 | % Fill in the middle parts of the big matrix with the whole seed 39 | for i = Non2 : N-1 : (notes-1)*(N-1)+1 - Non2+1 40 | trans(i+(1:N)-1,i+(1:N)-1) = transseed; 41 | end 42 | -------------------------------------------------------------------------------- /findMids.m: -------------------------------------------------------------------------------- 1 | function [x_mids y_mids] = findMids(x, mins, maxes, windowLength_ms, sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % mids = findMids(x, mins, maxes, windowLength_ms, sr) 5 | % 6 | % Description: Find the midpoints between mins and maxes in a signal x. 7 | % mins and maxes could come from findPeaks. Finds the y 8 | % values of peaks and then finds the x values of the signal 9 | % that are closest to the average between the min and max 10 | % peak. 
11 | % 12 | % Inputs: 13 | % x - inputted signal in cents 14 | % mins - indices of minima of x 15 | % maxes - indices of maxima of x 16 | % windowLength_ms - window length in miliseconds 17 | % sr - sampling rate of x (frame rate of frequency analysis) 18 | % 19 | % Outputs: 20 | % x_mids - midpoint locations in x axis between peaks and troughs 21 | % y_mids - midpoint locations in y axis between peaks and troughs 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % window length in frames 30 | windowLength = round(windowLength_ms * sr / 2000) * 2; 31 | 32 | % sort the peaks 33 | pks = sort([maxes mins]); 34 | 35 | % average the frequency estimate of the points around each peak 36 | for i = 1:length(pks) 37 | idx = max(pks(i)-windowLength/2, 1) : ... 
38 | min(pks(i)+windowLength/2, length(x)); 39 | neighborhoods(i) = mean(x(idx)); 40 | end 41 | 42 | % find the mid-points in frequency between peaks 43 | y_mids = (neighborhoods(1:end-1) + neighborhoods(2:end)) / 2; 44 | 45 | % find the index of the point in the signal between each peak with its 46 | % value closest to the mid-point in frequency 47 | for i = 1:length(y_mids) 48 | idx = pks(i):pks(i+1); 49 | [d offset] = min(abs(y_mids(i) - x(idx))); 50 | x_mids(i) = pks(i) + offset - 1; 51 | end -------------------------------------------------------------------------------- /findPeaks.m: -------------------------------------------------------------------------------- 1 | function [mins maxes] = findPeaks(x, windowLength_ms, sr, minCount) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [mins maxes] = findPeaks(x, windowLength_ms, sr, minCount) 5 | % 6 | % Description: Find peaks and troughs in a waveform 7 | % Finds the max and min in a window of a given size and keeps 8 | % track of how many windows each point is the min or max of. 9 | % Points that are the min or max of more than minCount windows 10 | % are returned. 
11 | % 12 | % Inputs: 13 | % x - inputted signal 14 | % windowLength_ms - window length in ms 15 | % sr - sampling rate 16 | % minCount - minimum number of windows that a point needs to be the max 17 | % of to be considered a minimum or a maximum 18 | % 19 | % Outputs: 20 | % mins - minimum values in the signal 21 | % maxes - maximum values in the signal 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % create a vector of zeros for mins and maxes 30 | mins = zeros(size(x)); 31 | maxes = zeros(size(x)); 32 | 33 | % calculate window length in frames 34 | windowLength = windowLength_ms * sr / 1000; 35 | 36 | % calculate the minimum and maximum value 37 | for i = 1:length(x) - windowLength 38 | w = x(i:i+windowLength-1); 39 | [d di] = min(w); 40 | mins(i + di - 1) = mins(i + di - 1) + 1; 41 | [d di] = max(w); 42 | maxes(i + di - 1) = maxes(i + di - 1) + 1; 43 | end 44 | 45 | % pruns mins and maxes to only those that occur in an equal to or greater 46 | % number windows specified in minCount 47 | mins = find(mins >= minCount); 48 | maxes = find(maxes >= minCount); -------------------------------------------------------------------------------- /findSteady.m: -------------------------------------------------------------------------------- 1 | function steady = findSteady(x, mins, maxes, x_mids, y_mids, thresh_cents) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % steady = findSteady(x, mins, maxes, x_mids, y_mids, thresh_cents) 5 | % 6 | % Description: Find the steady-state portion of a note. 7 | % Finds the section of the note with steady vibrato where the 8 | % peaks and troughs are at least thresh_cents cents away from 9 | % the mid points between them. 
mins and maxes come from 10 | % findPeaks, x_mids and y_mids come from findMids. Steady is 11 | % a range of two indices into f0. mins and maxes may come from 12 | % the findPeaks function and x_mids and y_mids may come from 13 | % the findMids function. 14 | % 15 | % Inputs: 16 | % x - vector of f0 estimates in cents 17 | % mins - indices of minima of x 18 | % maxes - indices of maxima of x 19 | % x_mids - midpoint locations in x axis between peaks and troughs 20 | % y_mids - midpoint locations in y axis between peaks and troughs 21 | % thresh_cents - minimum distance in cents from midpoint for peaks and 22 | % troughs 23 | % 24 | % Outputs: 25 | % steady - steady-state portion of inputted signal x 26 | % 27 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 28 | % http://www.ampact.org 29 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 30 | % (mim@mr-pc.org), all rights reserved 31 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | 33 | % Find extrema that are far enough away from the midpoints 34 | peaks = sort([mins maxes]); 35 | excursion = y_mids - x(peaks(1:end-1)); 36 | bigEnough = abs(excursion) >= thresh_cents; 37 | 38 | % Count how many extrema are big enough in a row 39 | inARow(1) = double(bigEnough(1)); 40 | for i = 2:length(bigEnough) 41 | if bigEnough(i) 42 | inARow(i) = inARow(i-1)+1; 43 | else 44 | inARow(i) = 0; 45 | end 46 | end 47 | 48 | % Extract the portion of the note corresponding to the longest run of big 49 | % enough extrema 50 | [times pos] = max(inARow); 51 | steadyPeaks = peaks([pos-times+1 pos]); 52 | steady = x_mids([find(x_mids > steadyPeaks(1), 1), ... 
53 | find(x_mids < steadyPeaks(2), 1, 'last')]); 54 | steady = round(steady); -------------------------------------------------------------------------------- /genMeansCovars.m: -------------------------------------------------------------------------------- 1 | function [meansSeed covarsSeed versions]=genMeansCovars(notes, vals, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [meansSeed covarsSeed versions]=genMeansCovars(notes, vals) 5 | % 6 | % Description: Generate seed means and covariances matrices for specified 7 | % voice type 8 | % 9 | % Inputs: 10 | % notes - cell array of possible sequences 11 | % vals - mean and covariance values 12 | % voiceType - voice type (male or female) 13 | % 14 | % Outputs: 15 | % meansSeed - mean seed matrix 16 | % covarsSeed - covariance seed matrix 17 | % versions - possible sequences of states for the number of voices 18 | % 19 | % Dependencies: 20 | % None 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | % format of vals 28 | % vals{:}{1}{:} - male 29 | % vals{:}{2}{:} - female 30 | 31 | numVoice = length(voiceType); 32 | 33 | for i = 1 : numVoice 34 | noteMean1(i,1) = vals{1}{voiceType(i)}(1); 35 | noteMean1(i,2) = vals{2}{voiceType(i)}(1); 36 | noteMean1(i,3) = vals{2}{voiceType(i)}(1); 37 | noteCovar1(i,1) = vals{1}{voiceType(i)}(2); 38 | noteCovar1(i,2) = vals{2}{voiceType(i)}(2); 39 | noteCovar1(i,3) = vals{2}{voiceType(i)}(2); 40 | noteMean2(i,1) = vals{2}{voiceType(i)}(1); 41 | noteMean2(i,2) = vals{2}{voiceType(i)}(1); 42 | noteMean2(i,3) = vals{1}{voiceType(i)}(1); 43 | noteCovar2(i,1) = vals{2}{voiceType(i)}(2); 44 | noteCovar2(i,2) = vals{2}{voiceType(i)}(2); 45 | noteCovar2(i,3) = vals{1}{voiceType(i)}(2); 46 | end 47 | 48 | for i = 1 : numVoice 49 | versions{i}=nchoosek(1:numVoice,i); 50 | end 51 | 52 | for nVoice = 1:length(versions) 53 | for iVer = 1 : size(versions{nVoice},1) 54 | nMean1 = noteMean1(versions{nVoice}(iVer,:),:); 55 | nMean2 = noteMean2(versions{nVoice}(iVer,:),:); 56 | nVar1 = noteCovar1(versions{nVoice}(iVer,:),:); 57 | nVar2 = noteCovar2(versions{nVoice}(iVer,:),:); 58 | notes2 = cat(1, notes{nVoice}{:})'; 59 | for v = 1 : nVoice 60 | meansSeed{nVoice}{iVer}(2*v-1,:) = nMean1(v,notes2(v,:)); 61 | meansSeed{nVoice}{iVer}(2*v,:) = nMean2(v,notes2(v,:)); 62 | covarsSeed{nVoice}{iVer}(2*v-1,2*v-1,:) = nVar1(v,notes2(v,:)); 63 | covarsSeed{nVoice}{iVer}(2*v,2*v,:) = nVar2(v,notes2(v,:)); 64 | end 65 | end 66 | end -------------------------------------------------------------------------------- /genPolyTrans.m: -------------------------------------------------------------------------------- 1 | function [voices trans]=genPolyTrans(selfWeight, skip2Weight, skip1Weight) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [voices trans]=genPolyTrans(selfWeight, skip2Weight, skip1Weight) 5 | % 6 | % Description: 
Generate transition matrix for HMM 7 | % 8 | % Inputs: 9 | % selfWeight - relative weight given to self transitions 10 | % skip2Weight - relative weight given to transitions from 1->2 or 2->3 11 | % skip1Weight - relative weight given to transitions from 1->3 12 | % 13 | % Outputs: 14 | % voices - cell array of possible sequences 15 | % trans - transition matrix 16 | % 17 | % Dependencies: 18 | % None 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | 25 | if ~exist('selfWeight', 'var') || isempty(selfWeight), selfWeight = 5; end 26 | if ~exist('skip1Weight', 'var') || isempty(skip1Weight), skip1Weight = 1; end 27 | if ~exist('skip2Weight', 'var') || isempty(skip2Weight), skip2Weight = 1; end 28 | 29 | idx = 1; 30 | for i = 1 : 3 31 | voices{1}{idx} = i; 32 | idx = idx + 1; 33 | end 34 | 35 | idx = 1; 36 | for i = 1 : 3 37 | for j = 1 : 3 38 | voices{2}{idx} = [i j]; 39 | idx = idx + 1; 40 | end 41 | end 42 | 43 | idx = 1; 44 | for i = 1 : 3 45 | for j = 1 : 3 46 | for k = 1 : 3 47 | voices{3}{idx} = [i j k]; 48 | idx = idx + 1; 49 | end 50 | end 51 | end 52 | 53 | idx = 1; 54 | for i = 1 : 3 55 | for j = 1 : 3 56 | for k = 1 : 3 57 | for m = 1 : 3 58 | voices{4}{idx} = [i j k m]; 59 | idx = idx + 1; 60 | end 61 | end 62 | end 63 | end 64 | 65 | idx = 1; 66 | for i = 1 : 3 67 | for j = 1 : 3 68 | for k = 1 : 3 69 | for m = 1 : 3 70 | for n = 1 : 3 71 | voices{5}{idx} = [i j k m n]; 72 | idx = idx + 1; 73 | end 74 | end 75 | end 76 | end 77 | end 78 | 79 | idx = 1; 80 | for i = 1 : 3 81 | for j = 1 : 3 82 | for k = 1 : 3 83 | for m = 1 : 3 84 | for n = 1 : 3 85 | for p = 1 : 3 86 | voices{6}{idx} = [i j k m n p]; 87 | idx = idx + 1; 88 | end 89 | end 90 | end 91 | end 92 | end 93 | end 94 | 95 | 96 | for t = 1:length(voices) 97 | 
trans{t}=zeros(length(voices{t})); 98 | for i = 1 : size(trans{t},1) 99 | for j = i : size(trans{t},2) 100 | if sum(voices{t}{i}==voices{t}{j}) >= length(voices{t}{j})-1 101 | stateChange = max(voices{t}{j} - voices{t}{i}); 102 | if stateChange == 2 % 1->3 103 | trans{t}(i,j) = skip2Weight; 104 | elseif stateChange == 1 % 1->2 or 2->3 105 | trans{t}(i,j) = skip1Weight; 106 | elseif stateChange == 0 % 1->1 or 2->2 or 3->3 107 | trans{t}(i,j) = selfWeight; 108 | end 109 | end 110 | end 111 | end 112 | 113 | % Normalize 114 | trans{t} = bsxfun(@rdivide, trans{t}, sum(trans{t}, 2)); 115 | 116 | end 117 | -------------------------------------------------------------------------------- /getCentVals.m: -------------------------------------------------------------------------------- 1 | function cents=getCentVals(times,yinres) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % cents=getCentVals(times,yinres) 5 | % 6 | % Description: Get cent values (in relation to A, 440 Hz) for each note 7 | % 8 | % Inputs: 9 | % times - onset and offset times 10 | % yinres - structure of YIN values 11 | % 12 | % Outputs: 13 | % cents - cell array of cent values for each note 14 | % 15 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 16 | % http://www.ampact.org 17 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 19 | 20 | % index into f0 estimates in YIN structure with onset and offset times 21 | for i = 1:length(times.ons) 22 | cents{i}=yinres.f0(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr))*1200; 23 | end -------------------------------------------------------------------------------- /getLoudnessEstimates.m: -------------------------------------------------------------------------------- 1 | function [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times) 2 | 3 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times) 5 | % 6 | % Description: Get loudness estimate based on Glasberg and Moore (2002) 7 | % for time-varying sounds using the Loudness Toolbox 8 | % 9 | % Inputs: 10 | % audiofile - name of audiofile 11 | % times - onset and offset times 12 | % 13 | % Outputs: 14 | % loudnessEstimates - maximum short-term loudness (in sones) vs time 15 | % loudnessStructure - complete structure returned by 16 | % Loudness_TimeVaryingSound_Moore 17 | % 18 | % Dependencies: 19 | % Genesis Acoustics. 2010. Loudness Toolbox for Matlab. 20 | % Available from http://www.genesis-acoustics.com/index.php?page=32 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 25 | % (mim@mr-pc.org), all rights reserved 26 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | 28 | % read audiofile 29 | [sig,sr]=wavread(audiofile); 30 | 31 | for i = 1 : length(times.ons) 32 | 33 | % get loudness estimate for time-varying sounds based on Glasberg and 34 | % Moore (2002) 35 | loudnessStructure{i}=Loudness_TimeVaryingSound_Moore(sig(times.ons(i)*sr:times.offs(i)*sr),sr); 36 | 37 | % save the maximum short-term loudness (in sones) vs time in a separate 38 | % variable 39 | loudnessEstimates(i)=loudnessStructure{i}.STLlevelmax; 40 | 41 | end -------------------------------------------------------------------------------- /getOnsOffs.m: -------------------------------------------------------------------------------- 1 | function res=getOnsOffs(onsoffs) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % res=getOnsOffs(onsoffs) 5 | % 6 | % Description: Extracts lists of onset and offset times from an inputted 7 | % 3*N matrix of states and corresponding ending times 8 | % from AMPACT's HMM-based alignment algorithm
9 | % 10 | % Inputs: 11 | % onsoffs - a 3*N alignment matrix, the first row is a list of N states 12 | % the second row is the time which the state ends, and the 13 | % third row is the state index 14 | % 15 | % Outputs: 16 | % res.ons - list of onset times 17 | % res.offs - list of offset times 18 | % 19 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 20 | % http://www.ampact.org 21 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | 24 | stopping=find(onsoffs(1,:)==3); 25 | starting=stopping-1; 26 | 27 | for i = 1 : length(starting) 28 | res.ons(i)=onsoffs(2,starting(i)); 29 | res.offs(i)=onsoffs(2,stopping(i)); 30 | end -------------------------------------------------------------------------------- /getPitchVibratoData.m: -------------------------------------------------------------------------------- 1 | function [vibratoDepth, vibratoRate, intervalSize, pp]=getPitchVibratoData(cents,sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [vibratoDepth, vibratoRate, noteDynamics, intervals] 5 | % =getPitchVibratoDynamicsData(times,sr) 6 | % 7 | % Description: Calculate vibrato depth, vibrato rate, perceived pitch, and 8 | % interval size for the notes in the inputted cell array cents 9 | % 10 | % Inputs: 11 | % cents - cell array of cent values for each note 12 | % sr - sampling rate 13 | % 14 | % Outputs: 15 | % vibratoDepth - cell array of vibrato depth calculations for each note 16 | % vibratoRate - cell array of vibrato rate calculations for each note 17 | % intervalSize - cell array of interval size calculations between 18 | % sequential notes 19 | % pp - cell array of perceived pitch calculations for each note 20 | % 21 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 22 | % http://www.ampact.org 23 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 24 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | % calculate vibrato depth, vibrato rate, and perceived pitch for each note 27 | for i = 1 : length(cents) 28 | pp(i)=perceivedPitch(cents{i}, 1/sr*32, 100000); 29 | vibrato{i}=fft(cents{i}); 30 | vibrato{i}(1)=0; 31 | vibrato{i}(round(end/2):end) = 0; 32 | [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); 33 | vibratoRate(i) = noteVibratOpos(i) * (44100/32) / length(vibrato{i}); 34 | end 35 | 36 | % calculate interval size from sequential notes' perceived pitch estimates 37 | for i=1 : length(pp)-1 38 | intervalSize(i) = pp(i+1)*1200-pp(i)*1200; 39 | end -------------------------------------------------------------------------------- /getPitchVibratoDynamicsData.m: -------------------------------------------------------------------------------- 1 | function [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat,cents]=getPitchVibratoDynamicsData(times,yinres,nmat) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat, cents] 5 | % =getPitchVibratoDynamicsData(times,yinres,nmat) 6 | % 7 | % Description: Calculate vibrato depth, vibrato rate, note dynamics, perceived pitch, and interval size for each note 8 | % 9 | % Inputs: 10 | % times - onset and offset times 11 | % yinres - structure of YIN values (f0 and power estimates) 12 | % 13 | % Outputs: 14 | % vibratoDepth - vector of vibrato depth calculations for each note 15 | % vibratoRate - vector of vibrato rate calculations for each note 16 | % noteDynamic - vector of mean note dynamics (in dB) for each note 17 | % intervalSize - vector of interval sizes (in cents) between sequential notes 18 | % pp - vector of perceived pitch calculations for each note 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | 25 | for i = 1 : length(times.ons) 26 | cents{i}=yinres.f0(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); 27 | pp(i)=perceivedPitch(cents{i}, 1/yinres.sr*32, 100000); 28 | vibrato{i}=fft(cents{i}); 29 | vibrato{i}(1)=0; 30 | vibrato{i}(round(end/2):end) = 0; 31 | [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); 32 | vibratoRate(i) =
noteVibratOpos(i) * (44100/32) / length(vibrato{i}); 33 | pwrs{i}=yinres.pwr(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); 34 | dynamicsVals{i}=10*log10(pwrs{i}); 35 | noteDynamic(i)=mean(dynamicsVals{i}); 36 | end 37 | 38 | nmat(:,5)=(noteDynamic+100)'; 39 | 40 | for i=1 : length(pp)-1 41 | intervalSize(i) = pp(i+1)*1200-pp(i)*1200; 42 | end -------------------------------------------------------------------------------- /getTimingData.m: -------------------------------------------------------------------------------- 1 | function nmatNew=getTimingData(midifile, times) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % nmat=getTimingData(midifile, times) 5 | % 6 | % Description: Create a note matrix with performance timings 7 | % 8 | % Inputs: 9 | % midifile - name of midifile 10 | % times - note onset and offset times 11 | % 12 | % Outputs: 13 | % nmatNew - MIDI toolbox note matrix with performance timings 14 | % 15 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 16 | % http://www.ampact.org 17 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 18 | % (mim@mr-pc.org), all rights reserved 19 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | 21 | % Read quantized MIDI file 22 | nmatOld=readmidi(midifile); 23 | nmatOld(:,[1,2])=nmatOld(:,[1,2])/2; 24 | 25 | % Replace timing information in MIDI file with performance timings 26 | nmatNew=nmatOld; 27 | nmatNew(:,6:7)=[times.ons',times.offs'-times.ons']; 28 | offset=nmatNew(1,6)-nmatOld(1,1); 29 | nmatNew(:,6)=nmatNew(:,6)-offset; 30 | nmatNew(:,[1,2])=nmatNew(:,[6,7]); -------------------------------------------------------------------------------- /getVals.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/getVals.m 
-------------------------------------------------------------------------------- /hzcents.m: -------------------------------------------------------------------------------- 1 | function cents = hzcents(x1, x2) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % y = hzcents(x1, x2) 5 | % 6 | % Description: Calculates the difference in cents between the frequencies 7 | % supplied in x1 and x2 using the formula: 8 | % cents = 1200 * log(x1/x2) / log 2 9 | % if x1 is higher than x2 the value in cents will be positive 10 | % if x1 is lower than x2 the value in cents will be negative 11 | % 12 | % Inputs: 13 | % x1 - frequency one in hertz 14 | % x2 - frequency two in hertz 15 | % 16 | % Outputs: 17 | % cents - size of the interval in cents between x1 and x2 18 | % 19 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 20 | % http://www.ampact.org 21 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | 24 | if x1 == 0 25 | cents = 0 26 | elseif x2 == 0 27 | cents = 0 28 | else 29 | cents = 1200 * log(x2 ./ x1) ./ log(2); 30 | end -------------------------------------------------------------------------------- /noteDct.m: -------------------------------------------------------------------------------- 1 | function [coefs approx] = noteDct(x, Ndct, sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [coefs approx] = noteDct(x, Ndct, sr) 5 | % 6 | % Description: Compute the DCT of a signal and approximate it with the 7 | % first Ndct coefficients x is the signal Ndct is the number 8 | % of DCT coefficients to be calculated sr is the sampling rate 9 | % of the signal 10 | % 11 | % Inputs: 12 | % x - signal to be analyzed 13 | % Ndct - number of DCT coefficients to be calculated 14 | % sr - sampling rate 15 | % 16 | % Outputs: 17 | % coefs - DCT coefficients 18 | % approx - reconstruction of X 
using the Ndct number of DCT coefficients 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 23 | % (mim@mr-pc.org), all rights reserved 24 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | % calculate DCT coefficients using built-in dct function 27 | coefsTmp = dct(x); 28 | coefsTmp(min(end,Ndct)+1:end) = 0; 29 | 30 | % Divide by square root of N so that everything is divded by N instead of 31 | % the square root of N, because it is already divded by the sqrt of N 32 | coefs = coefsTmp(1:min(Ndct,end)) / sqrt(length(coefsTmp)); 33 | 34 | % The sampling rate divided by the length of the signal is the lowest 35 | % frequency represented by the DCT. Multiplying by it makes the 1st 36 | % coefficient into cents/second. For curves of constant slope, this makes 37 | % the 1st coefficient approximately independent of the length of the 38 | % signal. Multiplying by that frequency squared makes the 2nd coefficient into 39 | % cents/second^2. For curves of constant 2nd derivative, this makes the 2nd 40 | % coefficient approximately independent of the length of the signal, etc. 
41 | % 42 | % For 2nd coefficient, multiply by -1 so that it represents positive slope 43 | if length(coefs)>1 44 | coefs(2:end)=coefs(2:end) .* (sr ./ length(x)) .^ [1:length(coefs)-1]; 45 | coefs(2)=-coefs(2); 46 | end 47 | 48 | % reconstruct X using the DCT coefficients 49 | approx = real(idct(coefsTmp)); -------------------------------------------------------------------------------- /perceivedPitch.m: -------------------------------------------------------------------------------- 1 | function [pp1 pp2]= perceivedPitch(f0s, sr, gamma) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [pp1 pp2] = perceivedPitch(f0s, sr, gamma) 5 | % 6 | % Description: Calculate the perceived pitch of a note based on 7 | % Gockel, H., B.J.C. Moore, and R.P. Carlyon. 2001. 8 | % Influence of rate of change of frequency on the overall 9 | % pitch of frequency-modulated Tones. Journal of the 10 | % Acoustical Society of America. 109(2):701–12. 11 | % 12 | % Inputs: 13 | % f0s - vector of fundamental frequency estimates 14 | % sr - 1/sample rate of the f0 estimates (e.g. the hop rate in Hz of yin) 15 | % gamma - sets the relative weighting of quickly changing vs slowly 16 | % changing portions of notes. - a high gamma (e.g., 1000000) 17 | % gives more weight to slowly changing portions.
 18 | % 19 | % Outputs: 20 | % pp1 - perceived pitch calculated over the entire f0 vector 21 | % pp2 - perceived pitch calculated over the central 80% of the sorted f0 vector 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | if ~exist('gamma', 'var'), gamma = 100000; end 30 | 31 | % remove all NaNs in the f0 vector 32 | f0s(isnan(f0s))=[]; 33 | 34 | % create an index into the f0 vector in order to remove outliers by 35 | % only using the central 80% of the sorted vector 36 | [d ord] = sort(f0s); 37 | ind = ord(floor(end*.1):floor(end*.9)); 38 | 39 | % calculate the rate of change 40 | deriv = [diff(f0s)*sr -100]; 41 | 42 | % set weights for the quickly changing vs slowly changing portions 43 | weights = exp(-gamma * abs(deriv)); 44 | 45 | % calculate two versions of the perceived pitch, one using the entire 46 | % vector (pp1) and one with the central 80% (pp2) 47 | pp1 = f0s(:)' * weights(:) / sum(weights); 48 | pp2 = f0s(ind) * weights(ind)' / sum(weights(ind)); -------------------------------------------------------------------------------- /plotFineAlign.m: -------------------------------------------------------------------------------- 1 | function plotFineAlign(stateType, occupancy, notes, stftHop) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % plotFineAlign(stateType, occupancy, notes, stftHop) 5 | % 6 | % Description: 7 | % Plot the HMM alignment based on the output of YIN. StateType is the 8 | % list of states in the HMM, and occupancy is the number of YIN frames 9 | % for which that state is occupied. Notes is a list of midi note numbers 10 | % that are played, should be one note for each [3] in stateType.
If the 11 | % highlight vector is supplied, it should contain indices of the states 12 | % to highlight by plotting an extra line at the bottom of the window. 13 | % 14 | % Inputs: 15 | % stateType - vector with a list of states 16 | % occupancy - vector indicating the time (in seconds) at which the states 17 | % in stateType end 18 | % notes - vector of notes from MIDI file 19 | % stftHop - the hop size between frames in the spectrogram 20 | % 21 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 22 | % http://www.ampact.org 23 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 24 | % (mim@mr-pc.org), all rights reserved. 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | % Plot the 4 states: silence in red, beginning transient in green, 28 | % steady state in blue, ending transient in green. 29 | 30 | styles = {{'r+-', 'LineWidth', 2}, 31 | {'g+-', 'LineWidth', 2}, 32 | {'b+-', 'LineWidth', 2}}; 33 | 34 | cs = occupancy /stftHop; 35 | segments = [cs(1:end-1); cs(2:end)]'; 36 | 37 | hold on 38 | 39 | stateNote = max(1, cumsum(stateType == 3)+1); 40 | for i=1:size(segments,1) 41 | plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) 42 | end 43 | 44 | hold off -------------------------------------------------------------------------------- /polyExample.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polyExample.mid -------------------------------------------------------------------------------- /polyExample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polyExample.wav -------------------------------------------------------------------------------- /polySingingMeansCovars.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polySingingMeansCovars.mat -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Example Usage 2 | - from the included script exampleScript.m 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % exampleScript.m 5 | % 6 | % Description: 7 | % Example of how to use the HMM alignment algorithm 8 | % 9 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 10 | % http://www.ampact.org 11 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 13 | 14 | % audio file to be aligned 15 | audiofile=('example.wav'); 16 | 17 | % MIDI file to be aligned 18 | midifile=('example.mid'); 19 | 20 | % number of notes to align 21 | numNotes=6; 22 | 23 | % vector of order of states (according to lyrics) in stateOrd and 24 | % corresponding note numbers in noteNum 25 | % 1 indicates a rest at the beginning of ending of the note 26 | % 2 indicates a transient at the beginning or ending of the note 27 | % 3 indicates a steady state section 28 | % the following encoding is for six syllables "A-ve Ma-ri-(i)-a" 29 | % syllable A-ve Ma-ri-(i)-a 30 | % state type 13 23 23 23 3 31 31 | % note number 11 22 33 44 5 66 32 | stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; 33 | noteNum = [1 1 2 2 3 3 4 4 5 6 6]; 34 | 35 | % load singing means and covariances for the HMM alignment 36 | load SingingMeansCovars.mat 37 | means=sqrtmeans; 38 | covars=sqrtcovars; 39 | 40 | % specify that the means and covariances in the HMM won't be learned 41 | learnparams=0; 42 | 43 | % run the alignment 44 | [allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, 
stateOrd, noteNum, means, covars, learnparams); 45 | 46 | % visualise the alignment 47 | alignmentVisualiser(selectstate,midifile,spec,1); 48 | 49 | % get onset and offset times 50 | times=getOnsOffs(selectstate); 51 | 52 | % write the onset and offset times to an audacity-readable file 53 | dlmwrite('example.txt',[times.ons' times.offs'], 'delimiter', '\t'); 54 | 55 | % you can load 'example.txt' into audacity and correct any errors in the 56 | % alignment, i.e., the offset error on the last note, and then reload the 57 | % corrected labels into matlab 58 | fixedLabels=load('exampleFixed.txt'); 59 | times.ons=fixedLabels(:,1)'; 60 | times.offs=fixedLabels(:,2)'; 61 | 62 | % map timing information to the quantized MIDI file 63 | nmatNew=getTimingData(midifile, times); 64 | writemidi(nmatNew,'examplePerformance.mid') 65 | 66 | % get cent values for each note 67 | cents=getCentVals(times,yinres); 68 | 69 | % calculate intervals size, perceived pitch, vibrato rate, and vibrato depth 70 | [vibratoDepth, vibratoRate, intervalSize, perceivedPitch]=getPitchVibratoData(cents,yinres.sr); 71 | 72 | % get loudness values for each note using the Genesis Loudness Toolbox 73 | [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times); 74 | 75 | % get DCT values for each note 76 | for i = 1 : length(cents) 77 | 78 | % find the peaks and troughs in the F0 trace for each note 79 | [mins{i} maxes{i}] = findPeaks(cents{i}, 100, yinres.sr/32, 60); 80 | 81 | % find the midpoints between mins and maxes in the F0 trace for each 82 | % note 83 | [x_mids{i} y_mids{i}] = findMids(cents{i}, mins{i}, maxes{i}, 100, yinres.sr/32); 84 | 85 | % generate a smoothed trajectory of a note by connecting the 86 | % midpoints between peaks and troughs. 
87 | smoothedF0s{i}=smoothNote(cents{i}, x_mids{i}, y_mids{i}); 88 | 89 | % find the steady-state portion of a note 90 | steady{i}(1:2)=findSteady(cents{i}, mins{i}, maxes{i}, x_mids{i}, y_mids{i}, 1); 91 | 92 | % compute the DCT of a signal and approximate it with the first 3 coefficients 93 | [dctVals{i}, approx{i}]=noteDct(smoothedF0s{i}(steady{i}(1):steady{i}(2)),3,yinres.sr/32); 94 | 95 | end 96 | 97 | ---------------- 98 | 99 | AMPACT Function Descriptions 100 | 101 | runAlignment.m: Calls the DTW alignment function and refines the results with the HMM alignment algorithm, with both a basic and modified state spaces (based on the lyrics). 102 | 103 | getVals.m: Gets values for DTW alignment and YIN analysis of specified audio signal and MIDI file 104 | 105 | runDTWAlignment.m: Performs a dynamic time warping alignment between specified audio and MIDI files. 106 | 107 | runHMMAlignment.m: Refines DTW alignment values with a three-state HMM, identifying silence,transient, and steady state parts of the signal. The HMM uses the DTW alignment as a prior. 108 | 109 | filltransmat.m: Makes a transition matrix from a seed transition matrix. 110 | 111 | fillpriormat_gauss.m: Creates a prior matrix based on the DTW alignment (supplied by the input variables ons and offs). 112 | 113 | selectStates.m: Refines the HMM parameters according to the modified state sequence vector passed into the function. 114 | 115 | alignmentVisualiser.m: Plots a gross DTW alignment overlaid with the fine alignment resulting from the HMM aligner on the output of YIN. 116 | 117 | getTimingData: Create a note matrix with performance timings. 118 | 119 | getCentVals: Get cent values (in relation to A, 440 Hz) for each note. 120 | 121 | getPitchVibratoData: Calculate vibrato depth, vibrato rate, perceived pitch, and interval size for the notes in the inputted cell array cents. 
122 | 123 | getLoudnessEstimates: Get loudness estimate based on Glasberg and Moore (2002) for time-varying sounds using the Loudness Toolbox. 124 | 125 | findPeaks: Find peaks and troughs in a signal. 126 | 127 | findMids: Find the midpoints between mins and maxes in a signal. 128 | 129 | smoothNote: Generate a smoothed trajectory of a note by connecting the midpoints between peaks and troughs. 130 | 131 | noteDct: Compute the DCT of a signal and approximate it with a specified number of coefficients. 132 | 133 | ---------------- 134 | 135 | AMPACT Dependencies 136 | 137 | You will need to have the following toolkits installed and in your path 138 | de Cheveigné, A. 2002. YIN MATLAB implementation Available from: http://audition.ens.fr/adc/sw/yin.zip 139 | Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 140 | Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 141 | Genesis Acoustics. 2010. Loudness Toolbox for Matlab. Available from http://www.genesis-acoustics.com/index.php?page=32 142 | Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 143 | Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/miditoolbox/ 144 | 145 | ---------------- 146 | 147 | Papers on algorithms developed for AMPACT 148 | 149 | Devaney, J., M. I. Mandel, and I. Fujinaga. 2011. Characterizing Singing Voice Fundamental Frequency Trajectories. Proceedings of the 2011 Workshop on Applications of Signal Processing to Audio and Acoustics. 150 | Devaney, J., M. I. Mandel, D. P. W. Ellis, and I. Fujinaga. 2010. Automatically extracting performance data from recordings of trained singers. Psychomusicology: Music, Mind & Brain. 21(1–2): in press. 151 | Devaney, J. 2011. 
An empirical study of the influence of musical context on intonation practices in solo singers and SATB ensembles. Ph. D. Dissertation. McGill University. 152 | Devaney, J., M. I. Mandel, and D. P. W. Ellis. 2009. Improving MIDI-audio alignment with acoutics features. In Proceedings of the 2009 Workshop on Applications of Signal Processing to Audio and Acoustics. 153 | 154 | ---------------- 155 | 156 | Papers on algorithms by other authors used by AMPACT 157 | 158 | de Cheveigné, A., and H. Kawahara. 2002. YIN, a fundamental frequency estimator for speech and music. Journal of the Acoustical Society of America 111 (4): 1917–30. 159 | Orio, N., and D. Schwarz. 2001. Alignment of monophonic and polyphonic music to a score. In Proceedings of the International Computer Music Conference, 155–8. 160 | -------------------------------------------------------------------------------- /runAlignment.m: -------------------------------------------------------------------------------- 1 | function [allstate,selectstate,spec,yinres]=runAlignment(filename, midiname, numNotes, stateOrd2, noteNum, means, covars, learnparams) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [allstate selectstate spec yinres]=seeAlignment(audiofile,midifile,... 5 | % numNotes, stateOrd, noteNum, means, covars,learnparams) 6 | % 7 | % Description: 8 | % Calls the DTW alignment function and refines the results with the HMM 9 | % alignment algorithm, with both a basic and modified state spaces (based 10 | % on the lyrics). This function returns the results of both the state 11 | % spaces as well as the YIN analysis of the specified audio file. 
12 | % 13 | % Inputs: 14 | % filename - name of audio file 15 | % midiname - name of MIDI file 16 | % numNotes - number of notes in the MIDI file to be aligned 17 | % stateOrd2 - vector of state sequence 18 | % noteNum - vector of note numbers corresponding to state sequence 19 | % means - mean values for each state 20 | % covars - covariance values for each state 21 | % learnparams - flag as to whether to learn means and covars in the HMM 22 | % 23 | % Outputs: 24 | % allstate - ending times for each state 25 | % selectstate - ending times for each state 26 | % spec - spectrogram of the audio file 27 | % yinres - structure of results of running the YIN algorithm on the 28 | % audio signal indicated by the input variable filename 29 | % 30 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 31 | % http://www.ampact.org 32 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 33 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | 35 | if ~exist('learnparams', 'var'), learnparams = 0; end 36 | 37 | % refine stateOrd2 to correspond to the number of states specified 38 | % in numStates 39 | numStates = max(find(noteNum <= numNotes)); 40 | stateOrd2=stateOrd2(1:numStates); 41 | noteNum=noteNum(1:numStates); 42 | 43 | % read audio file and perform DTW alignment and YIN analysis 44 | hop = 32; 45 | [audiofile, sr] = wavread(filename); 46 | 47 | % normalize audio file 48 | audiofile=audiofile/sqrt(mean(audiofile.^2))*.6; 49 | 50 | %get vals 51 | [align, yinres, spec] = getVals(filename, midiname, audiofile, sr, hop); 52 | clear audiofile 53 | 54 | % run HMM alignment with the full state sequence 55 | [vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(numNotes, means, covars, align, yinres, sr, learnparams); 56 | 57 | % tally of the number of frames in each state 58 | histvals = hist(vpath, 1:max(vpath)); 59 | 60 | % ending time of each state in seconds 61 | 
cumsumvals = cumsum(histvals*hop/sr); 62 | 63 | % run HMM alignment with the state sequence refined, based on the lyrics 64 | cumsumvals2=selectStates(startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd2,noteNum,sr); 65 | 66 | % create 3*N matrices of the alignments, where the first row is the 67 | % current states, the second row is the time which the state ends, and 68 | % the third row is the state index and N is the total number of states 69 | allstate=stateOrd; 70 | allstate(2,1:length(cumsumvals))=cumsumvals; 71 | selectstate=stateOrd2; 72 | selectstate(2,1:length(cumsumvals2))=cumsumvals2; 73 | selectstate(3,:) = noteNum; -------------------------------------------------------------------------------- /runDTWAlignment.m: -------------------------------------------------------------------------------- 1 | function [align,spec] = runDTWAlignment(audiofile, midorig, tres) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % align = runDTWAlignment(sig, sr, midorig, tres, plot) 5 | % 6 | % Description: 7 | % Performs a dynamic time warping alignment between specified audio and 8 | % MIDI files and returns a matrix with the aligned onset and offset times 9 | % (with corresponding MIDI note numbers) and a spectrogram of the audio 10 | % 11 | % Inputs: 12 | % sig - audio file 13 | % midorig - midi file 14 | % tres - time resolution for MIDI to spectrum information conversion 15 | % 16 | % Outputs: 17 | % align - dynamic time warping MIDI-audio alignment structure 18 | % align.on - onset times 19 | % align.off - offset times 20 | % align.midiNote - MIDI note numbers 21 | % spec - sepctrogram 22 | % 23 | % Dependencies: 24 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 25 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 26 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. 
Available 26 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 28 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 29 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 30 | % /miditoolbox/ 31 | % 32 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 33 | % http://www.ampact.org 34 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 35 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | 37 | % default the time resolution when the caller omits it; the function takes 38 | % three arguments, so the guard must test nargin < 3 (nargin < 5 was always 39 | % true and discarded any caller-supplied tres) 40 | if nargin < 3 41 | tres = 0.025; 42 | end 43 | 44 | mid = midorig; 45 | 46 | % run alignment using peak structure distance as a feature 47 | [dtw.M,dtw.MA,dtw.RA,dtw.S,spec,dtw.notemask] = alignmidiwav(mid,... 48 | audiofile,tres,1); 49 | 50 | % read midi file and map the times in the midi file to the audio 51 | align.nmat = readmidi(mid); 52 | align.nmat(:,7) = align.nmat(:,6) + align.nmat(:,7); 53 | align.nmat(:,1:2) = maptimes(align.nmat(:,6:7),(dtw.MA-1)*tres,(dtw.RA-1)*tres); 54 | 55 | % create output alignment 56 | align.on = align.nmat(:,1); 57 | align.off = align.nmat(:,2); 58 | align.midiNote = align.nmat(:,4); 59 | -------------------------------------------------------------------------------- /runHMMAlignment.m: -------------------------------------------------------------------------------- 1 | function [vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | %[vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] 5 | % = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) 6 | % 7 | % Description: 8 | % Refines DTW alignment values with a three-state HMM, identifying 9 | % silence, transient, and steady state parts of the signal. The HMM 10 | % uses the DTW alignment as a prior. 
11 | % 12 | % Inputs: 13 | % notenum - number of notes to be aligned 14 | % means - 3x2 matrix of mean aperiodicy and power values HMM states 15 | % column: silence, trans, steady state 16 | % rows: aperiodicity, power 17 | % covars - 3x2 matrix of covariances for the aperiodicy and power 18 | % values (as per means) 19 | % res - structure containing inital DTW aligment 20 | % yinres - structure containg yin analysis of the signal 21 | % sr - sampling rate of the signal 22 | % 23 | % Outputs: 24 | % vpath - verterbi path 25 | % startingState - starting state for the HMM 26 | % prior - prior matrix from DTW alignment 27 | % trans - transition matrix 28 | % meansFull - means matrix 29 | % covarsFull - covariance matrix 30 | % mixmat - matrix of priors for GMM for each state 31 | % obs - two row matrix observations (aperiodicty and power) 32 | % stateOrd - modified state order sequence 33 | % 34 | % Dependencies: 35 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 36 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 37 | % 38 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 39 | % http://www.ampact.org - Johanna Devaney, 2011 40 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 
41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | % default learnparams to 0 when omitted (the original assigned the unused 44 | % variable 'shift', leaving learnparams undefined and crashing at the 45 | % 'if learnparams' test below) 46 | if ~exist('learnparams', 'var'), learnparams = 0; end 47 | 48 | % create vectors of onsets and offsets times from DTW alignment 49 | ons=floor(align.on*sr/32); 50 | offs=floor(align.off*sr/32); 51 | 52 | % create observation matrix 53 | obs(1,:)=sqrt(yinres.ap(1:offs(notenum)+50)); 54 | obs(2,:)=sqrt(yinres.pwr(1:offs(notenum)+50)); 55 | obs(3,:)=69+12*yinres.f0(1:offs(notenum)+50); % convert octave to midi note 56 | 57 | % replace any NaNs in the observation matrix with zeros 58 | obs(isnan(obs))=0; 59 | 60 | % refine the list of onsets and offsets according to the number of notes 61 | % specified in the input arg 'notenum' 62 | prior_ons=ons(1:notenum); 63 | prior_offs=offs(1:notenum); 64 | notes = length(prior_ons); 65 | 66 | % states: silence, trans, steady state 67 | % rows: aperiodicity, power 68 | stateOrdSeed = [1 2 3 2 1]; 69 | stateOrd = [repmat(stateOrdSeed(1:end-1),1,notes) stateOrdSeed(end)]; 70 | 71 | % use stateOrd to expand means and covars to each appearance 72 | midiNotes = repmat(align.midiNote(1:notenum)', length(stateOrdSeed)-1, 1); 73 | midiNotes = [midiNotes(:)' midiNotes(end)]; 74 | meansFull = [means(:,stateOrd); midiNotes]; 75 | 76 | covars(3,3,1) = 100; 77 | covars(3,3,2) = 5; 78 | covars(3,3,3) = 1; 79 | covarsFull = covars(:,:,stateOrd); 80 | 81 | mixmat = ones(length(stateOrd),1); 82 | 83 | % transition matrix seed 84 | % {steady state, transient, silence, transient, steady state} 85 | transseed=zeros(5,5); 86 | transseed(1,1)=.99; 87 | transseed(2,2)=.98; 88 | transseed(3,3)=.98; 89 | transseed(4,4)=.98; 90 | transseed(5,5)=.99; 91 | transseed(1,2)=.0018; 92 | transseed(1,3)=.0007; 93 | transseed(1,4)=.0042; 94 | transseed(1,5)=.0033; 95 | transseed(2,3)=0.0018; 96 | transseed(2,4)=0.0102; 97 | transseed(2,5)=0.0080; 98 | transseed(3,4)=0.0112; 99 | transseed(3,5)=0.0088; 100 | transseed(4,5)=0.02; 101 | 102 | % call filltransmat to expand the transition 
matrix to the appropriate size 100 | trans = filltransmat(transseed,notes); 101 | 102 | % create starting state space matrix 103 | startingState = [1; zeros(4*notes,1)]; 104 | 105 | % call fillpriormat_gauss to create a prior matrix 106 | prior=fillpriormat_gauss(size(obs,2),prior_ons,prior_offs,5); 107 | 108 | if learnparams 109 | % use the mhmm_em function from Kevin Murphy's HMM toolkit to 110 | % learn the HMM parameters 111 | save orig_hmm_params 112 | [LL, startingState, trans, meansFull, covarsFull, mixmat1] = ... 113 | mhmm_em(obs, startingState, trans, meansFull, covarsFull, mixmat, 'max_iter', 1, 'adj_prior', 0, 'adj_trans', 0, 'adj_mix', 0, 'cov_type', 'diag'); 114 | save new_hmm_params 115 | end 116 | 117 | % create a likelihood matrix with the mixgauss_prob function from Kevin 118 | % Murphy's HMM toolkit 119 | like = mixgauss_prob(obs, meansFull, covarsFull, mixmat,1); 120 | 121 | % use the veterbi path function from Kevin Murphy's HMM toolkit to find the 122 | % most likely path 123 | prlike=prior.*like; 124 | clear like 125 | vpath=viterbi_path(startingState, trans, prlike); 126 | -------------------------------------------------------------------------------- /runPolyAlignment.m: -------------------------------------------------------------------------------- 1 | function [estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile, meansCovarsMat, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile) 5 | % 6 | % Description: Main function for runing polyphonic MIDI-audio alignment 7 | % An intial DTW alignment is refined to estimate asychroncies 8 | % between notated simultaneities 9 | % 10 | % Note that this current version assumes that each note ends 11 | % immediately before it starts again (i.e., no rests) 12 | % 13 | % Inputs: 14 | % audiofile - audio file file 15 | % midifile - midi file 16 | % meansCovarsMat - specifies means 
and covariance matrix to use 17 | % voiceType - vector indicating which voice (or instrument) to use for 18 | % each musical line 19 | % 20 | % Outputs: 21 | % estimatedOns - cell array of onset times 22 | % estimatedOffs - cell array of offset times 23 | % 24 | % Dependencies: 25 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 26 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 27 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available 28 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 29 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 30 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 31 | % /miditoolbox/ 32 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 33 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 34 | % 35 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 36 | % http://www.ampact.org 37 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | 40 | %%%%%%% if no arguments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | 42 | if nargin < 4 43 | voiceType = [2 1 1 1]; 44 | end 45 | 46 | if nargin < 3 47 | meansCovarsMat='polySingingMeansCovars.mat'; 48 | end 49 | 50 | if nargin < 2 51 | midifile = 'polyExample.mid'; 52 | end 53 | 54 | if nargin < 1 55 | audiofile = 'polyExample.wav'; 56 | end 57 | 58 | 59 | 60 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | %%%%%%%%% Initial DTW alignment stuff %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 | % read MIDI file 63 | nmatAll=midi2nmat(midifile); 64 | 65 | if min(nmatAll(:,3)) == 0 66 | nmatAll(:,3)=nmatAll(:,3)+1; 67 | end 68 | 69 | for i = sort(unique(nmatAll(:,3)))' 70 | nmat{i} = nmatAll(nmatAll(:,3)==i,:); 71 | end 72 | 73 | maxNotes=max(nmatAll(:,3)); 74 | 75 | %%%%%%%% Initialize HMM variables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | % needs to be here for calculations in initial DTW alignment 77 | % starting state for HMM 78 | 79 | for i = 1 : maxNotes 80 | startingState{i} = [1; zeros(3^i-1,1)]; 81 | end 82 | 83 | % get transition matrix for HMM 84 | [notes trans] = genPolyTrans(50, 0, 5); 85 | for i = 1 : maxNotes 86 | notesInd{i} = cat(1, notes{i}{:})'; 87 | end 88 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 89 | % run DTW alignment using composite midifile 90 | [align,spec] = runDTWAlignment(audiofile, midifile, 0.025); 91 | 92 | % calculate how many voices change at each transition 93 | %nmatAll(:,1)=floor(nmatAll(:,1)*1000)/1000; 94 | [uniqueBeats, idx1, idx2] = unique(onset(nmatAll), 'first'); 95 | uniqueAlignOns = align.nmat(idx1, 1); 96 | onsetMap = zeros(length(uniqueBeats),maxNotes); 97 | for i = 1 : length(uniqueBeats) 98 | %num = 1; 99 | for j = 1:maxNotes 100 | if sum(onset(nmat{j}) == uniqueBeats(i)) 101 | onsetMap(i,j) = 1; 102 | end 103 | %num = num + 1; 104 | end 105 | end 106 | 
107 | % create new onset map using alignment values 108 | % THIS IS CURRENTLY ASSUMING THAT THERE ARE NO NOTATED RESTS 109 | for i = 1 : size(onsetMap,1) % number of onsets 110 | for j = 1 : size(onsetMap,2) % number of voices 111 | if onsetMap(i,j) == 1, 112 | onsMap2(i,j) = uniqueAlignOns(i); 113 | end 114 | end 115 | lv2(i) = find(onsetMap(i,:), 1, 'first'); 116 | onVals(i)=onsMap2(i,lv2(i)); 117 | end 118 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 119 | 120 | %%%%%%% Audio analysis %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 121 | % set paramters for audio analysis 122 | offset1=0.125; 123 | offset2=0.125; 124 | [audio,sr]=wavread(audiofile); 125 | audio=resample(audio,1,2); 126 | sr = sr/2; 127 | tuning=estimateTuning(audio); 128 | parameter.winLenSTMSP=441; 129 | parameter.shiftFB = tuning; 130 | 131 | % create a matrix of the notes in the audio in midi note numbers for each 132 | % transition, as defined by onsetMap 133 | for i = 1 : maxNotes 134 | idxCell{i}=1; 135 | pitches{1}(i,3)=nmat{i}(1,4)+tuning; 136 | end 137 | for i = 2 : size(onsetMap,1) 138 | for j = 1 : maxNotes 139 | if onsetMap(i,j) == 1 140 | pitches{i}(j,1)=nmat{j}(idxCell{j},4)+tuning; 141 | pitches{i}(j,2)=0; 142 | try 143 | pitches{i}(j,3)=nmat{j}(idxCell{j}+1,4)+tuning; 144 | end 145 | idxCell{j}=idxCell{j}+1; 146 | else 147 | pitches{i}(j,1)=pitches{i-1}(j,3)+tuning; 148 | pitches{i}(j,2)=pitches{i-1}(j,3)+tuning; 149 | try 150 | pitches{i}(j,3)=pitches{i-1}(j,3)+tuning; 151 | end 152 | end 153 | end 154 | end 155 | 156 | % get means and covars for the singing voice 157 | % differentiate for different voices 158 | load(meansCovarsMat) 159 | for i = 1 : size(nmat,2) 160 | [meansSeed{i} covarsSeed{i} versions]=genMeansCovars(notes, vals{i},voiceType); 161 | end 162 | % set the harmonics that are going to be considered 163 | harmonics=[-1 0 1]; 164 | harmonics2=[-1 0 1 12 19 24 28 31 36]; 165 | 166 | % run audio analysis 167 | 
fpitchAll=audio_to_pitch_via_FB(audio,parameter); 168 | hop = length(audio)/size(fpitchAll,2); 169 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 170 | 171 | 172 | %%%%%%% NAME %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 173 | % initialize indexing cell array 174 | for i = 1 : maxNotes 175 | idxCell{i}=1; 176 | end 177 | for i = 1 : length(onsetMap) 178 | %for i = 2 : length(onsetMap)-1 179 | numVoices = sum(onsetMap(i,:),2); 180 | try 181 | fpitch{i}=fpitchAll(:,round((onVals(i)-offset1)*sr/hop):round((onVals(i)+offset2)*sr/hop)); 182 | catch 183 | fpitch{i}=fpitchAll(:,max(1,round((onVals(i)-offset1)*sr/hop)):end); 184 | end 185 | numFrames(i)=size(fpitch{i},2); 186 | lengthSignal(i)=length(audio(max(1,round((onVals(i)-offset1)*sr)):max(round((onVals(i)+offset2)*sr),1))); 187 | [a,b,c]=find(onsetMap(i,:), size(nmat,2)); 188 | num = 1; 189 | for j = b 190 | obs{i}(num,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 191 | if sum(onsetMap(i+1:end,j))~=0 192 | 193 | % db of sum fpitch vals - no harmonics 194 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 195 | 196 | % alternative features 197 | % % db of mean fpitch vals - no harmonics 198 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 199 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 200 | % 201 | % % db of mean fpitch vals - harmonics 202 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 203 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 204 | % 205 | % % db of sum fpitch vals - harmonics 206 | % db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 207 | % db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 208 | 209 | idxCell{j}=idxCell{j}+1; 210 | 211 | else 212 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 213 | % numVoices = numVoices-1; 214 | % b = b(b~=j); 215 | end 216 | num = num + 2; 217 | end 218 | 219 | if numVoices 220 | for j = 
1 : size(versions{numVoices},1) 221 | if all(versions{numVoices}(j,:)==b); 222 | idx = j; 223 | end 224 | end 225 | 226 | % get appropriate trans, meansSeed, covarsSeed, and calculate mixmat 227 | curTrans = trans{numVoices}; 228 | 229 | curMeansSeed = meansSeed{3}{numVoices}{idx}; 230 | curCovarsSeed = covarsSeed{3}{numVoices}{idx}; 231 | mixmat = ones(length(curMeansSeed),1); 232 | sState = startingState{numVoices}; 233 | states = [1 2 3]; 234 | 235 | if i == 1 236 | 237 | curTrans = curTrans(sum(notesInd{numVoices}==1,1)<1,sum(notesInd{numVoices}==1,1)<1); 238 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}==1,1)<1); 239 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}==1,1)<1); 240 | mixmat = mixmat(sum(notesInd{numVoices}==1,1)<1); 241 | sState = sState(sum(notesInd{numVoices}==1,1)<1); 242 | sState(1) = 1; 243 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{4}==1,1)<1); 244 | states = [2 3]; 245 | 246 | % curTrans = curTrans(sum(notesInd{numVoices}~=3)>(maxNotes-1),:); 247 | % curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 248 | % curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 249 | % mixmat = mixmat(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 250 | % sState = sState(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 251 | % notesIndTmp=notesInd{maxNotes}(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 252 | elseif i == length(onsetMap) 253 | curTrans = curTrans(sum(notesInd{numVoices}<3,1)>(numVoices-1),:); 254 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 255 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 256 | mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 257 | sState = sState(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 258 | states = [1 2]; 259 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 260 | else 261 | notesIndTmp{i}=notesInd{numVoices}; 262 | end 263 | 264 | like1{i} 
= mixgauss_prob(obs{i}, curMeansSeed, curCovarsSeed, mixmat,1); 265 | like1{i}(:,1)=[1; zeros(length(like1{i}(:,end))-1,1)]; 266 | like1{i}(:,end)=[zeros(length(like1{i}(:,end))-1,1); 1]; 267 | vpath1{i}=viterbi_path(sState, curTrans, like1{i}); 268 | end 269 | 270 | % for each note 271 | % i is the note 272 | % b(j) is the voice 273 | for j = 1 : numVoices 274 | try 275 | noteVals{i}{j}=notesIndTmp{i}(j,vpath1{i}); 276 | end 277 | for m = states 278 | try 279 | notePos{i}{j}(m)=find(noteVals{i}{j}==m,1,'last'); 280 | catch 281 | notePos{i}{j}(m)=notePos{i}{j}(m-1); 282 | end 283 | end 284 | end 285 | 286 | end 287 | 288 | 289 | 290 | 291 | % % last note 292 | numVoices=maxNotes; 293 | curTrans = trans{numVoices}; 294 | idxEnd=sum(notesInd{numVoices}<3,1)>(numVoices-1); 295 | curTrans = curTrans(idxEnd,idxEnd); 296 | 297 | curMeansSeed = meansSeed{3}{numVoices}{1}; 298 | curMeansSeed = curMeansSeed(:,idxEnd); 299 | 300 | curCovarsSeed = covarsSeed{3}{numVoices}{1}; 301 | curCovarsSeed = curCovarsSeed(:,:,idxEnd); 302 | 303 | mixmat = ones(length(curMeansSeed),1); 304 | %mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 305 | 306 | sState = startingState{numVoices}; 307 | sState = sState(1:length(mixmat)); 308 | 309 | states = [1 2]; 310 | 311 | 312 | lastOffset=length(onsetMap)+1; 313 | notesIndTmp{lastOffset}=notesInd{numVoices}(:,idxEnd); 314 | fpitch{lastOffset}=fpitchAll(:,round((onVals(end)+offset1)*sr/hop):end); 315 | numFrames(lastOffset)=size(fpitch{lastOffset},2); 316 | lengthSignal(lastOffset)=length(audio(max(1,round((onVals(end)+offset1)*sr)):end)); 317 | num = 1; 318 | for note = 1 : numVoices 319 | obs{lastOffset}(num,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))); 320 | obs{lastOffset}(num+1,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))) 321 | num = num + 2; 322 | end 323 | 324 | like1{lastOffset} = mixgauss_prob(obs{lastOffset}, curMeansSeed, curCovarsSeed, mixmat,1); 325 | 
like1{lastOffset}(:,1)=[1; zeros(length(like1{lastOffset}(:,end))-1,1)]; 326 | like1{lastOffset}(:,end)=[zeros(length(like1{lastOffset}(:,end))-1,1); 1]; 327 | vpath1{lastOffset}=viterbi_path(sState, curTrans, like1{lastOffset}); 328 | 329 | for j = 1 : numVoices 330 | noteVals{lastOffset}{j}=notesIndTmp{lastOffset}(j,vpath1{lastOffset}); 331 | 332 | for m = states 333 | notePos{lastOffset}{j}(m)=find(noteVals{lastOffset}{j}==m,1,'last'); 334 | % catch 335 | % notePos{lastOffset}{j}(m)=notePos{lastOffset}{j}(m-1); 336 | % end 337 | end 338 | end 339 | 340 | 341 | 342 | 343 | for i = 1 : length(onsetMap) 344 | for j = find(onsetMap(i,:)): sum(onsetMap(i,:)) 345 | % if onsetMap(i,j) == 1 && sum(onsetMap(i+1:end,j))~=0 346 | noteSecs{i}{j}=notePos{i}{j}*lengthSignal(i)/numFrames(i)/sr+onVals(i)-offset1; 347 | if i > 1 348 | % this doesn't work 349 | estimatedOffs{j}(i-1) = noteSecs{i}{j}(1); 350 | end 351 | estimatedOns{j}(i) = noteSecs{i}{j}(2); 352 | % else 353 | % estimatedOffs{j}(i)=0; 354 | % estimatedOns{j}(i)=0; 355 | % end 356 | end 357 | end 358 | 359 | for j = 1 : maxNotes 360 | noteSecs{lastOffset}{j}=notePos{lastOffset}{j}*lengthSignal(lastOffset)/numFrames(lastOffset)/sr+onVals(end)+offset1; 361 | estimatedOffs{j}(length(estimatedOns{j}))=noteSecs{lastOffset}{j}(1); 362 | end -------------------------------------------------------------------------------- /runPolyAlignment.m~: -------------------------------------------------------------------------------- 1 | function [estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile, meansCovarsMat, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile) 5 | % 6 | % Description: Main function for runing polyphonic MIDI-audio alignment 7 | % An intial DTW alignment is refined to estimate asychroncies 8 | % between notated simultaneities 9 | % 10 | % Inputs: 11 | % audiofile - audio file file 12 | % 
midifile - midi file 13 | % meansCovarsMat - specifies means and covariance matrix to use 14 | % voiceType - vector indicating which voice (or instrument) to use for 15 | % each musical line 16 | % 17 | % Outputs: 18 | % estimatedOns - cell array of onset times 19 | % estimatedOffs - cell array of offset times 20 | % 21 | % Dependencies: 22 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 23 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 24 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available 25 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 26 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 27 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 28 | % /miditoolbox/ 29 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 30 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 31 | % 32 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 33 | % http://www.ampact.org 34 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
35 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | 37 | %%%%%%% if no arguments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 | 39 | if nargin < 4 40 | voiceType = [2 1 1 1]; 41 | end 42 | 43 | if nargin < 3 44 | meansCovarsMat='polySingingMeansCovars.mat'; 45 | end 46 | 47 | if nargin < 2 48 | midifile = 'polyExample.mid'; 49 | end 50 | 51 | if nargin < 1 52 | audiofile = 'polyExample.wav'; 53 | end 54 | 55 | 56 | 57 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 58 | %%%%%%%%% Initial DTW alignment stuff %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 59 | % read MIDI file 60 | nmatAll=midi2nmat(midifile); 61 | 62 | if min(nmatAll(:,3)) == 0 63 | nmatAll(:,3)=nmatAll(:,3)+1; 64 | end 65 | 66 | for i = sort(unique(nmatAll(:,3)))' 67 | nmat{i} = nmatAll(nmatAll(:,3)==i,:); 68 | end 69 | 70 | maxNotes=max(nmatAll(:,3)); 71 | 72 | %%%%%%%% Initialize HMM variables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | % needs to be here for calculations in initial DTW alignment 74 | % starting state for HMM 75 | 76 | for i = 1 : maxNotes 77 | startingState{i} = [1; zeros(3^i-1,1)]; 78 | end 79 | 80 | % get transition matrix for HMM 81 | [notes trans] = genPolyTrans(50, 0, 5); 82 | for i = 1 : maxNotes 83 | notesInd{i} = cat(1, notes{i}{:})'; 84 | end 85 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 86 | % run DTW alignment using composite midifile 87 | [align,spec] = runDTWAlignment(audiofile, midifile, 0.025); 88 | 89 | % calculate how many voices change at each transition 90 | %nmatAll(:,1)=floor(nmatAll(:,1)*1000)/1000; 91 | [uniqueBeats, idx1, idx2] = unique(onset(nmatAll), 'first'); 92 | uniqueAlignOns = align.nmat(idx1, 1); 93 | onsetMap = zeros(length(uniqueBeats),maxNotes); 94 | for i = 1 : length(uniqueBeats) 95 | %num = 1; 96 | for j = 1:maxNotes 97 | if sum(onset(nmat{j}) == uniqueBeats(i)) 98 | onsetMap(i,j) = 1; 99 | end 100 | %num = num + 1; 101 | end 102 | end 103 | 104 | 
% create new onset map using alignment values 105 | % THIS IS CURRENTLY ASSUMING THAT THERE ARE NO NOTATED RESTS 106 | for i = 1 : size(onsetMap,1) % number of onsets 107 | for j = 1 : size(onsetMap,2) % number of voices 108 | if onsetMap(i,j) == 1, 109 | onsMap2(i,j) = uniqueAlignOns(i); 110 | end 111 | end 112 | lv2(i) = find(onsetMap(i,:), 1, 'first'); 113 | onVals(i)=onsMap2(i,lv2(i)); 114 | end 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | 117 | %%%%%%% Audio analysis %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 118 | % set paramters for audio analysis 119 | offset1=0.125; 120 | offset2=0.125; 121 | [audio,sr]=wavread(audiofile); 122 | audio=resample(audio,1,2); 123 | sr = sr/2; 124 | tuning=estimateTuning(audio); 125 | parameter.winLenSTMSP=441; 126 | parameter.shiftFB = tuning; 127 | 128 | % create a matrix of the notes in the audio in midi note numbers for each 129 | % transition, as defined by onsetMap 130 | for i = 1 : maxNotes 131 | idxCell{i}=1; 132 | pitches{1}(i,3)=nmat{i}(1,4)+tuning; 133 | end 134 | for i = 2 : size(onsetMap,1) 135 | for j = 1 : maxNotes 136 | if onsetMap(i,j) == 1 137 | pitches{i}(j,1)=nmat{j}(idxCell{j},4)+tuning; 138 | pitches{i}(j,2)=0; 139 | try 140 | pitches{i}(j,3)=nmat{j}(idxCell{j}+1,4)+tuning; 141 | end 142 | idxCell{j}=idxCell{j}+1; 143 | else 144 | pitches{i}(j,1)=pitches{i-1}(j,3)+tuning; 145 | pitches{i}(j,2)=pitches{i-1}(j,3)+tuning; 146 | try 147 | pitches{i}(j,3)=pitches{i-1}(j,3)+tuning; 148 | end 149 | end 150 | end 151 | end 152 | 153 | % get means and covars for the singing voice 154 | % differentiate for different voices 155 | load(meansCovarsMat) 156 | for i = 1 : size(nmat,2) 157 | [meansSeed{i} covarsSeed{i} versions]=genMeansCovars(notes, vals{i},voiceType); 158 | end 159 | % set the harmonics that are going to be considered 160 | harmonics=[-1 0 1]; 161 | harmonics2=[-1 0 1 12 19 24 28 31 36]; 162 | 163 | % run audio analysis 164 | 
fpitchAll=audio_to_pitch_via_FB(audio,parameter); 165 | hop = length(audio)/size(fpitchAll,2); 166 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 167 | 168 | 169 | %%%%%%% NAME %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 170 | % initialize indexing cell array 171 | for i = 1 : maxNotes 172 | idxCell{i}=1; 173 | end 174 | for i = 1 : length(onsetMap) 175 | %for i = 2 : length(onsetMap)-1 176 | numVoices = sum(onsetMap(i,:),2); 177 | try 178 | fpitch{i}=fpitchAll(:,round((onVals(i)-offset1)*sr/hop):round((onVals(i)+offset2)*sr/hop)); 179 | catch 180 | fpitch{i}=fpitchAll(:,max(1,round((onVals(i)-offset1)*sr/hop)):end); 181 | end 182 | numFrames(i)=size(fpitch{i},2); 183 | lengthSignal(i)=length(audio(max(1,round((onVals(i)-offset1)*sr)):max(round((onVals(i)+offset2)*sr),1))); 184 | [a,b,c]=find(onsetMap(i,:), size(nmat,2)); 185 | num = 1; 186 | for j = b 187 | obs{i}(num,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 188 | if sum(onsetMap(i+1:end,j))~=0 189 | 190 | % db of sum fpitch vals - no harmonics 191 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 192 | 193 | % alternative features 194 | % % db of mean fpitch vals - no harmonics 195 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 196 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 197 | % 198 | % % db of mean fpitch vals - harmonics 199 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 200 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 201 | % 202 | % % db of sum fpitch vals - harmonics 203 | % db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 204 | % db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 205 | 206 | idxCell{j}=idxCell{j}+1; 207 | 208 | else 209 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 210 | % numVoices = numVoices-1; 211 | % b = b(b~=j); 212 | end 213 | num = num + 2; 214 | end 215 | 216 | if numVoices 217 | for j = 
1 : size(versions{numVoices},1) 218 | if all(versions{numVoices}(j,:)==b); 219 | idx = j; 220 | end 221 | end 222 | 223 | % get appropriate trans, meansSeed, covarsSeed, and calculate mixmat 224 | curTrans = trans{numVoices}; 225 | 226 | curMeansSeed = meansSeed{3}{numVoices}{idx}; 227 | curCovarsSeed = covarsSeed{3}{numVoices}{idx}; 228 | mixmat = ones(length(curMeansSeed),1); 229 | sState = startingState{numVoices}; 230 | states = [1 2 3]; 231 | 232 | if i == 1 233 | 234 | curTrans = curTrans(sum(notesInd{numVoices}==1,1)<1,sum(notesInd{numVoices}==1,1)<1); 235 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}==1,1)<1); 236 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}==1,1)<1); 237 | mixmat = mixmat(sum(notesInd{numVoices}==1,1)<1); 238 | sState = sState(sum(notesInd{numVoices}==1,1)<1); 239 | sState(1) = 1; 240 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{4}==1,1)<1); 241 | states = [2 3]; 242 | 243 | % curTrans = curTrans(sum(notesInd{numVoices}~=3)>(maxNotes-1),:); 244 | % curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 245 | % curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 246 | % mixmat = mixmat(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 247 | % sState = sState(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 248 | % notesIndTmp=notesInd{maxNotes}(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 249 | elseif i == length(onsetMap) 250 | curTrans = curTrans(sum(notesInd{numVoices}<3,1)>(numVoices-1),:); 251 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 252 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 253 | mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 254 | sState = sState(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 255 | states = [1 2]; 256 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 257 | else 258 | notesIndTmp{i}=notesInd{numVoices}; 259 | end 260 | 261 | like1{i} 
= mixgauss_prob(obs{i}, curMeansSeed, curCovarsSeed, mixmat,1); 262 | like1{i}(:,1)=[1; zeros(length(like1{i}(:,end))-1,1)]; 263 | like1{i}(:,end)=[zeros(length(like1{i}(:,end))-1,1); 1]; 264 | vpath1{i}=viterbi_path(sState, curTrans, like1{i}); 265 | end 266 | 267 | % for each note 268 | % i is the note 269 | % b(j) is the voice 270 | for j = 1 : numVoices 271 | try 272 | noteVals{i}{j}=notesIndTmp{i}(j,vpath1{i}); 273 | end 274 | for m = states 275 | try 276 | notePos{i}{j}(m)=find(noteVals{i}{j}==m,1,'last'); 277 | catch 278 | notePos{i}{j}(m)=notePos{i}{j}(m-1); 279 | end 280 | end 281 | end 282 | 283 | end 284 | 285 | 286 | 287 | 288 | % % last note 289 | numVoices=maxNotes; 290 | curTrans = trans{numVoices}; 291 | idxEnd=sum(notesInd{numVoices}<3,1)>(numVoices-1); 292 | curTrans = curTrans(idxEnd,idxEnd); 293 | 294 | curMeansSeed = meansSeed{3}{numVoices}{1}; 295 | curMeansSeed = curMeansSeed(:,idxEnd); 296 | 297 | curCovarsSeed = covarsSeed{3}{numVoices}{1}; 298 | curCovarsSeed = curCovarsSeed(:,:,idxEnd); 299 | 300 | mixmat = ones(length(curMeansSeed),1); 301 | %mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 302 | 303 | sState = startingState{numVoices}; 304 | sState = sState(1:length(mixmat)); 305 | 306 | states = [1 2]; 307 | 308 | 309 | lastOffset=length(onsetMap)+1; 310 | notesIndTmp{lastOffset}=notesInd{numVoices}(:,idxEnd); 311 | fpitch{lastOffset}=fpitchAll(:,round((onVals(end)+offset1)*sr/hop):end); 312 | numFrames(lastOffset)=size(fpitch{lastOffset},2); 313 | lengthSignal(lastOffset)=length(audio(max(1,round((onVals(end)+offset1)*sr)):end)); 314 | num = 1; 315 | for note = 1 : numVoices 316 | obs{lastOffset}(num,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))); 317 | obs{lastOffset}(num+1,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))) 318 | num = num + 2; 319 | end 320 | 321 | like1{lastOffset} = mixgauss_prob(obs{lastOffset}, curMeansSeed, curCovarsSeed, mixmat,1); 322 | 
like1{lastOffset}(:,1)=[1; zeros(length(like1{lastOffset}(:,end))-1,1)]; 323 | like1{lastOffset}(:,end)=[zeros(length(like1{lastOffset}(:,end))-1,1); 1]; 324 | vpath1{lastOffset}=viterbi_path(sState, curTrans, like1{lastOffset}); 325 | 326 | for j = 1 : numVoices 327 | noteVals{lastOffset}{j}=notesIndTmp{lastOffset}(j,vpath1{lastOffset}); 328 | 329 | for m = states 330 | notePos{lastOffset}{j}(m)=find(noteVals{lastOffset}{j}==m,1,'last'); 331 | % catch 332 | % notePos{lastOffset}{j}(m)=notePos{lastOffset}{j}(m-1); 333 | % end 334 | end 335 | end 336 | 337 | 338 | 339 | 340 | for i = 1 : length(onsetMap) 341 | for j = find(onsetMap(i,:)): sum(onsetMap(i,:)) 342 | % if onsetMap(i,j) == 1 && sum(onsetMap(i+1:end,j))~=0 343 | noteSecs{i}{j}=notePos{i}{j}*lengthSignal(i)/numFrames(i)/sr+onVals(i)-offset1; 344 | if i > 1 345 | % this doesn't work 346 | estimatedOffs{j}(i-1) = noteSecs{i}{j}(1); 347 | end 348 | estimatedOns{j}(i) = noteSecs{i}{j}(2); 349 | % else 350 | % estimatedOffs{j}(i)=0; 351 | % estimatedOns{j}(i)=0; 352 | % end 353 | end 354 | end 355 | 356 | for j = 1 : maxNotes 357 | noteSecs{lastOffset}{j}=notePos{lastOffset}{j}*lengthSignal(lastOffset)/numFrames(lastOffset)/sr+onVals(end)+offset1; 358 | estimatedOffs{j}(length(estimatedOns{1}))=noteSecs{lastOffset}{j}(1); 359 | end 360 | 361 | return -------------------------------------------------------------------------------- /selectStates.m: -------------------------------------------------------------------------------- 1 | function cumsumvals2=selectStates(startingState,prior,... 2 | trans,meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) 3 | 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | % [vpath2,histvals2,cumsumvals2]=selectStates(startingState,prior,trans, 6 | % meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) 7 | % 8 | % Description: 9 | % Refines the HMM parameters according to the modified state 10 | % sequence vector (stateO) passed into the function. 
11 | % 12 | % Inputs: 13 | % startingState - starting state for the HMM 14 | % prior - prior matrix from DTW alignment 15 | % trans - transition matrix 16 | % meansFull - means matrix 17 | % covarsFull - covariance matrix 18 | % mixmat - matrix of priors for GMM for each state 19 | % obs - two row matrix observations (aperiodicty and power) 20 | % stateO - modified state order sequence 21 | % noteNum - number of notes to be aligned 22 | % sr - sampling rate 23 | % 24 | % Outputs: 25 | % vpath2 - viterbi path 26 | % histvals2 - tally of the number of frames in each state 27 | % cumsumvals2 - ending time of each state in seconds 28 | % 29 | % Dependencies: 30 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 31 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 32 | % 33 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 34 | % http://www.ampact.org 35 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 | 38 | % create new versions the inputted variables based on the state sequence 39 | % StateO 40 | vec = (stateO + (noteNum - 1)*4); 41 | startingState2 = startingState(vec, :); 42 | prior2 = prior(vec, :); 43 | trans2 = trans(vec, vec); 44 | trans2 = diag(1./sum(trans2,2))*trans2; 45 | meansFull2 = meansFull(:,vec); 46 | covarsFull2 = covarsFull(:,:,vec); 47 | mixmat2 = mixmat(vec,:); 48 | 49 | % calculate the likelihood and vitiberi path with the new variables 50 | like2 = mixgauss_prob(obs, meansFull2, covarsFull2, mixmat2); 51 | vpath2=viterbi_path(startingState2, trans2, prior2.*like2); 52 | 53 | % create a vector of the modified alignment times 54 | histvals2 = hist(vpath2, 1:max(vpath2)); 55 | cumsumvals2 = cumsum(histvals2*32/sr); 56 | -------------------------------------------------------------------------------- /smoothNote.m: 
function smoothed = smoothNote(x, x_mid, y_mid)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% smoothed = smoothNote(x, x_mid, y_mid)
%
% Description: Generate a smoothed trajectory of a note by connecting the
%              midpoints between peaks and troughs.
%
% Inputs:
%  x - inputted signal
%  x_mid - midpoint locations in x axis between peaks and troughs
%  y_mid - midpoint locations in y axis between peaks and troughs
%
% Outputs:
%  smoothed - smoothed version of inputted signal x
%
% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT)
% http://www.ampact.org
% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel
% (mim@mr-pc.org), all rights reserved
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Start from an all-zero trajectory the same size as x
smoothed = zeros(size(x));

% Only the samples between the first and last midpoint are populated
% (use a separate index variable rather than shadowing the input x)
idx = min(x_mid) : max(x_mid);

% Linearly interpolate the midpoints at every sample index in that range
smoothed(idx) = interp1(x_mid, y_mid, idx);

function visualiser(trace,mid,spec)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% visualiser(trace,mid,spec)
%
% Description:
%   Plots a gross DTW alignment overlaid with the fine alignment
%   resulting from the HMM aligner on the output of YIN.  Trace(1,:)
%   is the list of states in the hmm, and trace(2,:) is the number of
%   YIN frames for which that state is occupied.
%
% Inputs:
%  trace - 3-D matrix of a list of states (trace(1,:)), the times
%          they end at (trace(2,:)), and the state indices (trace(3,:))
%  mid - midi file
%  spec - spectrogram of audio file (from alignmidiwav.m)
%
% Dependencies:
%  Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from:
%  https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials
%  /miditoolbox/
%
% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT)
% http://www.ampact.org
% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel
% (mim@mr-pc.org), all rights reserved.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Fix for ending zeros that mess up the plot
if trace(2,end)==0
    trace=trace(:,1:end-1);
end
% Guard the end-1/end-2 accesses: after the trim above, a short trace
% (fewer than 3 columns) would otherwise raise an index error here
if size(trace,2) >= 3 && trace(2,end-1)==0
    trace(2,end-1)=trace(2,end-2);
end

% hop size between frames
stftHop = 0.025;

% read midi file
nmat=readmidi(mid);

% plot spectrogram of audio file
imagesc(20*log10(spec));
title(['Spectrogram with Aligned MIDI Notes Overlaid']);
xlabel(['Time (.05s)']);
ylabel(['Midinote']);
axis xy;
caxis(max(caxis)+[-50 0])
colormap(1-gray)

% zoom in fundamental frequencies
% NOTE(review): offset 105 rather than the usual MIDI reference 69 —
% presumably maps pitch into the spectrogram's bin range; confirm
% against alignmidiwav.m's frequency axis
notes = nmat(:,4)';
notes = (2.^((notes-105)/12))*440;
notes(end+1) = notes(end);
nlim = length(notes);

% plot alignment
plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop);
if size(trace,1) >= 3
    notenums = trace(3,2:end);
else
    % nlim already computed above; four states per note plus a terminator
    % NOTE(review): notenums is not used in the remainder of this file —
    % possibly a truncated feature; left in place for compatibility
    notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim];
end