├── .DS_Store ├── .gitignore ├── LICENSE.md ├── README ├── SingingMeansCovars.mat ├── alignmentVisualiser.m ├── example.mid ├── example.txt ├── example.wav ├── exampleFixed.txt ├── examplePerformance.mid ├── exampleScript.m ├── fillpriormat_gauss.m ├── filltransmat.m ├── findMids.m ├── findPeaks.m ├── findSteady.m ├── genMeansCovars.m ├── genPolyTrans.m ├── getCentVals.m ├── getLoudnessEstimates.m ├── getOnsOffs.m ├── getPitchVibratoData.m ├── getPitchVibratoDynamicsData.m ├── getTimingData.m ├── getVals.m ├── hzcents.m ├── noteDct.m ├── perceivedPitch.m ├── plotFineAlign.m ├── polyExample.mid ├── polyExample.wav ├── polySingingMeansCovars.mat ├── readme.txt ├── runAlignment.m ├── runDTWAlignment.m ├── runHMMAlignment.m ├── runPolyAlignment.m ├── runPolyAlignment.m~ ├── selectStates.m ├── smoothNote.m └── visualiser.m /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .m~ 2 | .DS_STORE 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2011–2021, Johanna Devaney and Michael Mandel 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Example Usage 2 | - from the included script ensembleScript.m 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % exampleScript.m 5 | % 6 | % Description: 7 | % Example of how to use the HMM alignment algorithm 8 | % 9 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 10 | % http://www.ampact.org 11 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 13 | 14 | % audio file to be aligned 15 | audiofile=('example.wav'); 16 | 17 | % MIDI file to be aligned 18 | midifile=('example.mid'); 19 | 20 | % number of notes to align 21 | numNotes=6; 22 | 23 | % vector of order of states (according to lyrics) in stateOrd and 24 | % corresponding note numbers in noteNum 25 | % 1 indicates a rest at the beginning of ending of the note 26 | % 2 indicates a transient at the beginning or ending of the note 27 | % 3 indicates a steady state section 28 | % the following encoding is for six syllables "A-ve Ma-ri-(i)-a" 29 | % syllable A-ve Ma-ri-(i)-a 30 | % state type 13 23 23 23 3 31 31 | % note number 11 22 33 44 5 66 32 | stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; 33 | noteNum = [1 1 2 2 3 3 4 4 5 6 6]; 34 | 35 | % load singing means and covariances for the HMM alignment 36 | load SingingMeansCovars.mat 37 | means=sqrtmeans; 38 | covars=sqrtcovars; 39 | 40 | % specify that the means and covariances in the HMM won't be learned 
41 | learnparams=0; 42 | 43 | % run the alignment 44 | [allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, stateOrd, noteNum, means, covars, learnparams); 45 | 46 | % visualise the alignment 47 | alignmentVisualiser(selectstate,midifile,spec,1); 48 | 49 | % get onset and offset times 50 | times=getOnsOffs(selectstate); 51 | 52 | % write the onset and offset times to an audacity-readable file 53 | dlmwrite('example.txt',[times.ons' times.offs'], 'delimiter', '\t'); 54 | 55 | % you can load 'example.txt' into audacity and correct any errors in the 56 | % alignment, i.e., the offset error on the last note, and then reload the 57 | % corrected labels into matlab 58 | fixedLabels=load('exampleFixed.txt'); 59 | times.ons=fixedLabels(:,1)'; 60 | times.offs=fixedLabels(:,2)'; 61 | 62 | % map timing information to the quantized MIDI file 63 | nmatNew=getTimingData(midifile, times); 64 | writemidi(nmatNew,'examplePerformance.mid') 65 | 66 | % get cent values for each note 67 | cents=getCentVals(times,yinres); 68 | 69 | % calculate intervals size, perceived pitch, vibrato rate, and vibrato depth 70 | [vibratoDepth, vibratoRate, intervalSize, perceivedPitch]=getPitchVibratoData(cents,yinres.sr); 71 | 72 | % get loudness values for each note using the Genesis Loudness Toolbox 73 | [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times); 74 | 75 | % get DCT values for each note 76 | for i = 1 : length(cents) 77 | 78 | % find the peaks and troughs in the F0 trace for each note 79 | [mins{i} maxes{i}] = findPeaks(cents{i}, 100, yinres.sr/32, 60); 80 | 81 | % find the midpoints between mins and maxes in the F0 trace for each 82 | % note 83 | [x_mids{i} y_mids{i}] = findMids(cents{i}, mins{i}, maxes{i}, 100, yinres.sr/32); 84 | 85 | % generate a smoothed trajectory of a note by connecting the 86 | % midpoints between peaks and troughs. 
87 | smoothedF0s{i}=smoothNote(cents{i}, x_mids{i}, y_mids{i}); 88 | 89 | % find the steady-state portion of a note 90 | steady{i}(1:2)=findSteady(cents{i}, mins{i}, maxes{i}, x_mids{i}, y_mids{i}, 1); 91 | 92 | % compute the DCT of a signal and approximate it with the first 3 coefficients 93 | [dctVals{i}, approx{i}]=noteDct(smoothedF0s{i}(steady{i}(1):steady{i}(2)),3,yinres.sr/32); 94 | 95 | end 96 | 97 | ---------------- 98 | 99 | AMPACT Function Descriptions 100 | 101 | runAlignment.m: Calls the DTW alignment function and refines the results with the HMM alignment algorithm, with both a basic and modified state spaces (based on the lyrics). 102 | 103 | getVals.m: Gets values for DTW alignment and YIN analysis of specified audio signal and MIDI file 104 | 105 | runDTWAlignment.m: Performs a dynamic time warping alignment between specified audio and MIDI files. 106 | 107 | runHMMAlignment.m: Refines DTW alignment values with a three-state HMM, identifying silence,transient, and steady state parts of the signal. The HMM uses the DTW alignment as a prior. 108 | 109 | filltransmat.m: Makes a transition matrix from a seed transition matrix. 110 | 111 | fillpriormat_gauss.m: Creates a prior matrix based on the DTW alignment (supplied by the input variables ons and offs). 112 | 113 | selectStates.m: Refines the HMM parameters according to the modified state sequence vector passed into the function. 114 | 115 | alignmentVisualiser.m: Plots a gross DTW alignment overlaid with the fine alignment resulting from the HMM aligner on the output of YIN. 116 | 117 | getTimingData: Create a note matrix with performance timings. 118 | 119 | getCentVals: Get cent values (in relation to A, 440 Hz) for each note. 120 | 121 | getPitchVibratoData: Calculate vibrato depth, vibrato rate, perceived pitch, and interval size for the notes in the inputted cell array cents. 
122 | 123 | getLoudnessEstimates: Get loudness estimate based on Glasberg and Moore (2002) for time-varying sounds using the Loudness Toolbox. 124 | 125 | findPeaks: Find peaks and troughs in a signal. 126 | 127 | findMids: Find the midpoints between mins and maxes in a signal. 128 | 129 | smoothNote: Generate a smoothed trajectory of a note by connecting the midpoints between peaks and troughs. 130 | 131 | noteDct: Compute the DCT of a signal and approximate it with a specified number of coefficients. 132 | 133 | ---------------- 134 | 135 | AMPACT Dependencies 136 | 137 | You will need to have the following toolkits installed and in your path 138 | de Cheveigné, A. 2002. YIN MATLAB implementation Available from: http://audition.ens.fr/adc/sw/yin.zip 139 | Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 140 | Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 141 | Genesis Acoustics. 2010. Loudness Toolbox for Matlab. Available from http://www.genesis-acoustics.com/index.php?page=32 142 | Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 143 | Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/miditoolbox/ 144 | 145 | ---------------- 146 | 147 | Papers on algorithms developed for AMPACT 148 | 149 | Devaney, J., M. I. Mandel, and I. Fujinaga. 2011. Characterizing Singing Voice Fundamental Frequency Trajectories. Proceedings of the 2011 Workshop on Applications of Signal Processing to Audio and Acoustics. 150 | Devaney, J., M. I. Mandel, D. P. W. Ellis, and I. Fujinaga. 2010. Automatically extracting performance data from recordings of trained singers. Psychomusicology: Music, Mind & Brain. 21(1–2): in press. 151 | Devaney, J. 2011. 
An empirical study of the influence of musical context on intonation practices in solo singers and SATB ensembles. Ph. D. Dissertation. McGill University. 152 | Devaney, J., M. I. Mandel, and D. P. W. Ellis. 2009. Improving MIDI-audio alignment with acoutics features. In Proceedings of the 2009 Workshop on Applications of Signal Processing to Audio and Acoustics. 153 | 154 | ---------------- 155 | 156 | Papers on algorithms by other authors used by AMPACT 157 | 158 | de Cheveigné, A., and H. Kawahara. 2002. YIN, a fundamental frequency estimator for speech and music. Journal of the Acoustical Society of America 111 (4): 1917–30. 159 | Orio, N., and D. Schwarz. 2001. Alignment of monophonic and polyphonic music to a score. In Proceedings of the International Computer Music Conference, 155–8. 160 | -------------------------------------------------------------------------------- /SingingMeansCovars.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/SingingMeansCovars.mat -------------------------------------------------------------------------------- /alignmentVisualiser.m: -------------------------------------------------------------------------------- 1 | function alignmentVisualiser(trace,mid,spec,fig) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % alignmentVisualiser(trace,sig,sr,mid,highlight) 5 | % 6 | % Description: 7 | % Plots a gross DTW alignment overlaid with the fine alignment 8 | % resulting from the HMM aligner on the output of YIN. Trace(1,:) 9 | % is the list of states in the HMM, and trace(2,:) is the number of YIN 10 | % frames for which that state is occupied. Highlight is a list of 11 | % notes for which the steady state will be highlighted. 
12 | % 13 | % Inputs: 14 | % trace - 3-D matrix of a list of states (trace(1,:)), the times 15 | % they end at (trace(2,:)), and the state indices (trace(3,:)) 16 | % mid - midi file 17 | % spec - spectogram of audio file (from alignmidiwav.m) 18 | % 19 | % Dependencies: 20 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 21 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 22 | % /miditoolbox/ 23 | % 24 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 25 | % http://www.ampact.org 26 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 27 | % (mim@mr-pc.org), all rights reserved. 28 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | if ~exist('fig', 'var'), fig=1; end 31 | 32 | % Fix for ending zeros that mess up the plot 33 | if trace(2,end)==0 34 | trace=trace(:,1:end-1); 35 | end 36 | if trace(2, end-1)==0 37 | trace(2,end-1)=trace(2,end-2); 38 | end 39 | 40 | % hop size between frames 41 | stftHop = 0.025; 42 | 43 | % read midi file 44 | nmat=readmidi(mid); 45 | 46 | % plot spectogram of audio file 47 | figure(fig) 48 | imagesc(20*log10(spec)); 49 | title(['Spectrogram with Aligned MIDI Notes Overlaid']); 50 | xlabel(['Time (.05s)']); 51 | ylabel(['Midinote']); 52 | axis xy; 53 | caxis(max(caxis)+[-50 0]) 54 | colormap(1-gray) 55 | 56 | % zoom in fundamental frequencies 57 | notes = nmat(:,4)'; 58 | notes = (2.^((notes-105)/12))*440; 59 | notes(end+1) = notes(end); 60 | nlim = length(notes); 61 | 62 | % plot alignment 63 | plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop); 64 | if size(trace,1) >= 3 65 | notenums = trace(3,2:end); 66 | else 67 | nlim = length(notes); 68 | notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim]; 69 | end 70 | 71 | 72 | function plotFineAlign(stateType, occupancy, notes, stftHop) 73 | 74 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 75 | % plotFineAlign(stateType, occupancy, 
notes, stftHop, highlight) 76 | % 77 | % Description: 78 | % Plot the HMM alignment based on the output of YIN. StateType is the 79 | % list of states in the HMM, and occupancy is the number of YIN frames 80 | % for which that state is occupied. Notes is a list of midi note numbers 81 | % that are played, should be one note for each [3] in stateType. If the 82 | % highlight vector is supplied, it should contain indices of the states 83 | % to highlight by plotting an extra line at the bottom of the window. 84 | % 85 | % Inputs: 86 | % stateType - vector with a list of states 87 | % occupancy - vector indicating the time (in seconds) at which the states 88 | % in stateType end 89 | % notes - vector of notes from MIDI file 90 | % stftHop - the hop size between frames in the spectrogram 91 | % 92 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 93 | % http://www.ampact.org 94 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 95 | % (mim@mr-pc.org), all rights reserved. 96 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 97 | 98 | % Plot the 4 states: silence in red, beginning transient in green, 99 | % steady state in blue, ending transient in green. 
100 | 101 | styles = {{'r+-', 'LineWidth', 2}, 102 | {'g+-', 'LineWidth', 2}, 103 | {'b+-', 'LineWidth', 2}}; 104 | 105 | cs = occupancy /stftHop; 106 | segments = [cs(1:end-1); cs(2:end)]'; 107 | 108 | hold on 109 | 110 | stateNote = max(1, cumsum(stateType == 3)+1); 111 | for i=1:size(segments,1) 112 | plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) 113 | end 114 | 115 | hold off 116 | -------------------------------------------------------------------------------- /example.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/example.mid -------------------------------------------------------------------------------- /example.txt: -------------------------------------------------------------------------------- 1 | 0.98177 4.3443 2 | 4.4161 4.8849 3 | 4.9328 5.588 4 | 5.6374 7.7751 5 | 7.7751 9.7009 6 | 9.7009 12.936 7 | -------------------------------------------------------------------------------- /example.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/example.wav -------------------------------------------------------------------------------- /exampleFixed.txt: -------------------------------------------------------------------------------- 1 | 0.981770 4.344300 4.416100 4.884900 4.932800 5.588000 5.637400 9.148700 9.148700 9.724100 9.724100 11.732525 -------------------------------------------------------------------------------- /examplePerformance.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/examplePerformance.mid -------------------------------------------------------------------------------- /exampleScript.m: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/exampleScript.m -------------------------------------------------------------------------------- /fillpriormat_gauss.m: -------------------------------------------------------------------------------- 1 | function prior = fillpriormat_gauss(Nobs,ons,offs,Nstates) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % prior = fillpriormat_gauss(Nobs,ons,offs,Nstates) 5 | % 6 | % Description: 7 | % Creates a prior matrix based on the DTW alignment (supplied by the input 8 | % variables ons and offs. A rectangular window with half a Gaussian on 9 | % each side over the onsets and offsets estimated by the DTW alignment. 10 | % 11 | % Inputs: 12 | % Nobs - number of observations 13 | % ons - vector of onset times predicted by DTW alignment 14 | % offs - vector of offset times predicted by DTW alignment 15 | % Nstates - number of states in the hidden Markov model 16 | % 17 | % Outputs: 18 | % prior - prior matrix based on DTW alignment 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org - Johanna Devaney, 2011 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 23 | % (mim@mr-pc.org), all rights reserved. 24 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | if ~exist('Nstates', 'var'), Nstates = 5; end 27 | 28 | Nnotes = length(ons); 29 | prior = sparse(Nnotes*(Nstates-1)+1,Nobs); 30 | frames = 1:Nobs; 31 | 32 | for i=1:Nnotes 33 | row = (i-1)*(Nstates-1); 34 | insert = Nstates-5; 35 | 36 | % Silence 37 | prior(row+1,:) = flatTopGaussian(frames, gh(ons,i-1,offs,i-1,frames,.5), ... 
38 | g(offs,i-1,frames), g(ons,i,frames), gh(ons,i,offs,i,frames,.5)); 39 | 40 | prior(row+2:row+2+insert-1,:) = repmat(prior(row+1,:),insert,1); 41 | 42 | % Transient, steady state, transient 43 | prior(row+2+insert,:) = ... 44 | flatTopGaussian(frames, g(offs,i-1,frames), ... 45 | gh(offs,i-1,ons,i,frames,.75), gh(ons,i,offs,i,frames,.25), g(offs,i,frames)); 46 | prior(row+3+insert,:) = ... 47 | flatTopGaussian(frames, g(offs,i-1,frames), ... 48 | g(ons,i,frames), g(offs,i,frames), g(ons,i+1,frames)); 49 | prior(row+4+insert,:) = ... 50 | flatTopGaussian(frames, g(ons,i,frames), ... 51 | gh(ons,i,offs,i,frames,.75), gh(offs,i,ons,i+1,frames,.25), g(ons,i+1,frames)); 52 | 53 | end 54 | 55 | % The last silence 56 | i = i+1; 57 | prior(row+5+insert,:) = flatTopGaussIdx(frames, ons,i-1, offs,i-1, ... 58 | offs,i, ons,i+1); 59 | 60 | function x = gh(v1, i1, v2, i2, domain, frac) 61 | 62 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | % x = gh(v1, i1, v2, i2, domain, frac) 64 | % 65 | % Description: 66 | % Get an element that is frac fraction of the way between v1(i1) and 67 | % v2(i2), but check bounds on both vectors. Frac of 0 returns v1(i1), 68 | % frac of 1 returns v2(i2), frac of 1/2 (the default) returns half way 69 | % between them. 70 | % 71 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 72 | % http://www.ampact.org 73 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 74 | % (mim@mr-pc.org), all rights reserved. 
75 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | 77 | if ~exist('frac', 'var'), frac = 0.5; end 78 | 79 | x1 = g(v1, i1, domain); 80 | x2 = g(v2, i2, domain); 81 | x = floor(frac*x1 + (1-frac)*x2); 82 | 83 | function w = flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) 84 | 85 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 86 | % flatTopGaussIdx(x, b1,bi1, t1,ti1, t2,ti2, b2,bi2) 87 | % 88 | % Description: 89 | % Create a window function that is zeros, going up to 1s with the left 90 | % half of a gaussian, then ones, then going back down to zeros with 91 | % the right half of another gaussian. b1(bi1) is the x coordinate 2 92 | % stddevs out from the mean, which is at t1(ti1). t2(ti2) is the x 93 | % coordinate of the mean of the second gaussian and b2(bi2) is 2 94 | % stddevs out from that. The points should be in that order. Vectors 95 | % are indexed intelligently, so you don't have to worry about 96 | % overflows or underflows. X is the set of points over which this is 97 | % to be calculated. 98 | % 99 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 100 | % http://www.ampact.org 101 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 102 | % (mim@mr-pc.org), all rights reserved. 103 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 104 | 105 | b1 = g(b1, bi1, x); 106 | t1 = g(t1, ti1, x); 107 | t2 = g(t2, ti2, x); 108 | b2 = g(b2, bi2, x); 109 | w = flatTopGaussian(x, b1, t1, t2, b2); 110 | 111 | 112 | 113 | function x = g(vec, idx, domain) 114 | 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | % x = g(vec, idx, domain) 117 | % 118 | % Description: 119 | % Get an element from vec, checking bounds. Domain is the set of points 120 | % that vec is a subset of. 
121 | % 122 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 123 | % http://www.ampact.org 124 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 125 | % (mim@mr-pc.org), all rights reserved. 126 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 127 | 128 | if idx < 1 129 | x = 1; 130 | elseif idx > length(vec) 131 | x = domain(end); 132 | else 133 | x = vec(idx); 134 | end 135 | 136 | 137 | 138 | function w = flatTopGaussian(x, b1, t1, t2, b2) 139 | 140 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 141 | % flatTopGaussian(x, b1, t1, t2, b2) 142 | % 143 | % Description: 144 | % Create a window function that is zeros, going up to 1s with the left 145 | % half of a gaussian, then ones, then going back down to zeros with the 146 | % right half of another gaussian. b1 is the x coordinate 2 stddevs out 147 | % from the mean, which is at t1. t2 is the x coordinate of the mean of 148 | % the second gaussian and b2 is 2 stddevs out from that. The points 149 | % should be in that order. X is the set of points over which this is 150 | % to be calculated. 151 | % 152 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 153 | % http://www.ampact.org 154 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 155 | % (mim@mr-pc.org), all rights reserved. 
156 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 157 | 158 | if any([b1 t1 t2] > [t1 t2 b2]) 159 | warning('Endpoints are not in order: %f %f %f %f', b1, t1, t2, b2) 160 | end 161 | 162 | left = normpdf(x, t1, (t1-b1)/2+1); 163 | middle = ones(1,t2-t1-1); 164 | right = normpdf(x, t2, (b2-t2)/2+1); 165 | 166 | left = left ./ max(left); 167 | right = right ./ max(right); 168 | 169 | takeOneOut = (t1 == t2); 170 | w = [left(1:t1) middle right(t2+takeOneOut:end)]; 171 | -------------------------------------------------------------------------------- /filltransmat.m: -------------------------------------------------------------------------------- 1 | function trans = filltransmat(transseed, notes) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % trans = filltransmat (transseed, notes) 5 | % 6 | % Description: 7 | % Makes a transition matrix from a seed transition matrix. The seed 8 | % matrix is composed of the states: steady state, transient, silence, 9 | % transient, steady state, but the full transition matrix starts and 10 | % ends with silence, so the seed with be chopped up on the ends. 11 | % Notes is the number of times to repeat the seed. Transseed's first 12 | % and last states should be equivalent, as they will be overlapped 13 | % with each other. 14 | % 15 | % Inputs: 16 | % transseed - transition matrix seed 17 | % notes - number of notes being aligned 18 | % 19 | % Outputs: 20 | % trans - transition matrix 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 25 | % (mim@mr-pc.org), all rights reserved. 
26 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | 28 | % Set up transition matrix 29 | N = size(transseed,1); 30 | trans = zeros(notes*(N-1)+1,notes*(N-1)+1); 31 | Non2 = ceil(N/2); 32 | 33 | % Fill in first and last parts of the big matrix with the 34 | % appropriate fragments of the seed 35 | trans(1:Non2, 1:Non2) = transseed(Non2:end, Non2:end); 36 | trans(end-Non2+1:end, end-Non2+1:end) = transseed(1:Non2, 1:Non2); 37 | 38 | % Fill in the middle parts of the big matrix with the whole seed 39 | for i = Non2 : N-1 : (notes-1)*(N-1)+1 - Non2+1 40 | trans(i+(1:N)-1,i+(1:N)-1) = transseed; 41 | end 42 | -------------------------------------------------------------------------------- /findMids.m: -------------------------------------------------------------------------------- 1 | function [x_mids y_mids] = findMids(x, mins, maxes, windowLength_ms, sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % mids = findMids(x, mins, maxes, windowLength_ms, sr) 5 | % 6 | % Description: Find the midpoints between mins and maxes in a signal x. 7 | % mins and maxes could come from findPeaks. Finds the y 8 | % values of peaks and then finds the x values of the signal 9 | % that are closest to the average between the min and max 10 | % peak. 
11 | % 12 | % Inputs: 13 | % x - inputted signal in cents 14 | % mins - indices of minima of x 15 | % maxes - indices of maxima of x 16 | % windowLength_ms - window length in miliseconds 17 | % sr - sampling rate of x (frame rate of frequency analysis) 18 | % 19 | % Outputs: 20 | % x_mids - midpoint locations in x axis between peaks and troughs 21 | % y_mids - midpoint locations in y axis between peaks and troughs 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % window length in frames 30 | windowLength = round(windowLength_ms * sr / 2000) * 2; 31 | 32 | % sort the peaks 33 | pks = sort([maxes mins]); 34 | 35 | % average the frequency estimate of the points around each peak 36 | for i = 1:length(pks) 37 | idx = max(pks(i)-windowLength/2, 1) : ... 
38 | min(pks(i)+windowLength/2, length(x)); 39 | neighborhoods(i) = mean(x(idx)); 40 | end 41 | 42 | % find the mid-points in frequency between peaks 43 | y_mids = (neighborhoods(1:end-1) + neighborhoods(2:end)) / 2; 44 | 45 | % find the index of the point in the signal between each peak with its 46 | % value closest to the mid-point in frequency 47 | for i = 1:length(y_mids) 48 | idx = pks(i):pks(i+1); 49 | [d offset] = min(abs(y_mids(i) - x(idx))); 50 | x_mids(i) = pks(i) + offset - 1; 51 | end -------------------------------------------------------------------------------- /findPeaks.m: -------------------------------------------------------------------------------- 1 | function [mins maxes] = findPeaks(x, windowLength_ms, sr, minCount) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [mins maxes] = findPeaks(x, windowLength_ms, sr, minCount) 5 | % 6 | % Description: Find peaks and troughs in a waveform 7 | % Finds the max and min in a window of a given size and keeps 8 | % track of how many windows each point is the min or max of. 9 | % Points that are the min or max of more than minCount windows 10 | % are returned. 
11 | % 12 | % Inputs: 13 | % x - inputted signal 14 | % windowLength_ms - window length in ms 15 | % sr - sampling rate 16 | % minCount - minimum number of windows that a point needs to be the max 17 | % of to be considered a minimum or a maximum 18 | % 19 | % Outputs: 20 | % mins - minimum values in the signal 21 | % maxes - maximum values in the signal 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % create a vector of zeros for mins and maxes 30 | mins = zeros(size(x)); 31 | maxes = zeros(size(x)); 32 | 33 | % calculate window length in frames 34 | windowLength = windowLength_ms * sr / 1000; 35 | 36 | % calculate the minimum and maximum value 37 | for i = 1:length(x) - windowLength 38 | w = x(i:i+windowLength-1); 39 | [d di] = min(w); 40 | mins(i + di - 1) = mins(i + di - 1) + 1; 41 | [d di] = max(w); 42 | maxes(i + di - 1) = maxes(i + di - 1) + 1; 43 | end 44 | 45 | % pruns mins and maxes to only those that occur in an equal to or greater 46 | % number windows specified in minCount 47 | mins = find(mins >= minCount); 48 | maxes = find(maxes >= minCount); -------------------------------------------------------------------------------- /findSteady.m: -------------------------------------------------------------------------------- 1 | function steady = findSteady(x, mins, maxes, x_mids, y_mids, thresh_cents) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % steady = findSteady(x, mins, maxes, x_mids, y_mids, thresh_cents) 5 | % 6 | % Description: Find the steady-state portion of a note. 7 | % Finds the section of the note with steady vibrato where the 8 | % peaks and troughs are at least thresh_cents cents away from 9 | % the mid points between them. 
mins and maxes come from 10 | % findPeaks, x_mids and y_mids come from findMids. Steady is 11 | % a range of two indices into f0. mins and maxes may come from 12 | % the findPeaks function and x_mids and y_mids may come from 13 | % the findMids function. 14 | % 15 | % Inputs: 16 | % x - vector of f0 estimates in cents 17 | % mins - indices of minima of x 18 | % maxes - indices of maxima of x 19 | % x_mids - midpoint locations in x axis between peaks and troughs 20 | % y_mids - midpoint locations in y axis between peaks and troughs 21 | % thresh_cents - minimum distance in cents from midpoint for peaks and 22 | % troughs 23 | % 24 | % Outputs: 25 | % steady - steady-state portion of inputted signal x 26 | % 27 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 28 | % http://www.ampact.org 29 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 30 | % (mim@mr-pc.org), all rights reserved 31 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | 33 | % Find extrema that are far enough away from the midpoints 34 | peaks = sort([mins maxes]); 35 | excursion = y_mids - x(peaks(1:end-1)); 36 | bigEnough = abs(excursion) >= thresh_cents; 37 | 38 | % Count how many extrema are big enough in a row 39 | inARow(1) = double(bigEnough(1)); 40 | for i = 2:length(bigEnough) 41 | if bigEnough(i) 42 | inARow(i) = inARow(i-1)+1; 43 | else 44 | inARow(i) = 0; 45 | end 46 | end 47 | 48 | % Extract the portion of the note corresponding to the longest run of big 49 | % enough extrema 50 | [times pos] = max(inARow); 51 | steadyPeaks = peaks([pos-times+1 pos]); 52 | steady = x_mids([find(x_mids > steadyPeaks(1), 1), ... 
53 | find(x_mids < steadyPeaks(2), 1, 'last')]); 54 | steady = round(steady); -------------------------------------------------------------------------------- /genMeansCovars.m: -------------------------------------------------------------------------------- 1 | function [meansSeed covarsSeed versions]=genMeansCovars(notes, vals, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [meansSeed covarsSeed versions]=genMeansCovars(notes, vals) 5 | % 6 | % Description: Generate seed means and covariances matrices for specified 7 | % voice type 8 | % 9 | % Inputs: 10 | % notes - cell array of possible sequences 11 | % vals - mean and covariance values 12 | % voiceType - voice type (male or female) 13 | % 14 | % Outputs: 15 | % meansSeed - mean seed matrix 16 | % covarsSeed - covariance seed matrix 17 | % versions - possible sequences of states for the number of voices 18 | % 19 | % Dependencies: 20 | % None 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | % format of vals 28 | % vals{:}{1}{:} - male 29 | % vals{:}{2}{:} - female 30 | 31 | numVoice = length(voiceType); 32 | 33 | for i = 1 : numVoice 34 | noteMean1(i,1) = vals{1}{voiceType(i)}(1); 35 | noteMean1(i,2) = vals{2}{voiceType(i)}(1); 36 | noteMean1(i,3) = vals{2}{voiceType(i)}(1); 37 | noteCovar1(i,1) = vals{1}{voiceType(i)}(2); 38 | noteCovar1(i,2) = vals{2}{voiceType(i)}(2); 39 | noteCovar1(i,3) = vals{2}{voiceType(i)}(2); 40 | noteMean2(i,1) = vals{2}{voiceType(i)}(1); 41 | noteMean2(i,2) = vals{2}{voiceType(i)}(1); 42 | noteMean2(i,3) = vals{1}{voiceType(i)}(1); 43 | noteCovar2(i,1) = vals{2}{voiceType(i)}(2); 44 | noteCovar2(i,2) = vals{2}{voiceType(i)}(2); 45 | noteCovar2(i,3) = vals{1}{voiceType(i)}(2); 46 | end 47 | 48 | for i = 1 : numVoice 49 | versions{i}=nchoosek(1:numVoice,i); 50 | end 51 | 52 | for nVoice = 1:length(versions) 53 | for iVer = 1 : size(versions{nVoice},1) 54 | nMean1 = noteMean1(versions{nVoice}(iVer,:),:); 55 | nMean2 = noteMean2(versions{nVoice}(iVer,:),:); 56 | nVar1 = noteCovar1(versions{nVoice}(iVer,:),:); 57 | nVar2 = noteCovar2(versions{nVoice}(iVer,:),:); 58 | notes2 = cat(1, notes{nVoice}{:})'; 59 | for v = 1 : nVoice 60 | meansSeed{nVoice}{iVer}(2*v-1,:) = nMean1(v,notes2(v,:)); 61 | meansSeed{nVoice}{iVer}(2*v,:) = nMean2(v,notes2(v,:)); 62 | covarsSeed{nVoice}{iVer}(2*v-1,2*v-1,:) = nVar1(v,notes2(v,:)); 63 | covarsSeed{nVoice}{iVer}(2*v,2*v,:) = nVar2(v,notes2(v,:)); 64 | end 65 | end 66 | end -------------------------------------------------------------------------------- /genPolyTrans.m: -------------------------------------------------------------------------------- 1 | function [voices trans]=genPolyTrans(selfWeight, skip2Weight, skip1Weight) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [voices trans]=genPolyTrans(selfWeight, skip2Weight, skip1Weight) 5 | % 6 | % Description: 
Generate transition matrix for HMM 7 | % 8 | % Inputs: 9 | % selfWeight - relative weight given to self transitions 10 | % skip2Weight - relative weight given to transitions from 1->2 or 2->3 11 | % skip1Weight - relative weight given to transitions from 1->3 12 | % 13 | % Outputs: 14 | % voices - cell array of possible sequences 15 | % trans - transition matrix 16 | % 17 | % Dependencies: 18 | % None 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | 25 | if ~exist('selfWeight', 'var') || isempty(selfWeight), selfWeight = 5; end 26 | if ~exist('skip1Weight', 'var') || isempty(skip1Weight), skip1Weight = 1; end 27 | if ~exist('skip2Weight', 'var') || isempty(skip2Weight), skip2Weight = 1; end 28 | 29 | idx = 1; 30 | for i = 1 : 3 31 | voices{1}{idx} = i; 32 | idx = idx + 1; 33 | end 34 | 35 | idx = 1; 36 | for i = 1 : 3 37 | for j = 1 : 3 38 | voices{2}{idx} = [i j]; 39 | idx = idx + 1; 40 | end 41 | end 42 | 43 | idx = 1; 44 | for i = 1 : 3 45 | for j = 1 : 3 46 | for k = 1 : 3 47 | voices{3}{idx} = [i j k]; 48 | idx = idx + 1; 49 | end 50 | end 51 | end 52 | 53 | idx = 1; 54 | for i = 1 : 3 55 | for j = 1 : 3 56 | for k = 1 : 3 57 | for m = 1 : 3 58 | voices{4}{idx} = [i j k m]; 59 | idx = idx + 1; 60 | end 61 | end 62 | end 63 | end 64 | 65 | idx = 1; 66 | for i = 1 : 3 67 | for j = 1 : 3 68 | for k = 1 : 3 69 | for m = 1 : 3 70 | for n = 1 : 3 71 | voices{5}{idx} = [i j k m n]; 72 | idx = idx + 1; 73 | end 74 | end 75 | end 76 | end 77 | end 78 | 79 | idx = 1; 80 | for i = 1 : 3 81 | for j = 1 : 3 82 | for k = 1 : 3 83 | for m = 1 : 3 84 | for n = 1 : 3 85 | for p = 1 : 3 86 | voices{6}{idx} = [i j k m n p]; 87 | idx = idx + 1; 88 | end 89 | end 90 | end 91 | end 92 | end 93 | end 94 | 95 | 96 | for t = 1:length(voices) 97 | 
trans{t}=zeros(length(voices{t})); 98 | for i = 1 : size(trans{t},1) 99 | for j = i : size(trans{t},2) 100 | if sum(voices{t}{i}==voices{t}{j}) >= length(voices{t}{j})-1 101 | stateChange = max(voices{t}{j} - voices{t}{i}); 102 | if stateChange == 2 % 1->3 103 | trans{t}(i,j) = skip2Weight; 104 | elseif stateChange == 1 % 1->2 or 2->3 105 | trans{t}(i,j) = skip1Weight; 106 | elseif stateChange == 0 % 1->1 or 2->2 or 3->3 107 | trans{t}(i,j) = selfWeight; 108 | end 109 | end 110 | end 111 | end 112 | 113 | % Normalize 114 | trans{t} = bsxfun(@rdivide, trans{t}, sum(trans{t}, 2)); 115 | 116 | end 117 | -------------------------------------------------------------------------------- /getCentVals.m: -------------------------------------------------------------------------------- 1 | function cents=getCentVals(times,yinres) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % cents=getCentVals(times,yinres) 5 | % 6 | % Description: Get cent values (in relation to A, 440 Hz) for each note 7 | % 8 | % Inputs: 9 | % times - onset and offset times 10 | % yinres - structure of YIN values 11 | % 12 | % Outputs: 13 | % cents - cell array of cent values for each note 14 | % 15 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 16 | % http://www.ampact.org 17 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 19 | 20 | % index into f0 estimates in YIN structure with onset and offset times 21 | for i = 1:length(times.ons) 22 | cents{i}=yinres.f0(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr))*1200; 23 | end -------------------------------------------------------------------------------- /getLoudnessEstimates.m: -------------------------------------------------------------------------------- 1 | function [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times) 2 | 3 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times) 5 | % 6 | % Description: Get loudness estimate based on Glasberg and Moore (2002) 7 | % for time-varying sounds using the Loudness Toolbox 8 | % 9 | % Inputs: 10 | % audiofile - name of audiofile 11 | % times - onset and offset times 12 | % 13 | % Outputs: 14 | % loudnessEstimates - maximum short-term loudness (in sones) vs time 15 | % loudnessStructure - complete structure returned by 16 | % Loudness_TimeVaryingSound_Moore 17 | % 18 | % Dependencies: 19 | % Genesis Acoustics. 2010. Loudness Toolbox for Matlab. 20 | % Available from http://www.genesis-acoustics.com/index.php?page=32 21 | % 22 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 23 | % http://www.ampact.org 24 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 25 | % (mim@mr-pc.org), all rights reserved 26 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | 28 | % read audiofile 29 | [sig,sr]=wavread(audiofile); 30 | 31 | for i = 1 : length(times.ons) 32 | 33 | % get loudness estimate for time-varying sounds based on Glasberg and 34 | % Moore (2002) 35 | loudnessStructure{i}=Loudness_TimeVaryingSound_Moore(sig(times.ons(i)*sr:times.offs(i)*sr),sr); 36 | 37 | % save the maximum short-term loudness (in sones) vs time in a separate 38 | % variable 39 | loudnessEstimates(i)=loudnessStructure{i}.STLlevelmax; 40 | 41 | end -------------------------------------------------------------------------------- /getOnsOffs.m: -------------------------------------------------------------------------------- 1 | function res=getOnsOffs(onsoffs) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % res=getOnsOffs(onsoffs) 5 | % 6 | % Description: Extracts lists of onset and offset times from an inputted 7 | % 3*N matrix of states and corresponding ending times 8 | % from AMPACT's HMM-based alignment algorithm
9 | % 10 | % Inputs: 11 | % onsoffs - a 3*N alignment matrix, the first row is a list of N states 12 | % the second row is the time which the state ends, and the 13 | % third row is the state index 14 | % 15 | % Outputs: 16 | % res.ons - list of onset times 17 | % res.offs - list of offset times 18 | % 19 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 20 | % http://www.ampact.org 21 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | 24 | stopping=find(onsoffs(1,:)==3); 25 | starting=stopping-1; 26 | 27 | for i = 1 : length(starting) 28 | res.ons(i)=onsoffs(2,starting(i)); 29 | res.offs(i)=onsoffs(2,stopping(i)); 30 | end -------------------------------------------------------------------------------- /getPitchVibratoData.m: -------------------------------------------------------------------------------- 1 | function [vibratoDepth, vibratoRate, intervalSize, pp]=getPitchVibratoData(cents,sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [vibratoDepth, vibratoRate, noteDynamics, intervals] 5 | % =getPitchVibratoDynamicsData(times,sr) 6 | % 7 | % Description: Calculate vibrato depth, vibrato rate, perceived pitch, and 8 | % interval size for the notes in the inputted cell array cents 9 | % 10 | % Inputs: 11 | % cents - cell array of cent values for each note 12 | % sr - sampling rate 13 | % 14 | % Outputs: 15 | % vibratoDepth - cell array of vibrato depth calculations for each note 16 | % vibratoRate - cell array of vibrato rate calculations for each note 17 | % intervalSize - cell array of interval size calculations between 18 | % sequential notes 19 | % pp - cell array of perceived pitch calculations for each note 20 | % 21 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 22 | % http://www.ampact.org 23 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 24 | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | % calculate vibrato depth, vibrato rate, and perceived pitch for each note 27 | for i = 1 : length(cents) 28 | pp(i)=perceivedPitch(cents{i}, 1/sr*32, 100000); 29 | vibrato{i}=fft(cents{i}); 30 | vibrato{i}(1)=0; 31 | vibrato{i}(round(end/2):end) = 0; 32 | [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); 33 | vibratoRate(i) = noteVibratOpos(i) * (44100/32) / length(vibrato{i}); 34 | end 35 | 36 | % calculate interval size from sequential notes' perceived pitch estimates 37 | for i=1 : length(pp)-1 38 | intervalSize(i) = pp(i+1)*1200-pp(i)*1200; 39 | end -------------------------------------------------------------------------------- /getPitchVibratoDynamicsData.m: -------------------------------------------------------------------------------- 1 | function [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat,cents]=getPitchVibratoDynamicsData(times,yinres,nmat) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [vibratoDepth, vibratoRate, noteDynamic, intervalSize, pp, nmat, cents] 5 | % =getPitchVibratoDynamicsData(times,yinres,nmat) 6 | % 7 | % Description: Calculate vibrato depth, vibrato rate, note dynamics, perceived pitch, and interval size for each note 8 | % 9 | % Inputs: 10 | % times - onset and offset times 11 | % yinres - structure of YIN values (f0 and power estimates) 12 | % 13 | % Outputs: 14 | % vibratoDepth - vector of vibrato depth calculations for each note 15 | % vibratoRate - vector of vibrato rate calculations for each note 16 | % noteDynamic - vector of mean note dynamics (in dB) for each note 17 | % intervalSize - vector of interval sizes (in cents) between sequential notes 18 | % pp - vector of perceived pitch calculations for each note 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | 25 | for i = 1 : length(times.ons) 26 | cents{i}=yinres.f0(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); 27 | pp(i)=perceivedPitch(cents{i}, 1/yinres.sr*32, 100000); 28 | vibrato{i}=fft(cents{i}); 29 | vibrato{i}(1)=0; 30 | vibrato{i}(round(end/2):end) = 0; 31 | [vibratoDepth(i) noteVibratOpos(i)] = max(abs(vibrato{i})); 32 | vibratoRate(i) =
noteVibratOpos(i) * (44100/32) / length(vibrato{i}); 33 | pwrs{i}=yinres.pwr(round(times.ons(i)/32*yinres.sr):round(times.offs(i)/32*yinres.sr)); 34 | dynamicsVals{i}=10*log10(pwrs{i}); 35 | noteDynamic(i)=mean(dynamicsVals{i}); 36 | end 37 | 38 | nmat(:,5)=(noteDynamic+100)'; 39 | 40 | for i=1 : length(pp)-1 41 | intervalSize(i) = pp(i+1)*1200-pp(i)*1200; 42 | end -------------------------------------------------------------------------------- /getTimingData.m: -------------------------------------------------------------------------------- 1 | function nmatNew=getTimingData(midifile, times) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % nmat=getTimingData(midifile, times) 5 | % 6 | % Description: Create a note matrix with performance timings 7 | % 8 | % Inputs: 9 | % midifile - name of midifile 10 | % times - note onset and offset times 11 | % 12 | % Outputs: 13 | % nmatNew - MIDI toolbox note matrix with performance timings 14 | % 15 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 16 | % http://www.ampact.org 17 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 18 | % (mim@mr-pc.org), all rights reserved 19 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | 21 | % Read quantized MIDI file 22 | nmatOld=readmidi(midifile); 23 | nmatOld(:,[1,2])=nmatOld(:,[1,2])/2; 24 | 25 | % Replace timing information in MIDI file with performance timings 26 | nmatNew=nmatOld; 27 | nmatNew(:,6:7)=[times.ons',times.offs'-times.ons']; 28 | offset=nmatNew(1,6)-nmatOld(1,1); 29 | nmatNew(:,6)=nmatNew(:,6)-offset; 30 | nmatNew(:,[1,2])=nmatNew(:,[6,7]); -------------------------------------------------------------------------------- /getVals.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/getVals.m 
-------------------------------------------------------------------------------- /hzcents.m: -------------------------------------------------------------------------------- 1 | function cents = hzcents(x1, x2) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % y = hzcents(x1, x2) 5 | % 6 | % Description: Calculates the difference in cents between the frequencies 7 | % supplied in x1 and x2 using the formula: 8 | % cents = 1200 * log(x1/x2) / log 2 9 | % if x1 is higher than x2 the value in cents will be positive 10 | % if x1 is lower than x2 the value in cents will be negative 11 | % 12 | % Inputs: 13 | % x1 - frequency one in hertz 14 | % x2 - frequency two in hertz 15 | % 16 | % Outputs: 17 | % cents - size of the interval in cents between x1 and x2 18 | % 19 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 20 | % http://www.ampact.org 21 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | 24 | if x1 == 0 25 | cents = 0 26 | elseif x2 == 0 27 | cents = 0 28 | else 29 | cents = 1200 * log(x2 ./ x1) ./ log(2); 30 | end -------------------------------------------------------------------------------- /noteDct.m: -------------------------------------------------------------------------------- 1 | function [coefs approx] = noteDct(x, Ndct, sr) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [coefs approx] = noteDct(x, Ndct, sr) 5 | % 6 | % Description: Compute the DCT of a signal and approximate it with the 7 | % first Ndct coefficients x is the signal Ndct is the number 8 | % of DCT coefficients to be calculated sr is the sampling rate 9 | % of the signal 10 | % 11 | % Inputs: 12 | % x - signal to be analyzed 13 | % Ndct - number of DCT coefficients to be calculated 14 | % sr - sampling rate 15 | % 16 | % Outputs: 17 | % coefs - DCT coefficients 18 | % approx - reconstruction of X 
using the Ndct number of DCT coefficients 19 | % 20 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 21 | % http://www.ampact.org 22 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 23 | % (mim@mr-pc.org), all rights reserved 24 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | 26 | % calculate DCT coefficients using built-in dct function 27 | coefsTmp = dct(x); 28 | coefsTmp(min(end,Ndct)+1:end) = 0; 29 | 30 | % Divide by square root of N so that everything is divded by N instead of 31 | % the square root of N, because it is already divded by the sqrt of N 32 | coefs = coefsTmp(1:min(Ndct,end)) / sqrt(length(coefsTmp)); 33 | 34 | % The sampling rate divided by the length of the signal is the lowest 35 | % frequency represented by the DCT. Multiplying by it makes the 1st 36 | % coefficient into cents/second. For curves of constant slope, this makes 37 | % the 1st coefficient approximately independent of the length of the 38 | % signal. Multiplying by that frequency squared makes the 2nd coefficient into 39 | % cents/second^2. For curves of constant 2nd derivative, this makes the 2nd 40 | % coefficient approximately independent of the length of the signal, etc. 
41 | % 42 | % For 2nd coefficient, multiply by -1 so that it represents positive slope 43 | if length(coefs)>1 44 | coefs(2:end)=coefs(2:end) .* (sr ./ length(x)) .^ [1:length(coefs)-1]; 45 | coefs(2)=-coefs(2); 46 | end 47 | 48 | % reconstruct X using the DCT coefficients 49 | approx = real(idct(coefsTmp)); -------------------------------------------------------------------------------- /perceivedPitch.m: -------------------------------------------------------------------------------- 1 | function [pp1 pp2]= perceivedPitch(f0s, sr, gamma) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [pp1 pp2] = perceivedPitch(f0s, sr, gamma) 5 | % 6 | % Description: Calculate the perceived pitch of a note based on 7 | % Gockel, H., B.J.C. Moore, and R.P. Carlyon. 2001. 8 | % Influence of rate of change of frequency on the overall 9 | % pitch of frequency-modulated Tones. Journal of the 10 | % Acoustical Society of America. 109(2):701–12. 11 | % 12 | % Inputs: 13 | % f0s - vector of fundamental frequency estimates 14 | % sr - 1/sample rate of the f0 estimates (e.g. the hop rate in Hz of yin) 15 | % gamma - sets the relative weighting of quickly changing vs slowly 16 | % changing portions of notes. - a high gamma (e.g., 1000000) 17 | % gives more weight to slowly changing portions.
 18 | % 19 | % Outputs: 20 | % pp1 - perceived pitch calculated over the entire f0 vector 21 | % pp2 - perceived pitch calculated over the central 80% of the sorted f0 vector 22 | % 23 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 24 | % http://www.ampact.org 25 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 26 | % (mim@mr-pc.org), all rights reserved 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | if ~exist('gamma', 'var'), gamma = 100000; end 30 | 31 | % remove all NaNs in the f0 vector 32 | f0s(isnan(f0s))=[]; 33 | 34 | % create an index into the f0 vector in order to remove outliers by 35 | % only using the central 80% of the sorted vector 36 | [d ord] = sort(f0s); 37 | ind = ord(floor(end*.1):floor(end*.9)); 38 | 39 | % calculate the rate of change 40 | deriv = [diff(f0s)*sr -100]; 41 | 42 | % set weights for the quickly changing vs slowly changing portions 43 | weights = exp(-gamma * abs(deriv)); 44 | 45 | % calculate two versions of the perceived pitch, one using the entire 46 | % vector (pp1) and one with the central 80% (pp2) 47 | pp1 = f0s(:)' * weights(:) / sum(weights); 48 | pp2 = f0s(ind) * weights(ind)' / sum(weights(ind)); -------------------------------------------------------------------------------- /plotFineAlign.m: -------------------------------------------------------------------------------- 1 | function plotFineAlign(stateType, occupancy, notes, stftHop) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % plotFineAlign(stateType, occupancy, notes, stftHop) 5 | % 6 | % Description: 7 | % Plot the HMM alignment based on the output of YIN. StateType is the 8 | % list of states in the HMM, and occupancy is the number of YIN frames 9 | % for which that state is occupied. Notes is a list of midi note numbers 10 | % that are played, should be one note for each [3] in stateType.
If the 11 | % highlight vector is supplied, it should contain indices of the states 12 | % to highlight by plotting an extra line at the bottom of the window. 13 | % 14 | % Inputs: 15 | % stateType - vector with a list of states 16 | % occupancy - vector indicating the time (in seconds) at which the states 17 | % in stateType end 18 | % notes - vector of notes from MIDI file 19 | % stftHop - the hop size between frames in the spectrogram 20 | % 21 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 22 | % http://www.ampact.org 23 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel 24 | % (mim@mr-pc.org), all rights reserved. 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | % Plot the 4 states: silence in red, beginning transient in green, 28 | % steady state in blue, ending transient in green. 29 | 30 | styles = {{'r+-', 'LineWidth', 2}, 31 | {'g+-', 'LineWidth', 2}, 32 | {'b+-', 'LineWidth', 2}}; 33 | 34 | cs = occupancy /stftHop; 35 | segments = [cs(1:end-1); cs(2:end)]'; 36 | 37 | hold on 38 | 39 | stateNote = max(1, cumsum(stateType == 3)+1); 40 | for i=1:size(segments,1) 41 | plot(segments(i,:)', repmat(notes(stateNote(i)),2,1), styles{stateType(i+1)}{:}) 42 | end 43 | 44 | hold off -------------------------------------------------------------------------------- /polyExample.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polyExample.mid -------------------------------------------------------------------------------- /polyExample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polyExample.wav -------------------------------------------------------------------------------- /polySingingMeansCovars.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcdevaney/AMPACT/b99f1b0e46d8a2fc3d564305d4b2dacd7783e2fc/polySingingMeansCovars.mat -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | Example Usage 2 | - from the included script exampleScript.m 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % exampleScript.m 5 | % 6 | % Description: 7 | % Example of how to use the HMM alignment algorithm 8 | % 9 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 10 | % http://www.ampact.org 11 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 12 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 13 | 14 | % audio file to be aligned 15 | audiofile=('example.wav'); 16 | 17 | % MIDI file to be aligned 18 | midifile=('example.mid'); 19 | 20 | % number of notes to align 21 | numNotes=6; 22 | 23 | % vector of order of states (according to lyrics) in stateOrd and 24 | % corresponding note numbers in noteNum 25 | % 1 indicates a rest at the beginning of ending of the note 26 | % 2 indicates a transient at the beginning or ending of the note 27 | % 3 indicates a steady state section 28 | % the following encoding is for six syllables "A-ve Ma-ri-(i)-a" 29 | % syllable A-ve Ma-ri-(i)-a 30 | % state type 13 23 23 23 3 31 31 | % note number 11 22 33 44 5 66 32 | stateOrd = [1 3 2 3 2 3 2 3 3 3 1]; 33 | noteNum = [1 1 2 2 3 3 4 4 5 6 6]; 34 | 35 | % load singing means and covariances for the HMM alignment 36 | load SingingMeansCovars.mat 37 | means=sqrtmeans; 38 | covars=sqrtcovars; 39 | 40 | % specify that the means and covariances in the HMM won't be learned 41 | learnparams=0; 42 | 43 | % run the alignment 44 | [allstate selectstate,spec,yinres]=runAlignment(audiofile, midifile, numNotes, 
stateOrd, noteNum, means, covars, learnparams); 45 | 46 | % visualise the alignment 47 | alignmentVisualiser(selectstate,midifile,spec,1); 48 | 49 | % get onset and offset times 50 | times=getOnsOffs(selectstate); 51 | 52 | % write the onset and offset times to an audacity-readable file 53 | dlmwrite('example.txt',[times.ons' times.offs'], 'delimiter', '\t'); 54 | 55 | % you can load 'example.txt' into audacity and correct any errors in the 56 | % alignment, i.e., the offset error on the last note, and then reload the 57 | % corrected labels into matlab 58 | fixedLabels=load('exampleFixed.txt'); 59 | times.ons=fixedLabels(:,1)'; 60 | times.offs=fixedLabels(:,2)'; 61 | 62 | % map timing information to the quantized MIDI file 63 | nmatNew=getTimingData(midifile, times); 64 | writemidi(nmatNew,'examplePerformance.mid') 65 | 66 | % get cent values for each note 67 | cents=getCentVals(times,yinres); 68 | 69 | % calculate intervals size, perceived pitch, vibrato rate, and vibrato depth 70 | [vibratoDepth, vibratoRate, intervalSize, perceivedPitch]=getPitchVibratoData(cents,yinres.sr); 71 | 72 | % get loudness values for each note using the Genesis Loudness Toolbox 73 | [loudnessEstimates loudnessStructure]=getLoudnessEstimates(audiofile, times); 74 | 75 | % get DCT values for each note 76 | for i = 1 : length(cents) 77 | 78 | % find the peaks and troughs in the F0 trace for each note 79 | [mins{i} maxes{i}] = findPeaks(cents{i}, 100, yinres.sr/32, 60); 80 | 81 | % find the midpoints between mins and maxes in the F0 trace for each 82 | % note 83 | [x_mids{i} y_mids{i}] = findMids(cents{i}, mins{i}, maxes{i}, 100, yinres.sr/32); 84 | 85 | % generate a smoothed trajectory of a note by connecting the 86 | % midpoints between peaks and troughs. 
87 | smoothedF0s{i}=smoothNote(cents{i}, x_mids{i}, y_mids{i}); 88 | 89 | % find the steady-state portion of a note 90 | steady{i}(1:2)=findSteady(cents{i}, mins{i}, maxes{i}, x_mids{i}, y_mids{i}, 1); 91 | 92 | % compute the DCT of a signal and approximate it with the first 3 coefficients 93 | [dctVals{i}, approx{i}]=noteDct(smoothedF0s{i}(steady{i}(1):steady{i}(2)),3,yinres.sr/32); 94 | 95 | end 96 | 97 | ---------------- 98 | 99 | AMPACT Function Descriptions 100 | 101 | runAlignment.m: Calls the DTW alignment function and refines the results with the HMM alignment algorithm, with both a basic and modified state spaces (based on the lyrics). 102 | 103 | getVals.m: Gets values for DTW alignment and YIN analysis of specified audio signal and MIDI file 104 | 105 | runDTWAlignment.m: Performs a dynamic time warping alignment between specified audio and MIDI files. 106 | 107 | runHMMAlignment.m: Refines DTW alignment values with a three-state HMM, identifying silence,transient, and steady state parts of the signal. The HMM uses the DTW alignment as a prior. 108 | 109 | filltransmat.m: Makes a transition matrix from a seed transition matrix. 110 | 111 | fillpriormat_gauss.m: Creates a prior matrix based on the DTW alignment (supplied by the input variables ons and offs). 112 | 113 | selectStates.m: Refines the HMM parameters according to the modified state sequence vector passed into the function. 114 | 115 | alignmentVisualiser.m: Plots a gross DTW alignment overlaid with the fine alignment resulting from the HMM aligner on the output of YIN. 116 | 117 | getTimingData: Create a note matrix with performance timings. 118 | 119 | getCentVals: Get cent values (in relation to A, 440 Hz) for each note. 120 | 121 | getPitchVibratoData: Calculate vibrato depth, vibrato rate, perceived pitch, and interval size for the notes in the inputted cell array cents. 
122 | 123 | getLoudnessEstimates: Get loudness estimate based on Glasberg and Moore (2002) for time-varying sounds using the Loudness Toolbox. 124 | 125 | findPeaks: Find peaks and troughs in a signal. 126 | 127 | findMids: Find the midpoints between mins and maxes in a signal. 128 | 129 | smoothNote: Generate a smoothed trajectory of a note by connecting the midpoints between peaks and troughs. 130 | 131 | noteDct: Compute the DCT of a signal and approximate it with a specified number of coefficients. 132 | 133 | ---------------- 134 | 135 | AMPACT Dependencies 136 | 137 | You will need to have the following toolkits installed and in your path 138 | de Cheveigné, A. 2002. YIN MATLAB implementation Available from: http://audition.ens.fr/adc/sw/yin.zip 139 | Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 140 | Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 141 | Genesis Acoustics. 2010. Loudness Toolbox for Matlab. Available from http://www.genesis-acoustics.com/index.php?page=32 142 | Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 143 | Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/miditoolbox/ 144 | 145 | ---------------- 146 | 147 | Papers on algorithms developed for AMPACT 148 | 149 | Devaney, J., M. I. Mandel, and I. Fujinaga. 2011. Characterizing Singing Voice Fundamental Frequency Trajectories. Proceedings of the 2011 Workshop on Applications of Signal Processing to Audio and Acoustics. 150 | Devaney, J., M. I. Mandel, D. P. W. Ellis, and I. Fujinaga. 2010. Automatically extracting performance data from recordings of trained singers. Psychomusicology: Music, Mind & Brain. 21(1–2): in press. 151 | Devaney, J. 2011. 
An empirical study of the influence of musical context on intonation practices in solo singers and SATB ensembles. Ph. D. Dissertation. McGill University. 152 | Devaney, J., M. I. Mandel, and D. P. W. Ellis. 2009. Improving MIDI-audio alignment with acoutics features. In Proceedings of the 2009 Workshop on Applications of Signal Processing to Audio and Acoustics. 153 | 154 | ---------------- 155 | 156 | Papers on algorithms by other authors used by AMPACT 157 | 158 | de Cheveigné, A., and H. Kawahara. 2002. YIN, a fundamental frequency estimator for speech and music. Journal of the Acoustical Society of America 111 (4): 1917–30. 159 | Orio, N., and D. Schwarz. 2001. Alignment of monophonic and polyphonic music to a score. In Proceedings of the International Computer Music Conference, 155–8. 160 | -------------------------------------------------------------------------------- /runAlignment.m: -------------------------------------------------------------------------------- 1 | function [allstate,selectstate,spec,yinres]=runAlignment(filename, midiname, numNotes, stateOrd2, noteNum, means, covars, learnparams) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % [allstate selectstate spec yinres]=seeAlignment(audiofile,midifile,... 5 | % numNotes, stateOrd, noteNum, means, covars,learnparams) 6 | % 7 | % Description: 8 | % Calls the DTW alignment function and refines the results with the HMM 9 | % alignment algorithm, with both a basic and modified state spaces (based 10 | % on the lyrics). This function returns the results of both the state 11 | % spaces as well as the YIN analysis of the specified audio file. 
12 | % 13 | % Inputs: 14 | % filename - name of audio file 15 | % midiname - name of MIDI file 16 | % numNotes - number of notes in the MIDI file to be aligned 17 | % stateOrd2 - vector of state sequence 18 | % noteNum - vector of note numbers corresponding to state sequence 19 | % means - mean values for each state 20 | % covars - covariance values for each state 21 | % learnparams - flag as to whether to learn means and covars in the HMM 22 | % 23 | % Outputs: 24 | % allstate - ending times for each state 25 | % selectstate - ending times for each state 26 | % spec - spectrogram of the audio file 27 | % yinres - structure of results of running the YIN algorithm on the 28 | % audio signal indicated by the input variable filename 29 | % 30 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 31 | % http://www.ampact.org 32 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 33 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | 35 | if ~exist('learnparams', 'var'), learnparams = 0; end 36 | 37 | % refine stateOrd2 to correspond to the number of states specified 38 | % in numStates 39 | numStates = max(find(noteNum <= numNotes)); 40 | stateOrd2=stateOrd2(1:numStates); 41 | noteNum=noteNum(1:numStates); 42 | 43 | % read audio file and perform DTW alignment and YIN analysis 44 | hop = 32; 45 | [audiofile, sr] = wavread(filename); 46 | 47 | % normalize audio file 48 | audiofile=audiofile/sqrt(mean(audiofile.^2))*.6; 49 | 50 | %get vals 51 | [align, yinres, spec] = getVals(filename, midiname, audiofile, sr, hop); 52 | clear audiofile 53 | 54 | % run HMM alignment with the full state sequence 55 | [vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(numNotes, means, covars, align, yinres, sr, learnparams); 56 | 57 | % tally of the number of frames in each state 58 | histvals = hist(vpath, 1:max(vpath)); 59 | 60 | % ending time of each state in seconds 61 | 
cumsumvals = cumsum(histvals*hop/sr); 62 | 63 | % run HMM alignment with the state sequence refined, based on the lyrics 64 | cumsumvals2=selectStates(startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd2,noteNum,sr); 65 | 66 | % create 3*N matrices of the alignments, where the first row is the 67 | % current states, the second row is the time which the state ends, and 68 | % the third row is the state index and N is the total number of states 69 | allstate=stateOrd; 70 | allstate(2,1:length(cumsumvals))=cumsumvals; 71 | selectstate=stateOrd2; 72 | selectstate(2,1:length(cumsumvals2))=cumsumvals2; 73 | selectstate(3,:) = noteNum; -------------------------------------------------------------------------------- /runDTWAlignment.m: -------------------------------------------------------------------------------- 1 | function [align,spec] = runDTWAlignment(audiofile, midorig, tres) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % align = runDTWAlignment(sig, sr, midorig, tres, plot) 5 | % 6 | % Description: 7 | % Performs a dynamic time warping alignment between specified audio and 8 | % MIDI files and returns a matrix with the aligned onset and offset times 9 | % (with corresponding MIDI note numbers) and a spectrogram of the audio 10 | % 11 | % Inputs: 12 | % sig - audio file 13 | % midorig - midi file 14 | % tres - time resolution for MIDI to spectrum information conversion 15 | % 16 | % Outputs: 17 | % align - dynamic time warping MIDI-audio alignment structure 18 | % align.on - onset times 19 | % align.off - offset times 20 | % align.midiNote - MIDI note numbers 21 | % spec - sepctrogram 22 | % 23 | % Dependencies: 24 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 25 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 26 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. 
Available 26 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 28 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 29 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 30 | % /miditoolbox/ 31 | % 32 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 33 | % http://www.ampact.org 34 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 35 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | 37 | % default the time resolution when the caller omits it; the function takes 38 | % three arguments, so the guard must test nargin < 3 (nargin < 5 was always 39 | % true and discarded any caller-supplied tres) 40 | if nargin < 3 41 | tres = 0.025; 42 | end 43 | 44 | mid = midorig; 45 | 46 | % run alignment using peak structure distance as a feature 47 | [dtw.M,dtw.MA,dtw.RA,dtw.S,spec,dtw.notemask] = alignmidiwav(mid,... 48 | audiofile,tres,1); 49 | 50 | % read midi file and map the times in the midi file to the audio 51 | align.nmat = readmidi(mid); 52 | align.nmat(:,7) = align.nmat(:,6) + align.nmat(:,7); 53 | align.nmat(:,1:2) = maptimes(align.nmat(:,6:7),(dtw.MA-1)*tres,(dtw.RA-1)*tres); 54 | 55 | % create output alignment 56 | align.on = align.nmat(:,1); 57 | align.off = align.nmat(:,2); 58 | align.midiNote = align.nmat(:,4); 59 | -------------------------------------------------------------------------------- /runHMMAlignment.m: -------------------------------------------------------------------------------- 1 | function [vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | %[vpath,startingState,prior,trans,meansFull,covarsFull,mixmat,obs,stateOrd] 5 | % = runHMMAlignment(notenum, means, covars, align, yinres, sr, learnparams) 6 | % 7 | % Description: 8 | % Refines DTW alignment values with a three-state HMM, identifying 9 | % silence, transient, and steady state parts of the signal. The HMM 10 | % uses the DTW alignment as a prior. 
11 | % 12 | % Inputs: 13 | % notenum - number of notes to be aligned 14 | % means - 3x2 matrix of mean aperiodicy and power values HMM states 15 | % column: silence, trans, steady state 16 | % rows: aperiodicity, power 17 | % covars - 3x2 matrix of covariances for the aperiodicy and power 18 | % values (as per means) 19 | % res - structure containing inital DTW aligment 20 | % yinres - structure containg yin analysis of the signal 21 | % sr - sampling rate of the signal 22 | % 23 | % Outputs: 24 | % vpath - verterbi path 25 | % startingState - starting state for the HMM 26 | % prior - prior matrix from DTW alignment 27 | % trans - transition matrix 28 | % meansFull - means matrix 29 | % covarsFull - covariance matrix 30 | % mixmat - matrix of priors for GMM for each state 31 | % obs - two row matrix observations (aperiodicty and power) 32 | % stateOrd - modified state order sequence 33 | % 34 | % Dependencies: 35 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 36 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 37 | % 38 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 39 | % http://www.ampact.org - Johanna Devaney, 2011 40 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 
41 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | % default learnparams to 0 when omitted (the original assigned the unused 44 | % variable 'shift', leaving learnparams undefined and crashing at the 45 | % 'if learnparams' test below) 46 | if ~exist('learnparams', 'var'), learnparams = 0; end 47 | 48 | % create vectors of onsets and offsets times from DTW alignment 49 | ons=floor(align.on*sr/32); 50 | offs=floor(align.off*sr/32); 51 | 52 | % create observation matrix 53 | obs(1,:)=sqrt(yinres.ap(1:offs(notenum)+50)); 54 | obs(2,:)=sqrt(yinres.pwr(1:offs(notenum)+50)); 55 | obs(3,:)=69+12*yinres.f0(1:offs(notenum)+50); % convert octave to midi note 56 | 57 | % replace any NaNs in the observation matrix with zeros 58 | obs(isnan(obs))=0; 59 | 60 | % refine the list of onsets and offsets according to the number of notes 61 | % specified in the input arg 'notenum' 62 | prior_ons=ons(1:notenum); 63 | prior_offs=offs(1:notenum); 64 | notes = length(prior_ons); 65 | 66 | % states: silence, trans, steady state 67 | % rows: aperiodicity, power 68 | stateOrdSeed = [1 2 3 2 1]; 69 | stateOrd = [repmat(stateOrdSeed(1:end-1),1,notes) stateOrdSeed(end)]; 70 | 71 | % use stateOrd to expand means and covars to each appearance 72 | midiNotes = repmat(align.midiNote(1:notenum)', length(stateOrdSeed)-1, 1); 73 | midiNotes = [midiNotes(:)' midiNotes(end)]; 74 | meansFull = [means(:,stateOrd); midiNotes]; 75 | 76 | covars(3,3,1) = 100; 77 | covars(3,3,2) = 5; 78 | covars(3,3,3) = 1; 79 | covarsFull = covars(:,:,stateOrd); 80 | 81 | mixmat = ones(length(stateOrd),1); 82 | 83 | % transition matrix seed 84 | % {steady state, transient, silence, transient, steady state} 85 | transseed=zeros(5,5); 86 | transseed(1,1)=.99; 87 | transseed(2,2)=.98; 88 | transseed(3,3)=.98; 89 | transseed(4,4)=.98; 90 | transseed(5,5)=.99; 91 | transseed(1,2)=.0018; 92 | transseed(1,3)=.0007; 93 | transseed(1,4)=.0042; 94 | transseed(1,5)=.0033; 95 | transseed(2,3)=0.0018; 96 | transseed(2,4)=0.0102; 97 | transseed(2,5)=0.0080; 98 | transseed(3,4)=0.0112; 99 | transseed(3,5)=0.0088; 100 | transseed(4,5)=0.02; 101 | 102 | % call filltransmat to expand the transition 
matrix to the appropriate size 100 | trans = filltransmat(transseed,notes); 101 | 102 | % create starting state space matrix 103 | startingState = [1; zeros(4*notes,1)]; 104 | 105 | % call fillpriormat_gauss to create a prior matrix 106 | prior=fillpriormat_gauss(size(obs,2),prior_ons,prior_offs,5); 107 | 108 | if learnparams 109 | % use the mhmm_em function from Kevin Murphy's HMM toolkit to 110 | % learn the HMM parameters 111 | save orig_hmm_params 112 | [LL, startingState, trans, meansFull, covarsFull, mixmat1] = ... 113 | mhmm_em(obs, startingState, trans, meansFull, covarsFull, mixmat, 'max_iter', 1, 'adj_prior', 0, 'adj_trans', 0, 'adj_mix', 0, 'cov_type', 'diag'); 114 | save new_hmm_params 115 | end 116 | 117 | % create a likelihood matrix with the mixgauss_prob function from Kevin 118 | % Murphy's HMM toolkit 119 | like = mixgauss_prob(obs, meansFull, covarsFull, mixmat,1); 120 | 121 | % use the veterbi path function from Kevin Murphy's HMM toolkit to find the 122 | % most likely path 123 | prlike=prior.*like; 124 | clear like 125 | vpath=viterbi_path(startingState, trans, prlike); 126 | -------------------------------------------------------------------------------- /runPolyAlignment.m: -------------------------------------------------------------------------------- 1 | function [estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile, meansCovarsMat, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile) 5 | % 6 | % Description: Main function for runing polyphonic MIDI-audio alignment 7 | % An intial DTW alignment is refined to estimate asychroncies 8 | % between notated simultaneities 9 | % 10 | % Note that this current version assumes that each note ends 11 | % immediately before it starts again (i.e., no rests) 12 | % 13 | % Inputs: 14 | % audiofile - audio file file 15 | % midifile - midi file 16 | % meansCovarsMat - specifies means 
and covariance matrix to use 17 | % voiceType - vector indicating which voice (or instrument) to use for 18 | % each musical line 19 | % 20 | % Outputs: 21 | % estimatedOns - cell array of onset times 22 | % estimatedOffs - cell array of offset times 23 | % 24 | % Dependencies: 25 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 26 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 27 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available 28 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 29 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 30 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 31 | % /miditoolbox/ 32 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 33 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 34 | % 35 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 36 | % http://www.ampact.org 37 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | 40 | %%%%%%% if no arguments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | 42 | if nargin < 4 43 | voiceType = [2 1 1 1]; 44 | end 45 | 46 | if nargin < 3 47 | meansCovarsMat='polySingingMeansCovars.mat'; 48 | end 49 | 50 | if nargin < 2 51 | midifile = 'polyExample.mid'; 52 | end 53 | 54 | if nargin < 1 55 | audiofile = 'polyExample.wav'; 56 | end 57 | 58 | 59 | 60 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | %%%%%%%%% Initial DTW alignment stuff %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 | % read MIDI file 63 | nmatAll=midi2nmat(midifile); 64 | 65 | if min(nmatAll(:,3)) == 0 66 | nmatAll(:,3)=nmatAll(:,3)+1; 67 | end 68 | 69 | for i = sort(unique(nmatAll(:,3)))' 70 | nmat{i} = nmatAll(nmatAll(:,3)==i,:); 71 | end 72 | 73 | maxNotes=max(nmatAll(:,3)); 74 | 75 | %%%%%%%% Initialize HMM variables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | % needs to be here for calculations in initial DTW alignment 77 | % starting state for HMM 78 | 79 | for i = 1 : maxNotes 80 | startingState{i} = [1; zeros(3^i-1,1)]; 81 | end 82 | 83 | % get transition matrix for HMM 84 | [notes trans] = genPolyTrans(50, 0, 5); 85 | for i = 1 : maxNotes 86 | notesInd{i} = cat(1, notes{i}{:})'; 87 | end 88 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 89 | % run DTW alignment using composite midifile 90 | [align,spec] = runDTWAlignment(audiofile, midifile, 0.025); 91 | 92 | % calculate how many voices change at each transition 93 | %nmatAll(:,1)=floor(nmatAll(:,1)*1000)/1000; 94 | [uniqueBeats, idx1, idx2] = unique(onset(nmatAll), 'first'); 95 | uniqueAlignOns = align.nmat(idx1, 1); 96 | onsetMap = zeros(length(uniqueBeats),maxNotes); 97 | for i = 1 : length(uniqueBeats) 98 | %num = 1; 99 | for j = 1:maxNotes 100 | if sum(onset(nmat{j}) == uniqueBeats(i)) 101 | onsetMap(i,j) = 1; 102 | end 103 | %num = num + 1; 104 | end 105 | end 106 | 
107 | % create new onset map using alignment values 108 | % THIS IS CURRENTLY ASSUMING THAT THERE ARE NO NOTATED RESTS 109 | for i = 1 : size(onsetMap,1) % number of onsets 110 | for j = 1 : size(onsetMap,2) % number of voices 111 | if onsetMap(i,j) == 1, 112 | onsMap2(i,j) = uniqueAlignOns(i); 113 | end 114 | end 115 | lv2(i) = find(onsetMap(i,:), 1, 'first'); 116 | onVals(i)=onsMap2(i,lv2(i)); 117 | end 118 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 119 | 120 | %%%%%%% Audio analysis %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 121 | % set paramters for audio analysis 122 | offset1=0.125; 123 | offset2=0.125; 124 | [audio,sr]=wavread(audiofile); 125 | audio=resample(audio,1,2); 126 | sr = sr/2; 127 | tuning=estimateTuning(audio); 128 | parameter.winLenSTMSP=441; 129 | parameter.shiftFB = tuning; 130 | 131 | % create a matrix of the notes in the audio in midi note numbers for each 132 | % transition, as defined by onsetMap 133 | for i = 1 : maxNotes 134 | idxCell{i}=1; 135 | pitches{1}(i,3)=nmat{i}(1,4)+tuning; 136 | end 137 | for i = 2 : size(onsetMap,1) 138 | for j = 1 : maxNotes 139 | if onsetMap(i,j) == 1 140 | pitches{i}(j,1)=nmat{j}(idxCell{j},4)+tuning; 141 | pitches{i}(j,2)=0; 142 | try 143 | pitches{i}(j,3)=nmat{j}(idxCell{j}+1,4)+tuning; 144 | end 145 | idxCell{j}=idxCell{j}+1; 146 | else 147 | pitches{i}(j,1)=pitches{i-1}(j,3)+tuning; 148 | pitches{i}(j,2)=pitches{i-1}(j,3)+tuning; 149 | try 150 | pitches{i}(j,3)=pitches{i-1}(j,3)+tuning; 151 | end 152 | end 153 | end 154 | end 155 | 156 | % get means and covars for the singing voice 157 | % differentiate for different voices 158 | load(meansCovarsMat) 159 | for i = 1 : size(nmat,2) 160 | [meansSeed{i} covarsSeed{i} versions]=genMeansCovars(notes, vals{i},voiceType); 161 | end 162 | % set the harmonics that are going to be considered 163 | harmonics=[-1 0 1]; 164 | harmonics2=[-1 0 1 12 19 24 28 31 36]; 165 | 166 | % run audio analysis 167 | 
fpitchAll=audio_to_pitch_via_FB(audio,parameter); 168 | hop = length(audio)/size(fpitchAll,2); 169 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 170 | 171 | 172 | %%%%%%% NAME %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 173 | % initialize indexing cell array 174 | for i = 1 : maxNotes 175 | idxCell{i}=1; 176 | end 177 | for i = 1 : length(onsetMap) 178 | %for i = 2 : length(onsetMap)-1 179 | numVoices = sum(onsetMap(i,:),2); 180 | try 181 | fpitch{i}=fpitchAll(:,round((onVals(i)-offset1)*sr/hop):round((onVals(i)+offset2)*sr/hop)); 182 | catch 183 | fpitch{i}=fpitchAll(:,max(1,round((onVals(i)-offset1)*sr/hop)):end); 184 | end 185 | numFrames(i)=size(fpitch{i},2); 186 | lengthSignal(i)=length(audio(max(1,round((onVals(i)-offset1)*sr)):max(round((onVals(i)+offset2)*sr),1))); 187 | [a,b,c]=find(onsetMap(i,:), size(nmat,2)); 188 | num = 1; 189 | for j = b 190 | obs{i}(num,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 191 | if sum(onsetMap(i+1:end,j))~=0 192 | 193 | % db of sum fpitch vals - no harmonics 194 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 195 | 196 | % alternative features 197 | % % db of mean fpitch vals - no harmonics 198 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 199 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 200 | % 201 | % % db of mean fpitch vals - harmonics 202 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 203 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 204 | % 205 | % % db of sum fpitch vals - harmonics 206 | % db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 207 | % db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 208 | 209 | idxCell{j}=idxCell{j}+1; 210 | 211 | else 212 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 213 | % numVoices = numVoices-1; 214 | % b = b(b~=j); 215 | end 216 | num = num + 2; 217 | end 218 | 219 | if numVoices 220 | for j = 
1 : size(versions{numVoices},1) 221 | if all(versions{numVoices}(j,:)==b); 222 | idx = j; 223 | end 224 | end 225 | 226 | % get appropriate trans, meansSeed, covarsSeed, and calculate mixmat 227 | curTrans = trans{numVoices}; 228 | 229 | curMeansSeed = meansSeed{3}{numVoices}{idx}; 230 | curCovarsSeed = covarsSeed{3}{numVoices}{idx}; 231 | mixmat = ones(length(curMeansSeed),1); 232 | sState = startingState{numVoices}; 233 | states = [1 2 3]; 234 | 235 | if i == 1 236 | 237 | curTrans = curTrans(sum(notesInd{numVoices}==1,1)<1,sum(notesInd{numVoices}==1,1)<1); 238 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}==1,1)<1); 239 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}==1,1)<1); 240 | mixmat = mixmat(sum(notesInd{numVoices}==1,1)<1); 241 | sState = sState(sum(notesInd{numVoices}==1,1)<1); 242 | sState(1) = 1; 243 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{4}==1,1)<1); 244 | states = [2 3]; 245 | 246 | % curTrans = curTrans(sum(notesInd{numVoices}~=3)>(maxNotes-1),:); 247 | % curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 248 | % curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 249 | % mixmat = mixmat(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 250 | % sState = sState(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 251 | % notesIndTmp=notesInd{maxNotes}(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 252 | elseif i == length(onsetMap) 253 | curTrans = curTrans(sum(notesInd{numVoices}<3,1)>(numVoices-1),:); 254 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 255 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 256 | mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 257 | sState = sState(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 258 | states = [1 2]; 259 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 260 | else 261 | notesIndTmp{i}=notesInd{numVoices}; 262 | end 263 | 264 | like1{i} 
= mixgauss_prob(obs{i}, curMeansSeed, curCovarsSeed, mixmat,1); 265 | like1{i}(:,1)=[1; zeros(length(like1{i}(:,end))-1,1)]; 266 | like1{i}(:,end)=[zeros(length(like1{i}(:,end))-1,1); 1]; 267 | vpath1{i}=viterbi_path(sState, curTrans, like1{i}); 268 | end 269 | 270 | % for each note 271 | % i is the note 272 | % b(j) is the voice 273 | for j = 1 : numVoices 274 | try 275 | noteVals{i}{j}=notesIndTmp{i}(j,vpath1{i}); 276 | end 277 | for m = states 278 | try 279 | notePos{i}{j}(m)=find(noteVals{i}{j}==m,1,'last'); 280 | catch 281 | notePos{i}{j}(m)=notePos{i}{j}(m-1); 282 | end 283 | end 284 | end 285 | 286 | end 287 | 288 | 289 | 290 | 291 | % % last note 292 | numVoices=maxNotes; 293 | curTrans = trans{numVoices}; 294 | idxEnd=sum(notesInd{numVoices}<3,1)>(numVoices-1); 295 | curTrans = curTrans(idxEnd,idxEnd); 296 | 297 | curMeansSeed = meansSeed{3}{numVoices}{1}; 298 | curMeansSeed = curMeansSeed(:,idxEnd); 299 | 300 | curCovarsSeed = covarsSeed{3}{numVoices}{1}; 301 | curCovarsSeed = curCovarsSeed(:,:,idxEnd); 302 | 303 | mixmat = ones(length(curMeansSeed),1); 304 | %mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 305 | 306 | sState = startingState{numVoices}; 307 | sState = sState(1:length(mixmat)); 308 | 309 | states = [1 2]; 310 | 311 | 312 | lastOffset=length(onsetMap)+1; 313 | notesIndTmp{lastOffset}=notesInd{numVoices}(:,idxEnd); 314 | fpitch{lastOffset}=fpitchAll(:,round((onVals(end)+offset1)*sr/hop):end); 315 | numFrames(lastOffset)=size(fpitch{lastOffset},2); 316 | lengthSignal(lastOffset)=length(audio(max(1,round((onVals(end)+offset1)*sr)):end)); 317 | num = 1; 318 | for note = 1 : numVoices 319 | obs{lastOffset}(num,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))); 320 | obs{lastOffset}(num+1,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))) 321 | num = num + 2; 322 | end 323 | 324 | like1{lastOffset} = mixgauss_prob(obs{lastOffset}, curMeansSeed, curCovarsSeed, mixmat,1); 325 | 
like1{lastOffset}(:,1)=[1; zeros(length(like1{lastOffset}(:,end))-1,1)]; 326 | like1{lastOffset}(:,end)=[zeros(length(like1{lastOffset}(:,end))-1,1); 1]; 327 | vpath1{lastOffset}=viterbi_path(sState, curTrans, like1{lastOffset}); 328 | 329 | for j = 1 : numVoices 330 | noteVals{lastOffset}{j}=notesIndTmp{lastOffset}(j,vpath1{lastOffset}); 331 | 332 | for m = states 333 | notePos{lastOffset}{j}(m)=find(noteVals{lastOffset}{j}==m,1,'last'); 334 | % catch 335 | % notePos{lastOffset}{j}(m)=notePos{lastOffset}{j}(m-1); 336 | % end 337 | end 338 | end 339 | 340 | 341 | 342 | 343 | for i = 1 : length(onsetMap) 344 | for j = find(onsetMap(i,:)): sum(onsetMap(i,:)) 345 | % if onsetMap(i,j) == 1 && sum(onsetMap(i+1:end,j))~=0 346 | noteSecs{i}{j}=notePos{i}{j}*lengthSignal(i)/numFrames(i)/sr+onVals(i)-offset1; 347 | if i > 1 348 | % this doesn't work 349 | estimatedOffs{j}(i-1) = noteSecs{i}{j}(1); 350 | end 351 | estimatedOns{j}(i) = noteSecs{i}{j}(2); 352 | % else 353 | % estimatedOffs{j}(i)=0; 354 | % estimatedOns{j}(i)=0; 355 | % end 356 | end 357 | end 358 | 359 | for j = 1 : maxNotes 360 | noteSecs{lastOffset}{j}=notePos{lastOffset}{j}*lengthSignal(lastOffset)/numFrames(lastOffset)/sr+onVals(end)+offset1; 361 | estimatedOffs{j}(length(estimatedOns{j}))=noteSecs{lastOffset}{j}(1); 362 | end -------------------------------------------------------------------------------- /runPolyAlignment.m~: -------------------------------------------------------------------------------- 1 | function [estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile, meansCovarsMat, voiceType) 2 | 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | % estimatedOns estimatedOffs]=runPolyAlignment(audiofile, midifile) 5 | % 6 | % Description: Main function for runing polyphonic MIDI-audio alignment 7 | % An intial DTW alignment is refined to estimate asychroncies 8 | % between notated simultaneities 9 | % 10 | % Inputs: 11 | % audiofile - audio file file 12 | % 
midifile - midi file 13 | % meansCovarsMat - specifies means and covariance matrix to use 14 | % voiceType - vector indicating which voice (or instrument) to use for 15 | % each musical line 16 | % 17 | % Outputs: 18 | % estimatedOns - cell array of onset times 19 | % estimatedOffs - cell array of offset times 20 | % 21 | % Dependencies: 22 | % Ellis, D. P. W. 2003. Dynamic Time Warp (DTW) in Matlab. Available 23 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/dtw/ 24 | % Ellis, D. P. W. 2008. Aligning MIDI scores to music audio. Available 25 | % from: http://www.ee.columbia.edu/~dpwe/resources/matlab/alignmidiwav/ 26 | % Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from: 27 | % https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials 28 | % /miditoolbox/ 29 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 30 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 31 | % 32 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 33 | % http://www.ampact.org 34 | % (c) copyright 2014 Johanna Devaney (j@devaney.ca), all rights reserved. 
35 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | 37 | %%%%%%% if no arguments %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 | 39 | if nargin < 4 40 | voiceType = [2 1 1 1]; 41 | end 42 | 43 | if nargin < 3 44 | meansCovarsMat='polySingingMeansCovars.mat'; 45 | end 46 | 47 | if nargin < 2 48 | midifile = 'polyExample.mid'; 49 | end 50 | 51 | if nargin < 1 52 | audiofile = 'polyExample.wav'; 53 | end 54 | 55 | 56 | 57 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 58 | %%%%%%%%% Initial DTW alignment stuff %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 59 | % read MIDI file 60 | nmatAll=midi2nmat(midifile); 61 | 62 | if min(nmatAll(:,3)) == 0 63 | nmatAll(:,3)=nmatAll(:,3)+1; 64 | end 65 | 66 | for i = sort(unique(nmatAll(:,3)))' 67 | nmat{i} = nmatAll(nmatAll(:,3)==i,:); 68 | end 69 | 70 | maxNotes=max(nmatAll(:,3)); 71 | 72 | %%%%%%%% Initialize HMM variables %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | % needs to be here for calculations in initial DTW alignment 74 | % starting state for HMM 75 | 76 | for i = 1 : maxNotes 77 | startingState{i} = [1; zeros(3^i-1,1)]; 78 | end 79 | 80 | % get transition matrix for HMM 81 | [notes trans] = genPolyTrans(50, 0, 5); 82 | for i = 1 : maxNotes 83 | notesInd{i} = cat(1, notes{i}{:})'; 84 | end 85 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 86 | % run DTW alignment using composite midifile 87 | [align,spec] = runDTWAlignment(audiofile, midifile, 0.025); 88 | 89 | % calculate how many voices change at each transition 90 | %nmatAll(:,1)=floor(nmatAll(:,1)*1000)/1000; 91 | [uniqueBeats, idx1, idx2] = unique(onset(nmatAll), 'first'); 92 | uniqueAlignOns = align.nmat(idx1, 1); 93 | onsetMap = zeros(length(uniqueBeats),maxNotes); 94 | for i = 1 : length(uniqueBeats) 95 | %num = 1; 96 | for j = 1:maxNotes 97 | if sum(onset(nmat{j}) == uniqueBeats(i)) 98 | onsetMap(i,j) = 1; 99 | end 100 | %num = num + 1; 101 | end 102 | end 103 | 104 | 
% create new onset map using alignment values 105 | % THIS IS CURRENTLY ASSUMING THAT THERE ARE NO NOTATED RESTS 106 | for i = 1 : size(onsetMap,1) % number of onsets 107 | for j = 1 : size(onsetMap,2) % number of voices 108 | if onsetMap(i,j) == 1, 109 | onsMap2(i,j) = uniqueAlignOns(i); 110 | end 111 | end 112 | lv2(i) = find(onsetMap(i,:), 1, 'first'); 113 | onVals(i)=onsMap2(i,lv2(i)); 114 | end 115 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 116 | 117 | %%%%%%% Audio analysis %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 118 | % set paramters for audio analysis 119 | offset1=0.125; 120 | offset2=0.125; 121 | [audio,sr]=wavread(audiofile); 122 | audio=resample(audio,1,2); 123 | sr = sr/2; 124 | tuning=estimateTuning(audio); 125 | parameter.winLenSTMSP=441; 126 | parameter.shiftFB = tuning; 127 | 128 | % create a matrix of the notes in the audio in midi note numbers for each 129 | % transition, as defined by onsetMap 130 | for i = 1 : maxNotes 131 | idxCell{i}=1; 132 | pitches{1}(i,3)=nmat{i}(1,4)+tuning; 133 | end 134 | for i = 2 : size(onsetMap,1) 135 | for j = 1 : maxNotes 136 | if onsetMap(i,j) == 1 137 | pitches{i}(j,1)=nmat{j}(idxCell{j},4)+tuning; 138 | pitches{i}(j,2)=0; 139 | try 140 | pitches{i}(j,3)=nmat{j}(idxCell{j}+1,4)+tuning; 141 | end 142 | idxCell{j}=idxCell{j}+1; 143 | else 144 | pitches{i}(j,1)=pitches{i-1}(j,3)+tuning; 145 | pitches{i}(j,2)=pitches{i-1}(j,3)+tuning; 146 | try 147 | pitches{i}(j,3)=pitches{i-1}(j,3)+tuning; 148 | end 149 | end 150 | end 151 | end 152 | 153 | % get means and covars for the singing voice 154 | % differentiate for different voices 155 | load(meansCovarsMat) 156 | for i = 1 : size(nmat,2) 157 | [meansSeed{i} covarsSeed{i} versions]=genMeansCovars(notes, vals{i},voiceType); 158 | end 159 | % set the harmonics that are going to be considered 160 | harmonics=[-1 0 1]; 161 | harmonics2=[-1 0 1 12 19 24 28 31 36]; 162 | 163 | % run audio analysis 164 | 
fpitchAll=audio_to_pitch_via_FB(audio,parameter); 165 | hop = length(audio)/size(fpitchAll,2); 166 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 167 | 168 | 169 | %%%%%%% NAME %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 170 | % initialize indexing cell array 171 | for i = 1 : maxNotes 172 | idxCell{i}=1; 173 | end 174 | for i = 1 : length(onsetMap) 175 | %for i = 2 : length(onsetMap)-1 176 | numVoices = sum(onsetMap(i,:),2); 177 | try 178 | fpitch{i}=fpitchAll(:,round((onVals(i)-offset1)*sr/hop):round((onVals(i)+offset2)*sr/hop)); 179 | catch 180 | fpitch{i}=fpitchAll(:,max(1,round((onVals(i)-offset1)*sr/hop)):end); 181 | end 182 | numFrames(i)=size(fpitch{i},2); 183 | lengthSignal(i)=length(audio(max(1,round((onVals(i)-offset1)*sr)):max(round((onVals(i)+offset2)*sr),1))); 184 | [a,b,c]=find(onsetMap(i,:), size(nmat,2)); 185 | num = 1; 186 | for j = b 187 | obs{i}(num,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 188 | if sum(onsetMap(i+1:end,j))~=0 189 | 190 | % db of sum fpitch vals - no harmonics 191 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 192 | 193 | % alternative features 194 | % % db of mean fpitch vals - no harmonics 195 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 196 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics,:))); 197 | % 198 | % % db of mean fpitch vals - harmonics 199 | % db(mean(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 200 | % db(mean(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 201 | % 202 | % % db of sum fpitch vals - harmonics 203 | % db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics2,:))); 204 | % db(sum(fpitch{i}(nmat{j}(idxCell{j}+1,4)+harmonics2,:))); 205 | 206 | idxCell{j}=idxCell{j}+1; 207 | 208 | else 209 | obs{i}(num+1,:)=db(sum(fpitch{i}(nmat{j}(idxCell{j},4)+harmonics,:))); 210 | % numVoices = numVoices-1; 211 | % b = b(b~=j); 212 | end 213 | num = num + 2; 214 | end 215 | 216 | if numVoices 217 | for j = 
1 : size(versions{numVoices},1) 218 | if all(versions{numVoices}(j,:)==b); 219 | idx = j; 220 | end 221 | end 222 | 223 | % get appropriate trans, meansSeed, covarsSeed, and calculate mixmat 224 | curTrans = trans{numVoices}; 225 | 226 | curMeansSeed = meansSeed{3}{numVoices}{idx}; 227 | curCovarsSeed = covarsSeed{3}{numVoices}{idx}; 228 | mixmat = ones(length(curMeansSeed),1); 229 | sState = startingState{numVoices}; 230 | states = [1 2 3]; 231 | 232 | if i == 1 233 | 234 | curTrans = curTrans(sum(notesInd{numVoices}==1,1)<1,sum(notesInd{numVoices}==1,1)<1); 235 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}==1,1)<1); 236 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}==1,1)<1); 237 | mixmat = mixmat(sum(notesInd{numVoices}==1,1)<1); 238 | sState = sState(sum(notesInd{numVoices}==1,1)<1); 239 | sState(1) = 1; 240 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{4}==1,1)<1); 241 | states = [2 3]; 242 | 243 | % curTrans = curTrans(sum(notesInd{numVoices}~=3)>(maxNotes-1),:); 244 | % curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 245 | % curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 246 | % mixmat = mixmat(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 247 | % sState = sState(sum(notesInd{numVoices}~=3)>(maxNotes-1)); 248 | % notesIndTmp=notesInd{maxNotes}(:,sum(notesInd{numVoices}~=3)>(maxNotes-1)); 249 | elseif i == length(onsetMap) 250 | curTrans = curTrans(sum(notesInd{numVoices}<3,1)>(numVoices-1),:); 251 | curMeansSeed = curMeansSeed(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 252 | curCovarsSeed = curCovarsSeed(:,:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 253 | mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 254 | sState = sState(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 255 | states = [1 2]; 256 | notesIndTmp{i}=notesInd{numVoices}(:,sum(notesInd{numVoices}<3,1)>(numVoices-1)); 257 | else 258 | notesIndTmp{i}=notesInd{numVoices}; 259 | end 260 | 261 | like1{i} 
= mixgauss_prob(obs{i}, curMeansSeed, curCovarsSeed, mixmat,1); 262 | like1{i}(:,1)=[1; zeros(length(like1{i}(:,end))-1,1)]; 263 | like1{i}(:,end)=[zeros(length(like1{i}(:,end))-1,1); 1]; 264 | vpath1{i}=viterbi_path(sState, curTrans, like1{i}); 265 | end 266 | 267 | % for each note 268 | % i is the note 269 | % b(j) is the voice 270 | for j = 1 : numVoices 271 | try 272 | noteVals{i}{j}=notesIndTmp{i}(j,vpath1{i}); 273 | end 274 | for m = states 275 | try 276 | notePos{i}{j}(m)=find(noteVals{i}{j}==m,1,'last'); 277 | catch 278 | notePos{i}{j}(m)=notePos{i}{j}(m-1); 279 | end 280 | end 281 | end 282 | 283 | end 284 | 285 | 286 | 287 | 288 | % % last note 289 | numVoices=maxNotes; 290 | curTrans = trans{numVoices}; 291 | idxEnd=sum(notesInd{numVoices}<3,1)>(numVoices-1); 292 | curTrans = curTrans(idxEnd,idxEnd); 293 | 294 | curMeansSeed = meansSeed{3}{numVoices}{1}; 295 | curMeansSeed = curMeansSeed(:,idxEnd); 296 | 297 | curCovarsSeed = covarsSeed{3}{numVoices}{1}; 298 | curCovarsSeed = curCovarsSeed(:,:,idxEnd); 299 | 300 | mixmat = ones(length(curMeansSeed),1); 301 | %mixmat = mixmat(sum(notesInd{numVoices}<3,1)>(numVoices-1)); 302 | 303 | sState = startingState{numVoices}; 304 | sState = sState(1:length(mixmat)); 305 | 306 | states = [1 2]; 307 | 308 | 309 | lastOffset=length(onsetMap)+1; 310 | notesIndTmp{lastOffset}=notesInd{numVoices}(:,idxEnd); 311 | fpitch{lastOffset}=fpitchAll(:,round((onVals(end)+offset1)*sr/hop):end); 312 | numFrames(lastOffset)=size(fpitch{lastOffset},2); 313 | lengthSignal(lastOffset)=length(audio(max(1,round((onVals(end)+offset1)*sr)):end)); 314 | num = 1; 315 | for note = 1 : numVoices 316 | obs{lastOffset}(num,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))); 317 | obs{lastOffset}(num+1,:)=db(sum(fpitch{lastOffset}(nmat{note}(idxCell{note},4)+harmonics,:))) 318 | num = num + 2; 319 | end 320 | 321 | like1{lastOffset} = mixgauss_prob(obs{lastOffset}, curMeansSeed, curCovarsSeed, mixmat,1); 322 | 
like1{lastOffset}(:,1)=[1; zeros(length(like1{lastOffset}(:,end))-1,1)]; 323 | like1{lastOffset}(:,end)=[zeros(length(like1{lastOffset}(:,end))-1,1); 1]; 324 | vpath1{lastOffset}=viterbi_path(sState, curTrans, like1{lastOffset}); 325 | 326 | for j = 1 : numVoices 327 | noteVals{lastOffset}{j}=notesIndTmp{lastOffset}(j,vpath1{lastOffset}); 328 | 329 | for m = states 330 | notePos{lastOffset}{j}(m)=find(noteVals{lastOffset}{j}==m,1,'last'); 331 | % catch 332 | % notePos{lastOffset}{j}(m)=notePos{lastOffset}{j}(m-1); 333 | % end 334 | end 335 | end 336 | 337 | 338 | 339 | 340 | for i = 1 : length(onsetMap) 341 | for j = find(onsetMap(i,:)): sum(onsetMap(i,:)) 342 | % if onsetMap(i,j) == 1 && sum(onsetMap(i+1:end,j))~=0 343 | noteSecs{i}{j}=notePos{i}{j}*lengthSignal(i)/numFrames(i)/sr+onVals(i)-offset1; 344 | if i > 1 345 | % this doesn't work 346 | estimatedOffs{j}(i-1) = noteSecs{i}{j}(1); 347 | end 348 | estimatedOns{j}(i) = noteSecs{i}{j}(2); 349 | % else 350 | % estimatedOffs{j}(i)=0; 351 | % estimatedOns{j}(i)=0; 352 | % end 353 | end 354 | end 355 | 356 | for j = 1 : maxNotes 357 | noteSecs{lastOffset}{j}=notePos{lastOffset}{j}*lengthSignal(lastOffset)/numFrames(lastOffset)/sr+onVals(end)+offset1; 358 | estimatedOffs{j}(length(estimatedOns{1}))=noteSecs{lastOffset}{j}(1); 359 | end 360 | 361 | return -------------------------------------------------------------------------------- /selectStates.m: -------------------------------------------------------------------------------- 1 | function cumsumvals2=selectStates(startingState,prior,... 2 | trans,meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) 3 | 4 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 5 | % [vpath2,histvals2,cumsumvals2]=selectStates(startingState,prior,trans, 6 | % meansFull,covarsFull,mixmat,obs,stateO,noteNum,sr) 7 | % 8 | % Description: 9 | % Refines the HMM parameters according to the modified state 10 | % sequence vector (stateO) passed into the function. 
11 | % 12 | % Inputs: 13 | % startingState - starting state for the HMM 14 | % prior - prior matrix from DTW alignment 15 | % trans - transition matrix 16 | % meansFull - means matrix 17 | % covarsFull - covariance matrix 18 | % mixmat - matrix of priors for GMM for each state 19 | % obs - two row matrix observations (aperiodicty and power) 20 | % stateO - modified state order sequence 21 | % noteNum - number of notes to be aligned 22 | % sr - sampling rate 23 | % 24 | % Outputs: 25 | % vpath2 - viterbi path 26 | % histvals2 - tally of the number of frames in each state 27 | % cumsumvals2 - ending time of each state in seconds 28 | % 29 | % Dependencies: 30 | % Murphy, K. 1998. Hidden Markov Model (HMM) Toolbox for Matlab. 31 | % Available from http://www.cs.ubc.ca/~murphyk/Software/HMM/hmm.html 32 | % 33 | % Automatic Music Performance Analysis and Analysis Toolkit (AMPACT) 34 | % http://www.ampact.org 35 | % (c) copyright 2011 Johanna Devaney (j@devaney.ca), all rights reserved. 36 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 | 38 | % create new versions the inputted variables based on the state sequence 39 | % StateO 40 | vec = (stateO + (noteNum - 1)*4); 41 | startingState2 = startingState(vec, :); 42 | prior2 = prior(vec, :); 43 | trans2 = trans(vec, vec); 44 | trans2 = diag(1./sum(trans2,2))*trans2; 45 | meansFull2 = meansFull(:,vec); 46 | covarsFull2 = covarsFull(:,:,vec); 47 | mixmat2 = mixmat(vec,:); 48 | 49 | % calculate the likelihood and vitiberi path with the new variables 50 | like2 = mixgauss_prob(obs, meansFull2, covarsFull2, mixmat2); 51 | vpath2=viterbi_path(startingState2, trans2, prior2.*like2); 52 | 53 | % create a vector of the modified alignment times 54 | histvals2 = hist(vpath2, 1:max(vpath2)); 55 | cumsumvals2 = cumsum(histvals2*32/sr); 56 | -------------------------------------------------------------------------------- /smoothNote.m: 
function smoothed = smoothNote(x, x_mid, y_mid)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% smoothed = smoothNote(x, x_mid, y_mid)
%
% Description: Generate a smoothed trajectory of a note by connecting the
%              midpoints between peaks and troughs.
%
% Inputs:
%  x - inputted signal
%  x_mid - midpoint locations in x axis between peaks and troughs
%  y_mid - midpoint locations in y axis between peaks and troughs
%
% Outputs:
%  smoothed - smoothed version of inputted signal x
%
% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT)
% http://www.ampact.org
% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel
% (mim@mr-pc.org), all rights reserved
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Start from an all-zero trajectory the same size as x
smoothed = zeros(size(x));

% Only the samples between the first and last midpoint are populated
% (use a separate index variable rather than shadowing the input x)
idx = min(x_mid) : max(x_mid);

% Linearly interpolate the midpoints at every sample index in that range
smoothed(idx) = interp1(x_mid, y_mid, idx);

function visualiser(trace,mid,spec)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% visualiser(trace,mid,spec)
%
% Description:
%   Plots a gross DTW alignment overlaid with the fine alignment
%   resulting from the HMM aligner on the output of YIN.  Trace(1,:)
%   is the list of states in the hmm, and trace(2,:) is the number of
%   YIN frames for which that state is occupied.
%
% Inputs:
%  trace - 3-D matrix of a list of states (trace(1,:)), the times
%          they end at (trace(2,:)), and the state indices (trace(3,:))
%  mid - midi file
%  spec - spectrogram of audio file (from alignmidiwav.m)
%
% Dependencies:
%  Toiviainen, P. and T. Eerola. 2006. MIDI Toolbox. Available from:
%  https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials
%  /miditoolbox/
%
% Automatic Music Performance Analysis and Analysis Toolkit (AMPACT)
% http://www.ampact.org
% (c) copyright 2011 Johanna Devaney (j@devaney.ca) and Michael Mandel
% (mim@mr-pc.org), all rights reserved.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Fix for ending zeros that mess up the plot
if trace(2,end)==0
    trace=trace(:,1:end-1);
end
% Guard the end-1/end-2 accesses: after the trim above, a short trace
% (fewer than 3 columns) would otherwise raise an index error here
if size(trace,2) >= 3 && trace(2,end-1)==0
    trace(2,end-1)=trace(2,end-2);
end

% hop size between frames
stftHop = 0.025;

% read midi file
nmat=readmidi(mid);

% plot spectrogram of audio file
imagesc(20*log10(spec));
title(['Spectrogram with Aligned MIDI Notes Overlaid']);
xlabel(['Time (.05s)']);
ylabel(['Midinote']);
axis xy;
caxis(max(caxis)+[-50 0])
colormap(1-gray)

% zoom in fundamental frequencies
% NOTE(review): offset 105 rather than the usual MIDI reference 69 —
% presumably maps pitch into the spectrogram's bin range; confirm
% against alignmidiwav.m's frequency axis
notes = nmat(:,4)';
notes = (2.^((notes-105)/12))*440;
notes(end+1) = notes(end);
nlim = length(notes);

% plot alignment
plotFineAlign(trace(1,:), trace(2,:), notes(1:nlim), stftHop);
if size(trace,1) >= 3
    notenums = trace(3,2:end);
else
    % nlim already computed above; four states per note plus a terminator
    % NOTE(review): notenums is not used in the remainder of this file —
    % possibly a truncated feature; left in place for compatibility
    notenums = [reshape(repmat(1:nlim,4,1),1,[]) nlim];
end