├── trace.h
├── test
    ├── cde.gro
    ├── cde.mid
    ├── cde-11khz.wav
    └── save.sh
├── changelog.txt
├── gen_chroma.h
├── sautils.cpp
├── sautils.h
├── audiofilereader.h
├── main.h
├── trace.cpp
├── comp_chroma.h
├── alignfiles.h
├── regression.cpp
├── score-align.sln
├── audioreader.h
├── regression.h
├── compare_transcripts
    ├── compare_transcripts.sln
    └── compare.cpp
├── hillclimb.h
├── license.txt
├── curvefit.h
├── comp_chroma.cpp
├── audiofilereader.cpp
├── Makefile.osx
├── Makefile.linux
├── alignfiles.cpp
├── audioreader.cpp
├── fft3
    ├── FFT3.h
    └── FFT3.cpp
├── scorealign.h
├── compare_transcripts.vcproj
├── hillclimb.cpp
├── README.txt
├── curvefit.cpp
├── score-align.vcproj
├── main.cpp
├── gen_chroma.cpp
├── scorealign.xcodeproj
    └── project.pbxproj
└── scorealign.cpp


/trace.h:
--------------------------------------------------------------------------------
1 | void trace(char *format, ...);
2 | 
3 | 


--------------------------------------------------------------------------------
/test/cde.gro:
--------------------------------------------------------------------------------
1 | T0 -tempor:120.0
2 | c4 q
3 | d4
4 | e4
5 | 


--------------------------------------------------------------------------------
/test/cde.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cataska/scorealign/HEAD/test/cde.mid


--------------------------------------------------------------------------------
/test/cde-11khz.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cataska/scorealign/HEAD/test/cde-11khz.wav


--------------------------------------------------------------------------------
/changelog.txt:
--------------------------------------------------------------------------------
1 | Change Log for scorealign
2 | 
3 | 03-Jul-2008 RBD modified to use latest changes to portsmf
4 |                 added this file and license.txt
5 | 
6 | 


--------------------------------------------------------------------------------
/gen_chroma.h:
--------------------------------------------------------------------------------
/* Number of chroma bins stored per frame. */
#define CHROMA_BIN_COUNT 12

bool is_midi_file(char *filename);

/* AREF2 -- access element (row, column) of a flattened chroma table.
 *
 * Each row occupies CHROMA_BIN_COUNT + 1 consecutive floats: the chroma
 * bins followed by one extra slot at index CHROMA_BIN_COUNT.
 * Every macro argument is parenthesized so that expression arguments
 * such as AREF2(table, i + 1, j) address the intended element.
 */
#define AREF2(chrom_energy, row, column) \
    ((chrom_energy)[(row) * (CHROMA_BIN_COUNT + 1) + (column)])
7 | 


--------------------------------------------------------------------------------
/test/save.sh:
--------------------------------------------------------------------------------
# save.sh -- move the output files in the current directory to backup
# copies whose names start with the given prefix.
# usage: save.sh PREFIX
if [ -z "$1" ]; then
    # without a prefix the files would be renamed to hidden ".name" files
    echo "usage: $0 PREFIX" >&2
    exit 1
fi
# quote the prefix so that prefixes containing spaces work
mv path.data "$1.path.data"
mv smooth.data "$1.smooth.data"
mv beatmap.txt "$1.beatmap.txt"
mv debug-log.txt "$1.debug-log.txt"
mv midi.mid "$1.midi.mid"
mv transcription.txt "$1.transcription.txt"
echo "moved outputs to backup files prefixed with $1."
8 | 
9 |  


--------------------------------------------------------------------------------
/sautils.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  sautils.cpp
 3 |  *  scorealign
 4 |  *
 5 |  *  Created by Roger Dannenberg on 10/20/07.
 6 |  *  Copyright 2007 __MyCompanyName__. All rights reserved.
 7 |  *
 8 |  */
 9 | 
10 | #include "sautils.h"
11 | 
/* interpolate -- evaluate at x the straight line through
 * (x1, y1) and (x2, y2).
 *
 * x may lie outside [x1, x2], in which case the line is extrapolated.
 * No check is made for x1 == x2 (the division is performed as is).
 */
double interpolate(double x1, double y1, double x2, double y2, double x)
{
    const double rise = y2 - y1;
    const double run = x2 - x1;
    const double offset = x - x1;
    return y1 + rise * offset / run;
}
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/sautils.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  sautils.h
 3 |  *  scorealign
 4 |  *
 5 |  *  Created by Roger Dannenberg on 10/20/07.
 6 |  *  Copyright 2007 by Roger B. Dannenberg. All rights reserved.
 7 |  *
 8 |  */
 9 | 
/* ALLOC -- malloc an array of n elements of type t; yields a t *
 * (NULL if malloc fails). The whole expansion is parenthesized so the
 * result can be used directly in larger expressions, e.g. ALLOC(t, n)[0].
 * The caller must have malloc declared (<stdlib.h>).
 */
#define ALLOC(t, n) ((t *) malloc(sizeof(t) * (n)))

/* ROUND -- round to nearest int by adding 0.5 and truncating.
 * Because the cast truncates toward zero, this is only round-to-nearest
 * for non-negative x.
 */
#define ROUND(x) ((int) (0.5 + (x)))

/* value at x of the line through (x1, y1) and (x2, y2) */
double interpolate(double x1, double y1, double x2, double y2, double x);
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/audiofilereader.h:
--------------------------------------------------------------------------------
 1 | #define MAX_NAME_LEN 255
 2 | 
 3 | class Audio_file_reader : public Audio_reader {
 4 |  public:
 5 |     virtual long read(float *data, long n);
 6 |     SNDFILE *sf;
 7 |     SF_INFO sf_info;
 8 |     char name[MAX_NAME_LEN + 1];
 9 |     int bytes_per_frame;
10 |     long total_frames;
11 |     bool open(char *filename, Scorealign &sa, bool verbose);
12 |     void close();
13 |     double get_sample_rate();
14 |     long get_frames();
15 |     void print_info();
16 | };
17 | 
18 | 


--------------------------------------------------------------------------------
/main.h:
--------------------------------------------------------------------------------
 1 | /* main.h -- declarations of some command-line functions
 2 |  *
 3 |  * If VERBOSE is on in some files, some print functions are called.
 4 |  * Since these are only appropriate for the command-line interface,
 5 |  * there are some print functions declared in main.cpp. main.h
 6 |  * declares these functions for use in scorealign.cpp (and maybe others)
 7 |  *
 8 |  * 14-Jul-08  RBD
 9 |  */
10 | 
11 | void print_path_range(short *pathx, short *pathy, int i, int j);
12 | void print_chroma_table(float *chrom_energy, int frames);
13 | 


--------------------------------------------------------------------------------
/trace.cpp:
--------------------------------------------------------------------------------
 1 | #include "stdarg.h"
 2 | #include "stdio.h"
 3 | 
 4 | #ifdef __linux__
 5 | #define _vsnprintf vsnprintf
 6 | #elif defined(__MACH__)
 7 | #define _vsnprintf vsnprintf
 8 | #else
 9 | #include "crtdbg.h"
10 | #endif
11 | 
12 | void trace(char *format, ...)
13 | {
14 |     char msg[256];
15 |     va_list args;
16 |     va_start(args, format);
17 |     _vsnprintf(msg, 256, format, args);
18 |     va_end(args);
19 | 
20 | #if defined(_DEBUG) && !defined(__linux__)
21 |     _CrtDbgReport(_CRT_WARN, NULL, NULL, NULL, msg);
22 | #else
23 |     printf(msg);
24 | #endif
25 | }
26 | 


--------------------------------------------------------------------------------
/comp_chroma.h:
--------------------------------------------------------------------------------
 1 | #include	<stdio.h>
 2 | #include	<stdlib.h>
 3 | #include	<string.h>
 4 | #include	<ctype.h>
 5 | #include	<math.h>
 6 | 
 7 | #define SILENCE_THRESHOLD 0.001
 8 | #define SILENCE_DISTANCE 16.0
 9 | 
10 | /*				NORM_CHROMA
11 |  *
12 |  * This function normalizes the chroma for each frame of the
13 |  * chrom_energy to mean 0 and std. dev. 1.
14 |  */
15 | void norm_chroma( int len, float *chrom_energy );
16 | 
17 | /*				GEN_DIST
18 |  *
19 |  * This function generates the Euclidean distance for points i
20 |  * and j in two chroma vectors for use with dynamic time warping of 
21 |  * the chroma vectors.
22 |  */
23 | float gen_dist(int i, int j, float *chrom_energy1, 
24 | 		       float *chrom_energy2 );
25 | 


--------------------------------------------------------------------------------
/alignfiles.h:
--------------------------------------------------------------------------------
 1 | /* alignfiles.h -- perform alignment given filenames
 2 |  *
 3 |  * this module is an intermediate between the command-line interface 
 4 |  * main.cpp and the alignment code in scorealign.cpp. The scorealign.cpp
 5 |  * module is supposed to work on data from any source, e.g. it could be
 6 |  * a file, or it could be an object that sucks samples out of an
 7 |  * Audacity wave track. This module is supposed to not assume a command
 8 |  * line, type-script based interface, but *does* assume that you want
 9 |  * to read data from files, so you pass filenames into this module and
10 |  * it reads the files and calls scorealign.cpp to do the alignment work.
11 |  *
12 |  * 14-Jul-08  RBD
13 |  */
14 | 
15 | bool align_files(char *infilename1, char *infilename2, 
16 |                  Scorealign &sa, bool verbose);
17 | 
18 | bool is_midi_file(char *filename);
19 | 


--------------------------------------------------------------------------------
/regression.cpp:
--------------------------------------------------------------------------------
 1 | // Regression is a class to compute a linear regression
 2 | // 
 3 | #include "regression.h"
 4 | 
 5 | 
 6 | Regression::Regression()
 7 | {
 8 |     init();
 9 | }
10 | 
11 | void Regression::init()
12 | {
13 |     sumxx = 0;
14 |     sumyy = 0;
15 |     sumxy = 0;
16 |     sumx = 0;
17 |     sumy = 0;
18 |     n = 0;
19 | }
20 | 
21 | void Regression::point(float x, float y)
22 | {
23 |     sumx = sumx + x;
24 |     sumy = sumy + y;
25 |     sumxx = sumxx + x * x;
26 |     sumyy = sumyy + y * y;
27 |     sumxy = sumxy + x * y;
28 |     n = n + 1;
29 | }
30 | 
31 | void Regression::regress()
32 | {
33 |     float sxx = sumxx - sumx * sumx / n;
34 |     float sxy = sumxy - sumx * sumy / n;
35 |     b = sxy / sxx;
36 |     a = (sumy - b * sumx) / n;
37 | }
38 | 
39 | 
40 | float Regression::f(float x)
41 | {
42 |     return a + b * x;
43 | }
44 | 
45 | 
46 | float Regression::f_inv(float y)
47 | {
48 |     return (y - a) / b;
49 | }
50 | 


--------------------------------------------------------------------------------
/score-align.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 9.00
 3 | # Visual C++ Express 2005
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "score-align", "score-align.vcproj", "{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Win32 = Debug|Win32
 9 | 		Release|Win32 = Release|Win32
10 | 	EndGlobalSection
11 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | 		{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Debug|Win32.ActiveCfg = Debug|Win32
13 | 		{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Debug|Win32.Build.0 = Debug|Win32
14 | 		{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Release|Win32.ActiveCfg = Release|Win32
15 | 		{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Release|Win32.Build.0 = Release|Win32
16 | 	EndGlobalSection
17 | 	GlobalSection(SolutionProperties) = preSolution
18 | 		HideSolutionNode = FALSE
19 | 	EndGlobalSection
20 | EndGlobal
21 | 


--------------------------------------------------------------------------------
/audioreader.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdlib.h>
 3 | 
 4 | class Scorealign;
 5 | 
 6 | class Audio_reader {
 7 |  public:
 8 |     long samples_per_frame;
 9 |     long hop_samples;
10 |     double actual_frame_period;
11 |     long frame_count; // number of chroma vectors (analysis windows)
12 |     virtual void print_info() = 0;
13 |     long read_window(float *data);
14 |     virtual long read(float *data, long n) = 0;
15 |     virtual double get_sample_rate() = 0;
16 |     virtual long get_frames() = 0; // returns frames of input audio 
17 |     // i.e. (samples/channels)
18 |     void calculate_parameters(Scorealign &sa, bool verbose);
19 |     Audio_reader() {
20 |         reading_first_window = true;
21 |         reading_last_window = false;
22 |         temp_data = NULL;
23 |     }
24 |     ~Audio_reader() {
25 |         if (temp_data) free(temp_data);
26 |     }
27 |  protected:
28 |     bool reading_first_window;
29 |     bool reading_last_window;
30 |     float *temp_data;
31 | };
32 | 
33 | 


--------------------------------------------------------------------------------
/regression.h:
--------------------------------------------------------------------------------
 1 | // Regression is a class to compute a linear regression
 2 | // 
 3 | // call point(x, y) to add a data point
 4 | // call regress() to compute the regression
 5 | // call f(x) to evaluate the linear regression at x
 6 | // call f_inv(y) to evaluate the inverse linear regression at y
 7 | // to restart with new points, call init()
 8 | // regress() can be called after each point()
 9 | //
10 | // other forms of regression should be added. This one does
11 | // standard least squares regression
12 | 
// Least-squares fit of a line a + b*x to points supplied one at a time.
class Regression {
 private:
    // running sums over all points added since the last init()
    float sumxx; // x squared
    float sumyy; // y squared
    float sumxy; // x times y
    float sumx;  // x
    float sumy;  // y
    int n;       // how many points have been added
    float a, b;  // intercept and slope, set by regress()
 public:
    Regression();                 // starts empty (calls init())
    void init();                  // discard all points
    void point(float x, float y); // add one data point
    void regress();               // compute a and b from the sums
    float f(float x);             // a + b*x
    float f_inv(float y);         // x such that f(x) == y
};
29 | 
30 | 


--------------------------------------------------------------------------------
/compare_transcripts/compare_transcripts.sln:
--------------------------------------------------------------------------------
 1 | 
 2 | Microsoft Visual Studio Solution File, Format Version 9.00
 3 | # Visual Studio 2005
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compare_transcripts", "compare_transcripts.vcproj", "{C7876B12-4C66-4C26-9318-3363CD9A3FE9}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Win32 = Debug|Win32
 9 | 		Release|Win32 = Release|Win32
10 | 	EndGlobalSection
11 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | 		{C7876B12-4C66-4C26-9318-3363CD9A3FE9}.Debug|Win32.ActiveCfg = Debug|Win32
13 | 		{C7876B12-4C66-4C26-9318-3363CD9A3FE9}.Debug|Win32.Build.0 = Debug|Win32
14 | 		{C7876B12-4C66-4C26-9318-3363CD9A3FE9}.Release|Win32.ActiveCfg = Release|Win32
15 | 		{C7876B12-4C66-4C26-9318-3363CD9A3FE9}.Release|Win32.Build.0 = Release|Win32
16 | 	EndGlobalSection
17 | 	GlobalSection(SolutionProperties) = preSolution
18 | 		HideSolutionNode = FALSE
19 | 	EndGlobalSection
20 | EndGlobal
21 | 


--------------------------------------------------------------------------------
/hillclimb.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  hillclimb.h
 3 |  *  scorealign
 4 |  *
 5 |  *  Created by Roger Dannenberg on 10/20/07.
 6 |  *  Copyright 2007 by Roger B. Dannenberg. All rights reserved.
 7 |  *
 8 |  * USAGE:
 9 |  *    Subclass and define evaluate, a function of n parameters.
10 |  * Set parameters, step_size, min, max, and n. Parameters must
11 |  * be near a local maximum and step_size defines the grid that
12 |  * is searched. Search will not go outside of min and max.
13 |  * Call optimize()
14 |  * retrieve optimized parameter values with get_parameters.
15 |  * 
16 |  */
17 | 
/* Hillclimb -- abstract base for grid-based parameter optimization.
 * Subclasses provide evaluate(); see USAGE above.
 */
class Hillclimb {
protected:
    double *parameters; // parameters to optimize
    double *step_size;  // step size for each parameter (these are 
        // provided by the user and remain fixed)
    double *min_param; // minimum parameter values
    double *max_param; // maximum parameter values
    int n; // number of parameters
public:
    // start with no parameter arrays attached, so get_parameters()
    // returns 0 rather than an indeterminate pointer before
    // set_parameters() is called
    Hillclimb() : parameters(0), step_size(0), min_param(0),
                  max_param(0), n(0) {}
    // virtual: this class is meant to be subclassed, so deleting a
    // subclass object through a Hillclimb pointer must run its destructor
    virtual ~Hillclimb() {}
    void set_parameters(double *parameters_, double *step_size_, 
                        double *min_, double *max_, int n_);
    // retrieve parameters after optimization:
    double *get_parameters() { return parameters; }
    // function of the n parameters, defined by the subclass
    virtual double evaluate() = 0;
    double optimize();
};
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/compare_transcripts/compare.cpp:
--------------------------------------------------------------------------------
 1 | #include "stdio.h"
 2 | #include "stdlib.h"
 3 | #include "string.h"
 4 | #include "math.h"
 5 | 
 6 | bool read_stuff(FILE *f, int &p, float &start, float &dur)
 7 | {
 8 | 	int n, chan, vel;
 9 | 	int c;
10 | 	while ((c = getc(f)) == '#') {
11 | 		while ((c = getc(f)) != '\n' && c != EOF) ;
12 | 	}
13 | 	ungetc(c, f);
14 | 	int fields = fscanf(f, "%d %d %d %d %f %f", &n, &chan, &p, &vel, &start, &dur);
15 | 	if (fields == EOF) {
16 | 		return false;
17 | 	} else if (fields != 6) {
18 | 		printf("Error scanning file\n");
19 | 		exit(1);
20 | 	}
21 | 	while ((c = getc(f)) != '\n' && c != EOF) ;
22 | 	return true;
23 | }
24 | 
25 | 
// Print the command-line synopsis to stdout.
void print_usage(char *progname)
{
	fprintf(stdout, "%s file1 file2\n", progname);
}
30 | 
31 | 
32 | int main(int argc, char *argv[])
33 | {
34 | 	char *file1;
35 | 	char *file2;
36 | 	char *progname = strrchr(argv[0], '/');
37 | 	progname = progname ? progname + 1 : argv[0];
38 | 	if (argc < 3) {
39 | 		print_usage(progname);
40 | 		return 1;
41 | 	}
42 | 	file1 = argv[1];
43 | 	file2 = argv[2];
44 | 	FILE *f1 = fopen(file1, "r");
45 | 	FILE *f2 = fopen(file2, "r");
46 | 	int count = 0;
47 | 	float sum = 0.0;
48 | 	float sumsqr = 0.0;
49 | 	while (true) {
50 | 		int p1, p2;
51 | 		float start1, start2;
52 | 		float dur1, dur2;
53 | 		bool ok1 = read_stuff(f1, p1, start1, dur1);
54 | 		bool ok2 = read_stuff(f2, p2, start2, dur2);
55 | 		if (ok1 != ok2 || p1 != p2) {
56 | 			printf("Transcripts are not compatible\n");
57 | 			exit(1);
58 | 		}
59 | 		if (!ok1) break;
60 | 		count++;
61 | 		float diff = start2 - start1;
62 | 		sum += diff;
63 | 		sumsqr += diff * diff;
64 | 	}
65 | 	float avg = sum / count;
66 | 	float stddev = sqrt((sumsqr / count) - (avg * avg));
67 | 	printf("average error = %g\nstandard deviation = %g\n", avg, stddev);
68 | 
69 | 	return 0;
70 | }


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * scorealign: audio/score alignment program
 3 |  *
 4 |  * license.txt -- a copy of the scorealign copyright notice and license 
 5 |  *                information
 6 |  *
 7 |  * Latest version available at: http://sourceforge.net/projects/portmedia
 8 |  *
 9 |  * Copyright (c) 1999-2000 Ross Bencina and Phil Burk
10 |  * Copyright (c) 2001-2006 Roger B. Dannenberg
11 |  *
12 |  * Permission is hereby granted, free of charge, to any person obtaining
13 |  * a copy of this software and associated documentation files
14 |  * (the "Software"), to deal in the Software without restriction,
15 |  * including without limitation the rights to use, copy, modify, merge,
16 |  * publish, distribute, sublicense, and/or sell copies of the Software,
17 |  * and to permit persons to whom the Software is furnished to do so,
18 |  * subject to the following conditions:
19 |  *
20 |  * The above copyright notice and this permission notice shall be
21 |  * included in all copies or substantial portions of the Software.
22 |  *
23 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26 |  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
27 |  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
28 |  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
29 |  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 |  */
31 | 
32 | /*
33 |  * The text above constitutes the entire Portsmf license; however, 
34 |  * the PortMusic community also makes the following non-binding requests:
35 |  *
36 |  * Any person wishing to distribute modifications to the Software is
37 |  * requested to send the modifications to the original developer so that
38 |  * they can be incorporated into the canonical version. It is also
39 |  * requested that these non-binding requests be included along with the 
40 |  * license above.
41 |  */
42 | 


--------------------------------------------------------------------------------
/curvefit.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  curvefit.h
 3 |  *  scorealign
 4 |  *
 5 |  *  Created by Roger B. Dannenberg on 10/20/07.
 6 |  *  Copyright 2007 by Roger B. Dannenberg. All rights reserved.
 7 |  *
 8 |  * Dynamic programming does a good job of getting a rough alignment
 9 |  * that is very good in a global sense, but there are often short-term
10 |  * "digressions" where the optimal path wanders off the "true" path.
11 |  * These digressions are hard to correct with simple smoothing. This
12 |  * module is intended to assert a "steady tempo" constraint to improve
13 |  * the path. It starts with the dynamic programming path, which is likely
14 |  * to be close to the correct path. The DP path (in pathx[] and pathy[])
15 |  * is divided evenly into segments of approximately line_time seconds
16 |  * along the x axis. For a segment from x1 to x2, linear regression is 
17 |  * performed on the DP path from x1 to x2. This specifies an initial
18 |  * line segment. Next, the end-points are joined by averaging: if
19 |  * the segment from x1 to x2 ends at y-end and the segment from x2 to x3
20 |  * starts at y-start, then the end of line x1--x2 and the beginning of
21 |  * line x2--x3 are adjusted to (y-end + y-start)/2. Now the fun starts:
22 |  * the endpoints of all the lines are adjusted up and down in order to 
23 |  * minimize a distance function. The distance function estimates the
24 |  * integral of the distance matrix value along the line. Since the line
25 |  * falls between discrete points in the matrix, interpolation is used.
26 |  * The end result is converted back into a discrete path. (Maybe in the 
27 |  * future, the pathx[]/pathy[] representation should be generalized to
28 |  * allow for non-integer path coordinates.) The resulting path will
29 |  * have steady tempo at least within each segment. What I hope will
30 |  * happen is that when there are chord changes or melody changes, there
31 |  * will be "narrow" pathways in the distance matrix. Getting the 
32 |  * alignment wrong at these transitions will cost a lot. Other places
33 |  * are not so critical, which is why I think DP wanders off the true
34 |  * path. The straight-line path will ensure that for the most part, the
35 |  * score alignment is determined by the transitions, and where alignment
36 |  * is not critical, the alignment will avoid any rubato or over-fitting.
37 |  */
38 | 
39 | void curve_fitting(Scorealign *sa, bool verbose);
40 | 
41 | 


--------------------------------------------------------------------------------
/comp_chroma.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <fstream>
 3 | #include "allegro.h"
 4 | #include "audioreader.h"
 5 | #include "gen_chroma.h"
 6 | #include  "comp_chroma.h"
 7 | 
 8 | using namespace std;
 9 | 
10 | /*				NORM_CHROMA
11 |  *
12 |  * This function normalizes the chroma for each frame of the
13 |  * chrom_energy to mean 0 and std. dev. 1. But if this is a
14 |  * "silent frame", set the 13th element to 1.
15 |  */
16 | void norm_chroma( int len, float *chrom_energy ) {
17 | 
18 |   float avg = 0;
19 |   float dev = 0;
20 |   float sum = 0;
21 | 
22 |   for( int i = 0; i < len; i++ ) {
23 | 
24 |     /* Calculate avg for this frame */
25 |     sum = 0;
26 |     for ( int j = 0; j < 12; j++ )
27 |         sum += AREF2(chrom_energy, i, j);
28 |     avg = sum / 12.0;
29 | 
30 | 	/* Silence detection: */ 
31 | 	float silence = 0.0F;
32 | 	if (avg < SILENCE_THRESHOLD) { /* assume silent */
33 | 		silence = 1.0F;
34 | 	}
35 |     AREF2(chrom_energy, i, 12) = silence;
36 | 	
37 | 	// printf("avg at %g: %g\n", i * 0.25, avg);
38 | 
39 |     /* Normalize this frame to avg. 0 */
40 |     for ( int j = 0; j < 12; j++ )
41 |         AREF2(chrom_energy, i, j) -= avg;
42 | 
43 |     /* Calculate std. dev. for this frame */
44 |     sum = 0;
45 |     for ( int j = 0; j < 12; j++ ) {
46 |         float x = AREF2(chrom_energy, i, j);
47 |         sum += x * x;
48 |     }
49 |     dev = sqrt( sum / 12.0 );
50 | 	if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
51 | 
52 |     /* Normalize this frame to std. dev. 1*/
53 |     for ( int j = 0; j < 12; j++ )
54 |         AREF2(chrom_energy, i, j) /= dev;
55 |   }
56 | }
57 | 
/* min2 -- the smaller of x and y (y when neither is smaller) */
double min2( double x, double y ) {
    if (x < y) {
        return x;
    }
    return y;
}
62 | 
63 | /*				GEN_DIST
64 |  *
65 |  * This function generates the Euclidean distance for points i
66 |  * and j in two chroma vectors for use with dynamic time warping of 
67 |  * the chroma vectors.
68 |  */
69 | float gen_dist( int i, int j, float *chrom_energy1, 
70 | 	       float *chrom_energy2 ) {
71 | 
72 |   float sum = 0;
73 |   float MAX = 12.0;
74 | 
75 |   if (AREF2(chrom_energy1, i, CHROMA_BIN_COUNT) !=
76 | 	  AREF2(chrom_energy2, j, CHROMA_BIN_COUNT)) {
77 |       //printf("gd%g ", SILENCE_DISTANCE); // print result
78 |       return SILENCE_DISTANCE;
79 |   }
80 |   /* Determine the distance between these vectors 
81 |      chroma1[i] and chroma2[j] to return */
82 |   for (int k = 0; k < 12; k++) {
83 |       float x = AREF2(chrom_energy1, i, k);
84 |       float y = AREF2(chrom_energy2, j, k);
85 |       float diff = x - y;
86 | 
87 |       sum += diff*diff ;
88 |   }
89 |   sum = min2( sqrt( sum ), MAX );
90 |   //printf("gd%g ", sum); // print the result
91 |   return sum;
92 | }
93 | 


--------------------------------------------------------------------------------
/audiofilereader.cpp:
--------------------------------------------------------------------------------
 1 | /* audiofilereader.cpp -- implements a class to read samples
 2 |  *
 3 |  * 14-Jun-08  RBD
 4 |  * 16-Jun-08  RBD revised to use libsndfile
 5 |  */
 6 | #include "assert.h"
 7 | #include "stdio.h"
 8 | #include "string.h"
 9 | #include "sndfile.h"
10 | #include "audioreader.h"
11 | #include "audiofilereader.h"
12 | 
13 | double Audio_file_reader::get_sample_rate()
14 | {
15 |     return sf_info.samplerate;
16 | }
17 | 
18 | 
19 | long Audio_file_reader::get_frames()
20 | {
21 |     return total_frames;
22 | }
23 | 
24 | 
25 | long Audio_file_reader::read(float *data, long n)
26 | {
27 |     // note that "samples_per_frame" is really "frames_per_window" in this
28 |     // context, so we're computing bytes per window
29 |     float *input_data = (float *) alloca(bytes_per_frame * samples_per_frame);
30 |     assert(input_data != NULL) ;
31 | 	
32 |     // read into the end of data
33 |     long frames_read = sf_readf_float(sf, input_data, n);
34 |     long chans = sf_info.channels;
35 |     // now convert and move to beginning of data
36 |     if (chans > 1) {
37 |         for (int frame = 0; frame < frames_read; frame++) {
38 |             float sum = 0;
39 |             for (int chan = 0; chan < sf_info.channels; chan++) {
40 |                 // sum over channels within a frame
41 |                 sum += input_data[frame * chans + chan];
42 |             }
43 |             // write the frame sum to result array
44 |             data[frame] = sum;
45 |         }
46 |     }
47 |     return frames_read;
48 | }
49 | 
50 | 
51 | bool Audio_file_reader::open(char *filename, Scorealign &sa, bool verbose)
52 | {
53 |     bytes_per_frame = 0; // initialize now in case an error occurs
54 |     name[0] = 0;
55 |     bzero(&sf_info, sizeof(sf_info));
56 |     sf = sf_open(filename, SFM_READ, &sf_info);
57 |     if (!sf) return false;
58 |     strncpy(name, filename, MAX_NAME_LEN);
59 |     name[MAX_NAME_LEN] = 0; // just in case
60 |     total_frames = (long) sf_seek(sf, 0, SEEK_END);
61 |     sf_seek(sf, 0, SEEK_SET);
62 |     // we're going to read floats, but they might be multi-channel...
63 |     bytes_per_frame = sf_info.channels * sizeof(float);
64 |     calculate_parameters(sa, verbose);
65 |     return true;
66 | }
67 | 
68 | 
69 | void Audio_file_reader::close()
70 | {
71 |     sf_close(sf);
72 | }
73 | 
74 | 
75 | void Audio_file_reader::print_info()
76 | {
77 |     printf("   file name = %s\n", name);
78 |     double sample_rate = sf_info.samplerate;
79 |     printf("   sample rate = %g\n", sample_rate);
80 |     printf("   channels = %d\n", sf_info.channels);
81 |     /*=============================================================*/
82 |     printf("   total frames number is = %d\n", total_frames);
83 |     printf("   audio duration = %g seconds\n", total_frames / sample_rate);
84 |     /*=============================================================*/
85 | }
86 | 


--------------------------------------------------------------------------------
/Makefile.osx:
--------------------------------------------------------------------------------
  1 | # OS X Makefile for scorealign
  2 | # NOTE: This is out of date. See the score-align.vcproj -OR- Makefile.linux
  3 | 
  4 | # profile = -pg
  5 | profile = 
  6 | 
  7 | DEBUGFLAGS = 
  8 | 
  9 | #optimize = -g $(DEBUGFLAGS) $(profile)
 10 | optimize = -O
 11 | 
 12 | #PORTSMF LIBRARY PATH:
 13 | # (set this to the location of allegro.cpp, allegrosmfrd.cpp, allegrosmfwr.cpp, etc.)
 14 | # (if you do not have this directory, look for portsmf in the portmedia project
 15 | #  on sourceforge)
 16 | PORTSMF = ../portsmf
 17 | 
 18 | INCLUDES = -Isnd -Ifft3 -I$(PORTSMF)
 19 | 
 20 | #OPTIONS FOR LINUX:
 21 | c_opts = $(optimize) -DLINUX $(INCLUDES)
 22 | 
 23 | cc = gcc $(c_opts)
 24 | c++ = g++ $(c_opts)
 25 | cclink = gcc
 26 | c++link = g++
 27 | syslibs = -lm
 28 | 
 29 | scorealign_o = comp_chroma.o gen_chroma.o scorealign.o snd/audionone.o snd/sndcvt.o snd/sndheader.o snd/sndio.o snd/snd.o snd/ieeecvt.o snd/sndlinux.o snd/sndfaillinux.o fft3/FFT.o mfmidi.o $(PORTSMF)/allegro.o $(PORTSMF)/allegrosmfrd.o $(PORTSMF)/allegrosmfwr.o $(PORTSMF)/allegrord.o $(PORTSMF)/strparse.o trace.o regression.o
 30 | 
 31 | .SUFFIXES: .cpp .c .h .aur .o
 32 | 
 33 | .cpp.o:
 34 | 	$(c++) -c $< -o $*.o
 35 | 
 36 | .c.o:
 37 | 	$(cc) -c $< -o $*.o
 38 | 
 39 | all: scorealign
 40 | 
 41 | header:
 42 | 	echo "**********************************************************"
 43 | 	echo "**********************************************************"
 44 | 
 45 | clean: 
 46 | 	echo "removing objects"
 47 | 	find . -name "*~" | xargs rm -f
 48 | 	rm -f $(scorealign_o)
 49 | 	rm -f core*
 50 | 
 51 | cleaner: clean
 52 | 	rm -f TAGS
 53 | 
 54 | cleanest: cleaner
 55 | 	rm -f scorealign
 56 | 	rm -f scorealign.ncb
 57 | 	rm -f scorealign.opt
 58 | 	rm -f scorealign.plg
 59 | 
 60 | TAGS: 
 61 | 	find . \( -name "*.cpp" -o -name "*.h" \) -print | etags -
 62 | 
 63 | tags: TAGS
 64 | 
 65 | backup: cleanest
 66 | 	tar cvf ../scorealign.tar .
 67 | 	gzip ../scorealign.tar
 68 | 	echo "please rename ../scorealign.tar.gz"
 69 | 
 70 | 
 71 | scorealign: $(scorealign_o)
 72 | 	$(c++link) $(scorealign_o) -o scorealign $(syslibs)
 73 | 
 74 | test0:
 75 | 	./scorealign /home/rbd/wdh/music/nyquist-temp.wav /home/rbd/wdh/music/chrom_nyquist-temp.wav > output.txt
 76 | 	diff output.txt /home/rbd/wdh/music/output.txt | less
 77 | 
 78 | test:
 79 | 	./scorealign /home/rbd/scorealign/data/live-beeth-s5m1-25s-mono.wav  /home/rbd/scorealign/data/midi-beeth-s5m1-25s-mono.wav > output.txt
 80 | 	diff output.txt /home/rbd/wdh/music/output.txt | less
 81 | 
 82 | bigtest:
 83 | 	./scorealign data/live-Beethoven_Symphony_No5_Mvt1.wav data/midi-Beethoven_Symphony_No5_Mvt1.wav
 84 | 
 85 | bigtest2: 
 86 | 	./scorealign data/midi-Beethoven_Symphony_No5_Mvt1.wav data/live-Beethoven_Symphony_No5_Mvt1.wav
 87 | 
 88 | 
 89 | # DEPENDENCIES
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/Makefile.linux:
--------------------------------------------------------------------------------
  1 | # Linux makefile for scorealign
  2 | # please use either: make -f Makefile.linux
  3 | # -OR- ln -s Makefile.linux Makefile
  4 | 
  5 | # profile = -pg
  6 | profile = 
  7 | 
  8 | DEBUGFLAGS = 
  9 | 
 10 | optimize = -g $(DEBUGFLAGS) $(profile)
 11 | # optimize = -O
 12 | 
 13 | #PORTSMF LIBRARY PATH:
 14 | # (set this to the location of allegro.cpp, allegrosmfrd.cpp, allegrosmfwr.cpp, etc.)
 15 | # (if you do not have this directory, look for portsmf in the portmedia project
 16 | #  on sourceforge)
 17 | PORTSMF = ../portsmf
 18 | 
 19 | INCLUDES = -Isnd -Ifft3 -I$(PORTSMF)
 20 | 
 21 | #OPTIONS FOR LINUX:
 22 | c_opts = $(optimize) -DLINUX $(INCLUDES)
 23 | 
 24 | cc = gcc $(c_opts)
 25 | c++ = g++ $(c_opts)
 26 | cclink = gcc
 27 | c++link = g++
 28 | syslibs = -lsndfile -lm
 29 | 
 30 | scorealign_o = comp_chroma.o gen_chroma.o scorealign.o \
 31 |                fft3/FFT3.o $(PORTSMF)/mfmidi.o $(PORTSMF)/allegro.o \
 32 |                $(PORTSMF)/allegrosmfrd.o $(PORTSMF)/allegrosmfwr.o \
 33 |                $(PORTSMF)/allegrord.o $(PORTSMF)/strparse.o trace.o \
 34 |                regression.o sautils.o curvefit.o hillclimb.o main.o \
 35 |                audioreader.o audiofilereader.o alignfiles.o
 36 | # Suffix rules: compile .cpp and .c sources to .o with the flags set above.
 37 | .SUFFIXES: .cpp .c .h .aur .o
 38 | 
 39 | .cpp.o:
 40 | 	$(c++) -c $< -o $*.o
 41 | 
 42 | .c.o:
 43 | 	$(cc) -c $< -o $*.o
 44 | # Default target: build the scorealign executable.
 45 | all: scorealign
 46 | # header: print two separator lines.
 47 | header:
 48 | 	echo "**********************************************************"
 49 | 	echo "**********************************************************"
 50 | # clean: remove editor backup files (*~), the object files and core files.
 51 | clean: 
 52 | 	echo "removing objects"
 53 | 	find . -name "*~" | xargs rm -f
 54 | 	rm -f $(scorealign_o)
 55 | 	rm -f core*
 56 | # cleaner: clean, plus the TAGS file.
 57 | cleaner: clean
 58 | 	rm -f TAGS
 59 | # cleanest: cleaner, plus the executable and scorealign.ncb/.opt/.plg.
 60 | cleanest: cleaner
 61 | 	rm -f scorealign
 62 | 	rm -f scorealign.ncb
 63 | 	rm -f scorealign.opt
 64 | 	rm -f scorealign.plg
 65 | # TAGS: index every .cpp and .h file below this directory with etags.
 66 | TAGS: 
 67 | 	find . \( -name "*.cpp" -o -name "*.h" \) -print | etags -
 68 | 
 69 | tags: TAGS
 70 | # backup: after cleanest, tar and gzip this directory to ../scorealign.tar.gz.
 71 | backup: cleanest
 72 | 	tar cvf ../scorealign.tar .
 73 | 	gzip ../scorealign.tar
 74 | 	echo "please rename ../scorealign.tar.gz"
 75 | 
 76 | # Link the executable from the object list and the system libraries.
 77 | scorealign: $(scorealign_o)
 78 | 	$(c++link) $(scorealign_o) -o scorealign $(syslibs)
 79 | # test0 and test use absolute paths under /home/rbd; adjust before running.
 80 | test0:
 81 | 	./scorealign /home/rbd/wdh/music/nyquist-temp.wav /home/rbd/wdh/music/chrom_nyquist-temp.wav > output.txt
 82 | 	diff output.txt /home/rbd/wdh/music/output.txt | less
 83 | 
 84 | test:
 85 | 	./scorealign /home/rbd/scorealign/data/live-beeth-s5m1-25s-mono.wav  /home/rbd/scorealign/data/midi-beeth-s5m1-25s-mono.wav > output.txt
 86 | 	diff output.txt /home/rbd/wdh/music/output.txt | less
 87 | # bigtest and bigtest2 align the same two data/ files in opposite order.
 88 | bigtest:
 89 | 	./scorealign data/live-Beethoven_Symphony_No5_Mvt1.wav data/midi-Beethoven_Symphony_No5_Mvt1.wav
 90 | 
 91 | bigtest2: 
 92 | 	./scorealign data/midi-Beethoven_Symphony_No5_Mvt1.wav data/live-Beethoven_Symphony_No5_Mvt1.wav
 93 | 
 94 | 
 95 | # DEPENDENCIES
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/alignfiles.cpp:
--------------------------------------------------------------------------------
 1 | /* alignfiles.cpp -- do alignment on files
 2 |  *
 3 |  * 14-Jul-08 RBD
 4 |  *
 5 |  * This is an intermediate layer between main.cpp, the client, and 
 6 |  * scorealign.cpp, where the real work is done. 
 7 |  */
 8 |  
 9 | #include "stdio.h"
10 | #include "string.h"
11 | #include "sndfile.h"
12 | #include <fstream>
13 | #include "allegro.h"
14 | #include "audioreader.h"
15 | #include "audiofilereader.h"
16 | #include "alignfiles.h"
17 | #include "scorealign.h"
18 | 
19 | using namespace std;
20 | 
21 | /* is_midi_file -- see if file name ends in .mid (case-sensitive compare);
22 |  * the name must be longer than 4 characters, so ".mid" alone is rejected */
23 | bool is_midi_file(char *filename)
24 | {
25 |     size_t len = strlen(filename);
26 |     return (len > 4 && strcmp(filename + len - 4, ".mid") == 0);
27 | }
28 | /* align_files -- open both inputs and call the matching sa.align_* method;
29 |  * returns false if an input cannot be opened (message printed if verbose) */
30 | bool align_files(char *infilename1, char *infilename2, 
31 |                 Scorealign &sa, bool verbose)
32 | {
33 |     if (verbose) printf("opening %s\n", infilename1);
34 |     if (is_midi_file(infilename1)) {
35 |         // get sequence from infilename1
36 |         Alg_seq seq(infilename1, true);
37 |         if (seq.get_read_error()) { // error opening file
38 |             if (verbose)
39 |                 printf ("Error: Not able to open input file %s\n", 
40 |                         infilename1);
41 |             return false;
42 |         }
43 |         if (verbose) printf("opening %s\n", infilename2);
44 |         if (is_midi_file(infilename2)) {
45 |             // get sequence from infilename2
46 |             Alg_seq seq2(infilename2, true);
47 |             if (seq2.get_read_error()) { // error opening file
48 |                 if (verbose)
49 |                     printf ("Error: Not able to open input file %s\n", 
50 |                             infilename2);
51 |                 return false;
52 |             }
53 |             sa.align_midi_to_midi(seq, seq2, verbose);
54 |             return true;
55 |         } else {
56 |             // get audio from infilename2
57 |             Audio_file_reader reader;
58 |             if (!reader.open(infilename2, sa, verbose)) {
59 |                 if (verbose)
60 |                     printf ("Error: Not able to open input file %s\n", 
61 |                             infilename2);
62 |                 return false;
63 |             }
64 |             sa.align_midi_to_audio(seq, reader, verbose);
65 |             return true;
66 |         }
67 |     } else { // first file is audio; second is opened as audio too (no .mid check)
68 |         Audio_file_reader reader1;
69 |         if (!reader1.open(infilename1, sa, verbose)) {
70 |             if (verbose)
71 |                 printf ("Error: Not able to open input file %s\n", 
72 |                         infilename1);
73 |             return false;
74 |         }
75 |         if (verbose) printf("opening %s\n", infilename2);
76 |         Audio_file_reader reader2;
77 |         if (!reader2.open(infilename2, sa, verbose)) {
78 |             if (verbose)
79 |                 printf ("Error: Not able to open input file %s\n", 
80 |                         infilename2);
81 |             return false;
82 |         }
83 |         sa.align_audio_to_audio(reader1, reader2, verbose);
84 |         return true;
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/audioreader.cpp:
--------------------------------------------------------------------------------
 1 | /* audioreader.cpp -- reads sequence of overlapping windows
 2 |  *
 3 |  * 14-Jul-08  RBD
 4 |  */
 5 | 
 6 | #include "assert.h"
 7 | #include "stdio.h"
 8 | #include "math.h"
 9 | #include "sautils.h"
10 | #include "string.h"
11 | #include <fstream>
12 | #include "allegro.h"
13 | #include "audioreader.h"
14 | #include "scorealign.h"
15 | 
16 | using namespace std;
17 | 
18 | long Audio_reader::read_window(float *data)
19 | // reads the next window of samples
20 | //   the first time, fill half the window with zeros and the second half 
21 | // with data from the file
22 | // after that, shift the window by hop_samples and fill the end of the 
23 | //   window with hop_samples new samples
24 | // the window is actually constructed in temp_data, then copied to data. 
25 | // That way, the caller can apply a smoothing function to data and we'll
26 | //   still have a copy.
27 | // returns true (as a long) up to and including the first call whose read
28 | //   comes up short; the call after that one returns false
29 | // data -- the window to be returned
30 | // temp_data -- since we destroy data by windowing, temp_data saves 
31 | //   overlapping samples so we don't have to read them again
32 | // samples_per_frame -- must be even, note that first window is padded
33 | //   half-full with zeros
34 | // hop_samples -- additional samples read each time after the first window
35 | {
36 |     int frames_read;    // how many frames did we read?
37 | 
38 |     int hop = hop_samples;
39 |     if (reading_first_window) {
40 |         hop = samples_per_frame / 2; // first time we read half a window
41 |         // zero end of temp_data, which will shift to beginning
42 |         memset(temp_data + hop, 0, 
43 |                sizeof(float) * (samples_per_frame - hop));
44 |         reading_first_window = false;
45 |     }
46 | 	
47 |     // before reading in new samples, shift temp_data left by hop
48 |     memmove(temp_data, temp_data + hop, 
49 |             (samples_per_frame - hop) * sizeof(float));
50 | 
51 |     frames_read = read(temp_data + samples_per_frame - hop, hop);
52 |     // zero any leftovers (happens at last frame):
53 |     //printf("check fr %i  hs %i ws %i ",frames_read,hop_size,window_size); 
54 |     memset(temp_data + samples_per_frame - hop + frames_read, 0, 
55 |            sizeof(float) * (hop - frames_read));
56 |     assert(samples_per_frame - frames_read >= 0);
57 | 
58 |     // now copy temp_data to data
59 |     memcpy(data, temp_data, sizeof(float) * samples_per_frame);
60 |     
61 |     if (frames_read != hop && reading_last_window == false) {
62 |         reading_last_window = true; // short read: this window is the last
63 |         return true; 
64 |     } else if (reading_last_window == true) {
65 |         return false; // the last window was already handed out
66 |     } else {
67 |         return true; 
68 |     }
69 | }
70 | 
71 | 
72 | void Audio_reader::calculate_parameters(Scorealign &sa, bool verbose)
73 | // computes samples_per_frame, hop_samples, actual_frame_period and
74 | // frame_count from sa and the reader's sample rate and length, then
75 | // allocates the zero-filled temp_data buffer used by read_window
76 | {
77 |     double sample_rate = get_sample_rate();
78 |     long pcm_frames = get_frames();
79 |     // we want to make sure samples_per_frame is even, to keep things
80 |     // consistent we'll change hop_samples the same way
81 |     samples_per_frame = (int) (sa.window_size * sample_rate + 0.5);
82 |     if (samples_per_frame % 2 == 1)
83 |         samples_per_frame = samples_per_frame + 1;
84 | 
85 |     hop_samples = (int)(sa.frame_period * sample_rate + 0.5);
86 |     if (hop_samples % 2 == 1)
87 |         hop_samples = hop_samples + 1;
88 |     actual_frame_period = (hop_samples / sample_rate);
89 | 
90 |     // use double: float cannot represent sample counts above 2^24 exactly
91 |     frame_count = (int) ceil((double) pcm_frames / hop_samples + 1);
92 |     temp_data = ALLOC(float, samples_per_frame);
93 |     assert(temp_data); // check the allocation before it is written to
94 |     memset(temp_data, 0, samples_per_frame * sizeof(temp_data[0]));
95 | }
96 | 
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/fft3/FFT3.h:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | 
  3 |   FFT3.h -- FFT routines, based on the following but renamed with "3"
  4 |   to avoid naming problems. This early implementation from Audacity has
  5 |   been updated and the current Audacity version imports wxChar, but if I
  6 |   adapt the latest code into the scorealign library, then scorealign
  7 |   will depend upon wxWindows. On the other hand, if I don't update to
  8 |   the latest, then there are name conflicts when scorealign is used
  9 |   within Audacity. For now, at least, I'm just renaming functions, 
 10 |   which has no real impact on scorealign (other than function names
 11 |   with the mystery "3" appended), but will result in duplicated code
 12 |   in Audacity. -RBD
 13 | 
 14 |   FFT.h
 15 | 
 16 |   Dominic Mazzoni
 17 | 
 18 |   September 2000
 19 | 
 20 |   This file contains a few FFT routines, including a real-FFT
 21 |   routine that is almost twice as fast as a normal complex FFT,
 22 |   and a power spectrum routine which is more convenient when
 23 |   you know you don't care about phase information.  It now also
 24 |   contains a few basic windowing functions.
 25 | 
 26 |   Some of this code was based on a free implementation of an FFT
 27 |   by Don Cross, available on the web at:
 28 | 
 29 |     http://www.intersrv.com/~dcross/fft.html
 30 | 
 31 |   The basic algorithm for his code was based on Numerical Recipes
 32 |   in Fortran.  I optimized his code further by reducing array
 33 |   accesses, caching the bit reversal table, and eliminating
 34 |   float-to-float conversions, and I added the routines to
 35 |   calculate a real FFT and a real power spectrum.
 36 | 
 37 |   Note: all of these routines use single-precision floats.
 38 |   I have found that in practice, floats work well until you
 39 |   get above 8192 samples.  If you need to do a larger FFT,
 40 |   you need to use doubles.
 41 | 
 42 | **********************************************************************/
 43 | 
 44 | #ifndef M_PI
 45 | #define	M_PI		3.14159265358979323846  /* pi */
 46 | #endif
 47 | #ifndef __cplusplus /* C++ has true/false keywords; define macros for C only */
 48 | #define false 0
 49 | #define true 1
 50 | #endif
 51 | /*
 52 |  * This is the function you will use the most often.
 53 |  * Given an array of floats, this will compute the power
 54 |  * spectrum by doing a Real FFT and then computing the
 55 |  * sum of the squares of the real and imaginary parts.
 56 |  * Note that the output array is half the length of the
 57 |  * input array, and that NumSamples must be a power of two.
 58 |  */
 59 | 
 60 | void PowerSpectrum3(int NumSamples, float *In, float *Out);
 61 | 
 62 | /*
 63 |  * Computes an FFT when the input data is real but you still
 64 |  * want complex data as output.  The output arrays are half
 65 |  * the length of the input, and NumSamples must be a power of
 66 |  * two.
 67 |  */
 68 | 
 69 | void RealFFT3(int NumSamples,
 70 |              float *RealIn, float *RealOut, float *ImagOut);
 71 | 
 72 | /*
 73 |  * Computes an FFT of complex input and returns complex output.
 74 |  * Currently this is the only function here that supports the
 75 |  * inverse transform as well.
 76 |  */
 77 | 
 78 | void FFT3(int NumSamples,
 79 |          int InverseTransform,
 80 |          float *RealIn, float *ImagIn, float *RealOut, float *ImagOut);
 81 | 
 82 | /*
 83 |  * Applies a windowing function to the data in place
 84 |  *
 85 |  * 0: Rectangular (no window)
 86 |  * 1: Bartlett    (triangular)
 87 |  * 2: Hamming
 88 |  * 3: Hanning
 89 |  */
 90 | 
 91 | void WindowFunc3(int whichFunction, int NumSamples, float *data);
 92 | 
 93 | /*
 94 |  * Returns the name of the windowing function (for UI display)
 95 |  */
 96 | 
 97 | char *WindowFuncName3(int whichFunction);
 98 | 
 99 | /*
100 |  * Returns the number of windowing functions supported
101 |  */
102 | 
103 | int NumWindowFuncs3();   
104 | 


--------------------------------------------------------------------------------
/scorealign.h:
--------------------------------------------------------------------------------
  1 | /* scorealign.h 
  2 |  *
  3 |  * RBD
  4 |  */
  5 | 
  6 | // turn on lots of debugging, comment this line out to disable
  7 | // #define SA_VERBOSE 1
  8 | 
  9 | #ifdef SA_VERBOSE
 10 | #define SA_V(stmt) stmt
 11 | #else
 12 | #define SA_V(stmt) 
 13 | #endif
 14 | 
 15 | 
 16 | class Scorealign {
 17 |  public:
 18 |     float frame_period; // time in seconds
 19 |     float window_size; // time in seconds (scaled by the sample rate in Audio_reader)
 20 |     float presmooth_time;
 21 |     float line_time;
 22 |     float smooth_time; // duration of smoothing window
 23 |     int smooth; // number of points used to compute the smooth time map
 24 | 
 25 |     Scorealign() { // sets defaults; smooth and the chroma/time-map pointers stay unset
 26 |         frame_period = 0.25;
 27 |         window_size = 0.25;
 28 |         presmooth_time = 0.0;
 29 |         line_time = 0.0;
 30 |         smooth_time = 1.75;
 31 |         pathlen = 0;
 32 |         path_count = 0;
 33 |         pathx = NULL;
 34 |         pathy = NULL;
 35 |     }
 36 | 
 37 |     ~Scorealign() { // frees only pathx and pathy
 38 |         if (pathx) free(pathx);
 39 |         if (pathy) free(pathy);
 40 |     }
 41 | 
 42 |     // chromagrams and lengths, path data
 43 |     float *chrom_energy1;
 44 |     int file1_frames; // number of frames in file1
 45 |     float *chrom_energy2;
 46 |     int file2_frames; // number of frames in file2
 47 |     short *pathx;  // for midi (when aligning midi and audio)
 48 |     short *pathy; // for audio (when aligning midi and audio)
 49 |     int pathlen;
 50 |     void set_pathlen(int p) { pathlen = p; }
 51 |     float *time_map;
 52 |     float *smooth_time_map;
 53 | 
 54 |     // chroma vectors are calculated from an integer number of samples
 55 |     // that approximates the nominal frame_period. Actual frame period
 56 |     // is calculated and stored here:
 57 |     // time in seconds for midi (when aligning midi and audio)
 58 |     float actual_frame_period_1; 
 59 |     // time in seconds for audio (when aligning midi and audio)
 60 |     float actual_frame_period_2; 
 61 | 
 62 |     /* gen_chroma.cpp stuff:
 63 |        Generates the chroma energy for a given file
 64 |        with a low cutoff and high cutoff.
 65 |        The chroma energy is placed in the float** chrom_energy.
 66 |        This 2D structure is an array of pointers; each pointer points to
 67 |        an array of length 12, representing the 12 chroma bins.
 68 |        The function returns the number of frames
 69 |        (i.e. the length of the 1st dimension of chrom_energy).
 70 |     */
 71 |     int gen_chroma_audio(Audio_reader &reader, int hcutoff, int lcutoff, 
 72 |                          float **chrom_energy, float *actual_frame_period,
 73 |                          int id, bool verbose);
 74 | 
 75 |     int gen_chroma_midi(Alg_seq &seq,  int hcutoff, int lcutoff, 
 76 |                         float **chrom_energy, float *actual_frame_period,
 77 |                         int id, bool verbose);
 78 | 
 79 |     /* scorealign.cpp stuff: */
 80 |     float map_time(float t1);
 81 |     void midi_tempo_align(Alg_seq &seq , char *midiname, char *beatname);
 82 |     void align_midi_to_audio(Alg_seq &seq, Audio_reader &reader, 
 83 |                             bool verbose);
 84 |     void align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2, bool verbose);
 85 |     void align_audio_to_audio(Audio_reader &reader1, 
 86 |                              Audio_reader &reader2, bool verbose);
 87 |     void align_chromagrams(bool verbose);
 88 | 
 89 |     int path_count; // for debug log formatting
 90 |     void path_step(int i, int j);
 91 |     void path_reverse();
 92 |     int sec_to_pathy_index(float sec);
 93 |     void compare_chroma(bool verbose);
 94 |     void linear_regression(int n, int width, float &a, float &b);
 95 |     void compute_smooth_time_map();
 96 |     void presmooth();
 97 |     void compute_regression_lines();
 98 |     void midi_tempo_align(Alg_seq &seq, bool verbose);
 99 | };
100 | 
101 | #define DEBUG_LOG 1
102 | #if DEBUG_LOG
103 | extern FILE *dbf;
104 | #endif
105 | 
106 | int find_midi_duration(Alg_seq &seq, float *dur);
107 | 


--------------------------------------------------------------------------------
/compare_transcripts.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="8.00"
  5 | 	Name="compare_transcripts"
  6 | 	ProjectGUID="{C7876B12-4C66-4C26-9318-3363CD9A3FE9}"
  7 | 	RootNamespace="compare_transcripts"
  8 | 	Keyword="Win32Proj"
  9 | 	>
 10 | 	<Platforms>
 11 | 		<Platform
 12 | 			Name="Win32"
 13 | 		/>
 14 | 	</Platforms>
 15 | 	<ToolFiles>
 16 | 	</ToolFiles>
 17 | 	<Configurations>
 18 | 		<Configuration
 19 | 			Name="Debug|Win32"
 20 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 21 | 			IntermediateDirectory="$(ConfigurationName)"
 22 | 			ConfigurationType="1"
 23 | 			CharacterSet="1"
 24 | 			>
 25 | 			<Tool
 26 | 				Name="VCPreBuildEventTool"
 27 | 			/>
 28 | 			<Tool
 29 | 				Name="VCCustomBuildTool"
 30 | 			/>
 31 | 			<Tool
 32 | 				Name="VCXMLDataGeneratorTool"
 33 | 			/>
 34 | 			<Tool
 35 | 				Name="VCWebServiceProxyGeneratorTool"
 36 | 			/>
 37 | 			<Tool
 38 | 				Name="VCMIDLTool"
 39 | 			/>
 40 | 			<Tool
 41 | 				Name="VCCLCompilerTool"
 42 | 				Optimization="0"
 43 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 44 | 				MinimalRebuild="true"
 45 | 				BasicRuntimeChecks="3"
 46 | 				RuntimeLibrary="3"
 47 | 				UsePrecompiledHeader="0"
 48 | 				WarningLevel="3"
 49 | 				Detect64BitPortabilityProblems="true"
 50 | 				DebugInformationFormat="4"
 51 | 			/>
 52 | 			<Tool
 53 | 				Name="VCManagedResourceCompilerTool"
 54 | 			/>
 55 | 			<Tool
 56 | 				Name="VCResourceCompilerTool"
 57 | 			/>
 58 | 			<Tool
 59 | 				Name="VCPreLinkEventTool"
 60 | 			/>
 61 | 			<Tool
 62 | 				Name="VCLinkerTool"
 63 | 				LinkIncremental="2"
 64 | 				GenerateDebugInformation="true"
 65 | 				SubSystem="1"
 66 | 				TargetMachine="1"
 67 | 			/>
 68 | 			<Tool
 69 | 				Name="VCALinkTool"
 70 | 			/>
 71 | 			<Tool
 72 | 				Name="VCManifestTool"
 73 | 			/>
 74 | 			<Tool
 75 | 				Name="VCXDCMakeTool"
 76 | 			/>
 77 | 			<Tool
 78 | 				Name="VCBscMakeTool"
 79 | 			/>
 80 | 			<Tool
 81 | 				Name="VCFxCopTool"
 82 | 			/>
 83 | 			<Tool
 84 | 				Name="VCAppVerifierTool"
 85 | 			/>
 86 | 			<Tool
 87 | 				Name="VCWebDeploymentTool"
 88 | 			/>
 89 | 			<Tool
 90 | 				Name="VCPostBuildEventTool"
 91 | 			/>
 92 | 		</Configuration>
 93 | 		<Configuration
 94 | 			Name="Release|Win32"
 95 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 96 | 			IntermediateDirectory="$(ConfigurationName)"
 97 | 			ConfigurationType="1"
 98 | 			CharacterSet="1"
 99 | 			WholeProgramOptimization="1"
100 | 			>
101 | 			<Tool
102 | 				Name="VCPreBuildEventTool"
103 | 			/>
104 | 			<Tool
105 | 				Name="VCCustomBuildTool"
106 | 			/>
107 | 			<Tool
108 | 				Name="VCXMLDataGeneratorTool"
109 | 			/>
110 | 			<Tool
111 | 				Name="VCWebServiceProxyGeneratorTool"
112 | 			/>
113 | 			<Tool
114 | 				Name="VCMIDLTool"
115 | 			/>
116 | 			<Tool
117 | 				Name="VCCLCompilerTool"
118 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
119 | 				RuntimeLibrary="2"
120 | 				UsePrecompiledHeader="0"
121 | 				WarningLevel="3"
122 | 				Detect64BitPortabilityProblems="true"
123 | 				DebugInformationFormat="3"
124 | 			/>
125 | 			<Tool
126 | 				Name="VCManagedResourceCompilerTool"
127 | 			/>
128 | 			<Tool
129 | 				Name="VCResourceCompilerTool"
130 | 			/>
131 | 			<Tool
132 | 				Name="VCPreLinkEventTool"
133 | 			/>
134 | 			<Tool
135 | 				Name="VCLinkerTool"
136 | 				LinkIncremental="1"
137 | 				GenerateDebugInformation="true"
138 | 				SubSystem="1"
139 | 				OptimizeReferences="2"
140 | 				EnableCOMDATFolding="2"
141 | 				TargetMachine="1"
142 | 			/>
143 | 			<Tool
144 | 				Name="VCALinkTool"
145 | 			/>
146 | 			<Tool
147 | 				Name="VCManifestTool"
148 | 			/>
149 | 			<Tool
150 | 				Name="VCXDCMakeTool"
151 | 			/>
152 | 			<Tool
153 | 				Name="VCBscMakeTool"
154 | 			/>
155 | 			<Tool
156 | 				Name="VCFxCopTool"
157 | 			/>
158 | 			<Tool
159 | 				Name="VCAppVerifierTool"
160 | 			/>
161 | 			<Tool
162 | 				Name="VCWebDeploymentTool"
163 | 			/>
164 | 			<Tool
165 | 				Name="VCPostBuildEventTool"
166 | 			/>
167 | 		</Configuration>
168 | 	</Configurations>
169 | 	<References>
170 | 	</References>
171 | 	<Files>
172 | 		<Filter
173 | 			Name="Source Files"
174 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
175 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176 | 			>
177 | 			<File
178 | 				RelativePath=".\compare_transcripts\compare.cpp"
179 | 				>
180 | 			</File>
181 | 		</Filter>
182 | 		<Filter
183 | 			Name="Header Files"
184 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
185 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
186 | 			>
187 | 		</Filter>
188 | 		<Filter
189 | 			Name="Resource Files"
190 | 			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
191 | 			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
192 | 			>
193 | 		</Filter>
194 | 	</Files>
195 | 	<Globals>
196 | 	</Globals>
197 | </VisualStudioProject>
198 | 


--------------------------------------------------------------------------------
/hillclimb.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  hillclimb.cpp
  3 |  *  scorealign
  4 |  *
  5 |  *  Created by Roger Dannenberg on 10/20/07.
  6 |  *  Copyright 2007 __MyCompanyName__. All rights reserved.
  7 |  *
  8 |  * Hillclimb is an abstract class for optimization. It models problems where
  9 |  * you have a vector of parameters (stored as an array), a corresponding set
 10 |  * of step sizes, and a non-linear function. The function is a virtual
 11 |  * member function that subclasses must implement.
 12 |  *
 13 |  * The optimization algorithm is as follows:
 14 |  * An initial set of parameters and step sizes is given.
 15 |  *
 16 |  * Estimate the partial derivatives with respect to each parameter value
 17 |  * by taking a step along that dimension (use step sizes to determine
 18 |  * how far to go) and calling the evaluate virtual function.
 19 |  * Find the parameter that causes the maximum absolute change. If the
 20 |  * change is positive for that parameter, take the step along that
 21 |  * dimension. If the change is negative, take a negative step along that
 22 |  * dimension. 
 23 |  *
 24 |  * Repeat the previous paragraph as long as the result of evaluate is
 25 |  * increasing. When it stops, you are at the top of a hill, a local
 26 |  * maximum.
 27 |  */
 28 | 
 29 | #include "hillclimb.h"
 30 | #include "stdio.h"
 31 | 
 32 | #define HC_VERBOSE 0
 33 | #define V if (HC_VERBOSE)
 34 | // set_parameters -- remember the caller's arrays (pointers are stored, not copied) and their length
 35 | void Hillclimb::set_parameters(double *p, double *ss, 
 36 |                                double *min_, double *max_, int plen)
 37 | {
 38 |     parameters = p;
 39 |     step_size = ss;
 40 |     min_param = min_;
 41 |     max_param = max_;
 42 |     n = plen;
 43 | }
 44 | 
 45 | /* this optimize assumes that the surface is smooth enough that if the
 46 |  * function decreases when parameter[i] increases, then the function will
 47 |  * increase when parameter[i] decreases. The alternative version does more
 48 |  * evaluation, but checks in both directions to find the best overall move.
 49 | 
 50 | double Hillclimb::optimize()
 51 | {
 52 |     double best = evaluate();
 53 |     while (true) {
 54 |         printf("best %g ", best);
 55 |         // eval partial derivatives
 56 |         int i;
 57 |         // variables to search for max partial derivative
 58 |         double max = 0; // max of |dy| so far
 59 |         int max_i; // index where max was found
 60 |         int max_sign = 1; // sign of dy
 61 |         double max_y; // value of evaluate() at max_i
 62 |         // now search over all parameters for max change
 63 |         for (i = 0; i < n; i++) {
 64 |             int sign = 1; // sign of derivative in the +step direction
 65 |             int step_direction = 1; // how to undo parameter variation
 66 |             parameters[i] += step_size[i];
 67 |             if (parameters[i] > max_param[i]) {
 68 |                 // try stepping in the other direction
 69 |                 parameters[i] -= step_size[i] * 2;
 70 |                 sign = -1;
 71 |                 step_direction = -1;
 72 |             }
 73 |             
 74 |             double y = evaluate();
 75 |             // restore parameter i
 76 |             parameters[i] -= step_size[i] * step_direction;
 77 |             
 78 |             double dy = y - best;
 79 |             if (dy < 0) {
 80 |                 dy = -dy;
 81 |                 sign = -sign;
 82 |             }
 83 |             // is this the best yet and legal move?
 84 |             double proposal = parameters[i] + step_size[i] * sign;
 85 |             if (dy > max && proposal <= max_param[i] && 
 86 |                 proposal >= min_param[i]) {
 87 |                 max = dy;
 88 |                 max_i = i;
 89 |                 max_y = y;
 90 |                 max_sign = sign;
 91 |             }
 92 |         }
 93 |         // best move is parameter max_i in max_sign direction
 94 |         parameters[max_i] += step_size[max_i] * max_sign;
 95 |         printf("moved %d to %g", max_i, parameters[max_i]);
 96 |         // what's the value now? put it in max_y
 97 |         if (max_sign == -1) max_y = evaluate();
 98 |         printf(" to get %g (vs. best %g)\n", max_y, best);
 99 |         // otherwise, max_y already has the new value
100 |         if (max_y <= best) { // no improvement, we're done
101 |             parameters[max_i] -= step_size[max_i] * max_sign;
102 |             printf("\nCompleted hillclimbing, best %g\n", best);
103 |             return best;
104 |         }
105 |         // improvement because max_y higher than best:
106 |         best = max_y;
107 |     }
108 | }
109 | */
110 | // optimize -- repeatedly try +/- one step on each parameter and keep the single best move; returns the best evaluate()
111 | double Hillclimb::optimize()
112 | {
113 |     double best = evaluate();
114 |     while (true) {
115 |         V printf("best %g ", best);
116 |         // try a step up and a step down along every parameter
117 |         int i;
118 |         // variables to search for the best single move
119 |         double max_y = best; // max of evaluate() so far
120 |         int max_i; // index where best max was found (set whenever max_y is raised)
121 |         double max_parameter; // the good parameter value for max_i
122 |         // now search over all parameters for best improvement
123 |         for (i = 0; i < n; i++) {
124 |             V printf("optimize at %d param %g ", i, parameters[i]);
125 |             double save_param = parameters[i];
126 |             parameters[i] = save_param + step_size[i];
127 |             if (parameters[i] <= max_param[i]) { // up-step stays within range
128 |                 double y = evaluate();
129 |                 V printf("up->%g ", y);
130 |                 if (y > max_y) {
131 |                     V printf("NEW MAX! ");
132 |                     max_y = y;
133 |                     max_i = i;
134 |                     max_parameter = parameters[i];
135 |                 }
136 |             }
137 |             parameters[i] = save_param - step_size[i];
138 |             if (parameters[i] >= min_param[i]) { // down-step stays within range
139 |                 double y = evaluate();
140 |                 V printf("dn->%g ", y);
141 |                 if (y > max_y) {
142 |                     V printf("NEW MAX! ");
143 |                     max_y = y;
144 |                     max_i = i;
145 |                     max_parameter = parameters[i];
146 |                 }
147 |             }
148 |             parameters[i] = save_param; // undo the trial step
149 |             V printf("\n");
150 |         }
151 |         if (max_y <= best) { // no improvement, we're done
152 |             V printf("\nCompleted hillclimbing, best %g\n", best);
153 |             return best;
154 |         }
155 |         // improvement because max_y higher than best: commit that one move
156 |         parameters[max_i] = max_parameter;
157 |         best = max_y;
158 |     }
159 | }
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
  1 | scorealign -- a program for audio-to-audio and audio-to-midi alignment
  2 | 
  3 | Last updated July 17, 2008 by RBD
  4 | 
  5 | Contributors include: 
  6 |              Ning Hu
  7 |              Roger B. Dannenberg
  8 |              Joshua Hailpern
  9 |              Umpei Kurokawa
 10 |              Greg Wakefield
 11 |              Mark Bartsch
 12 |  
 13 | scorealign works by computing chromagrams of the two sources. Midi chromagrams
 14 | are estimated directly from pitch data without synthesis. A similarity matrix
 15 | is constructed and dynamic programming finds the lowest-cost path through the
 16 | matrix.
 17 | 
 18 | (some more details should be added here about handling boundaries)
 19 | 
 20 | Output includes a map from one version to the other. If one file is MIDI, 
 21 | output also includes (1) an estimated transcript in ASCII format with time, 
 22 | pitch, MIDI channel, and duration of each note in the audio file, (2) a
 23 | time-aligned midi file, and (3) a text file with beat times.
 24 | 
 25 | scorealign uses libsndfile (http://www.mega-nerd.com/libsndfile/). You must
 26 | install libsndfile to build scorealign.
 27 | 
 28 | For Macintosh OS X, use Xcode to open scorealign.xcodeproj
 29 | For Linux, use "make -f Makefile.linux"
 30 | For Windows, open score-align.vcproj (probably out of date now -- please
 31 |     update the project following Makefile.linux, or contact rbd at cs.cmu.edu)
 32 | 
 33 | Command line parameters:
 34 | 
 35 | scorealign [-<flags> [<period><windowsize><path> <smooth><trans> <midi>]] 
 36 |                  <file1> [<file2>]
 37 |    specifying only <file1> simply transcribes MIDI in <file1> to  
 38 |    transcription.txt. Otherwise, align <file1> and <file2>.
 39 |    -h 0.25 indicates a frame period of 0.25 seconds
 40 |    -w 0.25 indicates a window size of 0.25 seconds. 
 41 |    -r indicates filename to write raw alignment path to (default path.data)
 42 |    -s is filename to write smoothed alignment path(default is smooth.data)
 43 |    -t is filename to write the time aligned transcription 
 44 |       (default is transcription.txt)
 45 |    -m is filename to write the time aligned midi file (default is midi.mid)
 46 |    -b is filename to write the time aligned beat times (default is beatmap.txt)
 47 |    -o 2.0 indicates a smoothing window of 2.0s
 48 |    -p 3.0 means pre-smooth with a 3s window
 49 |    -x 6.0 indicates 6s line segment approximation
 50 |    
 51 | A bit more detail:
 52 | 
 53 | The -o flag (smoothing) controls a post-process on the path. Since the
 54 | path is discrete, it will have small jumps ahead or pauses whenever it
 55 | differs from the diagonal. A linear regression is performed at each frame
 56 | using a set of points whose size is determined by the -o parameter, and the
 57 | discrete time indicated by the path is replaced by a continuous time estimated
 58 | from neighboring points. This smooths out local irregularities in the time
 59 | map.
 60 | 
 61 | The -p flag (presmoothing) operates on the discrete path. It tries to fit a 
 62 | straight line segment (length is set by -p) to the path. If the path fits
 63 | well to the first half of the path and the second half of the path, the 
 64 | entire path is replaced with a straight line approximation. To "fit well",
 65 | half of the path points must fall very close to the straight line (currently,
 66 | within 1.5 frames). For example, if the line segment spans 40 frames, then 10
 67 | path points must be close to the first 20 frames and 10 path points must be 
 68 | close to the last 20 frames. The step is repeated on overlapping windows
 69 | through the whole piece. This presmoothing step is designed to detect
 70 | places where dynamic programming "wanders off" from the true path and then
 71 | realigns to the true path. The off-track points are replaced, so they do not
 72 | adversely affect the smoothing step. This approach does not seem to be 
 73 | robust, but sometimes works well.
 74 | 
 75 | The -x flag is another approach to deal with dynamic programming errors. It
 76 | divides the entire piece into segments whose lengths are about equal and about
 77 | the length specified by the -x parameter. The line segments are fit to the
 78 | path by linear regression, and their endpoints are joined by averaging their
 79 | linear regression values. Next, a hill-climbing search is performed to 
 80 | minimize the total distance along the path. This is like dynamic programming
 81 | except that each line spans many frames, so the resulting path is forced to 
 82 | be fairly straight. Linear interpolation is used to estimate chroma distance
 83 | since the lines do not always pass through integer frame locations. This approach
 84 | is probably good when the audio is known to have a steady tempo or be 
 85 | performed with tempo changes that match those in the midi file.
 86 | 
 87 | Some notes on the software architecture of scorealign:
 88 | 
 89 | scorealign was originally implemented as a fairly monolithic program
 90 | in MatLab. It was ported to C++. To incorporate this code into Audacity,
 91 | the code was restructured so that audio input is obtained from
 92 | Audio_reader, an abstract class that calls on a subclass to implement
 93 | read(). The subclass just copies floats into the provided buffer. It is
 94 | responsible for sample format conversion, stereo-to-mono conversion, etc.
 95 | The Audio_reader returns possibly overlapping buffers of floats. The
 96 | Audio_file_reader subclass uses libsndfile to read in samples and convert
 97 | them to float. It does its own conversion to mono.
 98 | 
 99 | When scorealign is used in Audacity, a different subclass of Audio_reader
100 | will call into Audacity using a Mixer object to retrieve samples from
101 | selected tracks.
102 | 
103 | For use from the command line, scorealign has a module main.cpp that 
104 | parses command line arguments. A lot of parameters and options that 
105 | were formerly globals are now stored in a Scorealign object that is
106 | passed around to many routines and methods. main.cpp creates a (global)
107 | Scorealign object and uses code in the module alignfiles.cpp to do the
108 | work. The purpose of alignfiles is to provide an API that does not 
109 | depend upon a command line interface, but which assumes you are aligning
110 | files. Finally, alignfiles.cpp uses an Audio_file_reader to offer
111 | samples to the main score alignment algorithm.
112 | 
113 | To summarize:
114 |    scorealign.cpp and gen_chroma.cpp do most of the pure alignment work
115 |    audioreader.cpp abstracts the source of audio, whether it comes from
116 |       a file or some other source
117 |    alignfiles.cpp opens files and invokes the modules above
118 |    main.cpp parses the command line and invokes alignfiles.
119 | 
120 | 


--------------------------------------------------------------------------------
/curvefit.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  curvefit.cpp
  3 |  *  scorealign
  4 |  *
  5 |  *  Created by Roger Dannenberg on 10/20/07.
  6 |  *  Copyright 2007 __MyCompanyName__. All rights reserved.
  7 |  *
  8 |  */
  9 | 
 10 | #include "assert.h"
 11 | #include "comp_chroma.h"
 12 | #include "sautils.h"
 13 | // the following are needed to get Scorealign
 14 | #include <fstream>
 15 | #include "allegro.h"
 16 | #include "audioreader.h"
 17 | #include "scorealign.h"
 18 | #include "hillclimb.h"
 19 | #include "curvefit.h"
 20 | 
 21 | void save_path(char *filename);
 22 | 
 23 | /* Curvefit class: do hill-climbing to fit lines to data
 24 |  *
 25 |  * This class implements the algorithm described above.
 26 |  * The problem is partitioned into the general search algorithm
 27 |  * (implemented in Hillclimb::optimize) and the evaluation function
 28 |  * (implemented in Curvefit::evaluate). A brute-force evaluation
 29 |  * would simply recompute the cost of the entire path every time,
 30 |  * but note that the search algorithm works by adjusting one parameter
 31 |  * at a time. This affects at most two line segments, so the rest
 32 |  * contribute a cost that does not need to be recomputed. Thus the
 33 |  * total cost can be computed incrementally. It is hard to see how
 34 |  * to use this optimization within the general Hillclimb:optimize
 35 |  * method, so to avoid making that algorithm very specific and ugly,
 36 |  * I decided to hide the incremental nature of evaluate inside
 37 |  * the evaluate function itself. The way this works is that evaluate
 38 |  * keeps a cache of the coordinates of each line segment and the
 39 |  * resulting cost of the segment. Before recomputing any segment,
 40 |  * the cache is consulted. If the end points have not moved, the
 41 |  * cached value is retrieved. Ideally, there should be a 3-element
 42 |  * cache because endpoints are moved and then restored. (The three
 43 |  * elements would hold the results of the original, changed left,
 44 |  * and changed right endpoints.) The bigger cache would eliminate
 45 |  * 1/3 of the computation, but the simple cache already eliminates
 46 |  * about (n-2)/n of the work, so that should help a lot.
 47 |  */
 48 | 
 49 | class Curvefit : public Hillclimb {
 50 | public:
 51 |     Curvefit(Scorealign *sa_, bool verbose_) { sa = sa_; verbose = verbose_; }
 52 |     virtual double evaluate();
 53 |     void setup(int n);
 54 |     double *get_x() { return x; }
 55 | private:
 56 |     Scorealign *sa;
 57 |     bool verbose;
 58 |     double line_dist(int i); // get cost of line segment i
 59 |     double compute_dist(int i); // compute cost of line segment i
 60 |     double distance_rc(int row, int col);
 61 |     double distance_xy(double x, double y);
 62 | 
 63 |     double *p1_cache; // left endpoint y values
 64 |     double *p2_cache; // right endpoint y values
 65 |     double *d_cache; // cached cost of line segment
 66 |     double *x;       // the x values of line segment endpoints
 67 |         // (the y values are in parameters[])
 68 | };
 69 | 
 70 | 
 71 | double Curvefit::evaluate()
 72 | {
 73 |     double sum = 0;
 74 |     // why does this loop go to n-2? Because i represents the left endpoint
 75 |     // of the line segment. There are n parameters, but only n-1 segments.
 76 |     for (int i = 0; i < n-1; i++) {
 77 |         sum += line_dist(i); // look up in cache or recompute each segment
 78 |     }
 79 |     return -sum; // return negative of distance so that bigger will be better
 80 | }
 81 | 
 82 | 
 83 | double Curvefit::line_dist(int i)
 84 | {
 85 |     if (p1_cache[i] == parameters[i] &&
 86 |         p2_cache[i] == parameters[i+1]) {
 87 |         // endpoints have not changed:
 88 |         return d_cache[i];
 89 |     }
 90 |     // otherwise, we need to recompute and save dist in cache
 91 |     double d = compute_dist(i);
 92 |     p1_cache[i] = parameters[i];
 93 |     p2_cache[i] = parameters[i+1];
 94 |     d_cache[i] = d;
 95 |     return d;
 96 | }
 97 | 
 98 | 
 99 | void Curvefit::setup(int segments)
100 | {
101 |     // number of parameters is greater than segments because the left
102 |     // col of segment i is parameter i, so the right col of 
103 |     // the last segment == parameter[segments].
104 |     n = segments + 1;
105 |     parameters = ALLOC(double, n);
106 |     p1_cache = ALLOC(double, n);
107 |     p2_cache = ALLOC(double, n);
108 |     d_cache = ALLOC(double, n);
109 |     x = ALLOC(double, n);
110 |     step_size = ALLOC(double, n);
111 |     min_param = ALLOC(double, n);
112 |     max_param = ALLOC(double, n);
113 |     int i;
114 |     // ideal frames per segment
115 |     float seg_length = ((float) (sa->file1_frames - 1)) / segments;
116 |     for (i = 0; i < n; i++) { // initialize cache keys to garbage
117 |         p1_cache[i] = p2_cache[i] = -999999.99;
118 |         // initialize x values
119 |         x[i] = ROUND(i * seg_length);
120 |         // now initialize parameters based on pathx/pathy/time_map
121 |         // time_map has y values for each x
122 |         parameters[i] = sa->time_map[(int) x[i]];
123 |         if (verbose)
124 |             printf("initial x[%d] = %g, parameters[%d] = %g\n", 
125 |                    i, x[i], i, parameters[i]);
126 |         step_size[i] = 0.5;
127 |         min_param[i] = 0;
128 |         max_param[i] = sa->file2_frames - 1;
129 |     }
130 | }
131 | 
132 | 
133 | // distance_rc -- look up or compute distance between chroma vectors
134 | //     at row, col in similarity matrix
135 | //
136 | // Note: in current implementation, there is no stored representation
137 | // of the matrix, so we have to recompute every time. It would be
138 | // possible to store the whole matrix, but it's large and it would
139 | // double the memory requirements (we already allocate the large
140 | // PATH array in compare_chroma to compute the optimal path.
141 | // 
142 | // Since distance can be computed relatively quickly, a better plan
143 | // would be to cache values along the path. Here's a brief design
144 | // (for the future, assuming this routine is actually a hot spot):
145 | // Allocate a matrix that is, say, 20 x file1_frames to contain distances
146 | // that are +/- 10 frames from the path. Initialize cells to -1.
147 | // Allocate an array of integer offsets of size file1_frames.
148 | // Fill in the integer offsets with the column number (pathy) value of
149 | // the path. 
150 | // Now, to get distance_rc(row, col):
151 | //    offset = offsets[row]
152 | //    i = 10 + col - offset;
153 | //    if (i < 0 || i > 20) /* not in cache */ return compute_distance(...);
154 | //    dist = distances[20 * row + i];
155 | //    if (dist == -1) { return distances[20 * row + i] = compute_distance...}
156 | //    return dist;
157 | //
158 | double Curvefit::distance_rc(int row, int col)
159 | {
160 |     return gen_dist(row, col, sa->chrom_energy1, sa->chrom_energy2);
161 | }
162 | 
163 | 
164 | // compute distance from distance matrix using interpolation. A least
165 | // one of x, y should be an integer value so interpolation is only 2-way
166 | double Curvefit::distance_xy(double x, double y)
167 | {
168 |     int xi = (int) x;
169 |     int yi = (int) y;
170 |     if (xi == x) { // x is integer, interpolate along y axis
171 |         double d1 = distance_rc(xi, yi);
172 |         double d2 = distance_rc(xi, yi + 1);
173 |         return interpolate(yi, d1, yi + 1, d2, y);
174 |     } else if (yi == y) { // y is integer, interpolate along x axis
175 |         double d1 = distance_rc(xi, yi);
176 |         double d2 = distance_rc(xi + 1, yi);
177 |         return interpolate(xi, d1, xi + 1, d2, x);
178 |     } else {
179 |         printf("FATAL INTERNAL ERROR IN distance_xy: neither x nor y is "
180 |                "an integer\n");
181 |         assert(false);
182 |     }
183 | }
184 | 
185 | 
186 | double Curvefit::compute_dist(int i)
187 | {
188 |     double x1 = x[i], x2 = x[i+1];
189 |     double y1 = parameters[i], y2 = parameters[i+1];
190 |     double dx = x2 - x1, dy = y2 - y1;
191 |     double sum = 0;
192 |     int n;
193 |     if (dx > dy) { // evauate at each x
194 |         n = (int) dx;
195 |         for (int x = (int) x1; x < x2; x++) {
196 |             double y = interpolate(x1, y1, x2, y2, x);
197 |             sum += distance_xy(x, y);
198 |         }
199 |     } else { // evaluate at each y
200 |         n = (int) dy;
201 |         for (int y = (int) y1; y < y2; y++) {
202 |             double x = interpolate(y1, x1, y2, x2, y);
203 |             sum += distance_xy(x, y);
204 |         }
205 |     }
206 |     // normalize using line length: sum/n is average distance. Multiply
207 |     // avg. distance (cost per unit length) by length to get total cost:
208 |     double rslt = sqrt(dx*dx + dy*dy) * sum / n;
209 |     // printf("compute_dist %d: x1 %g y1 %g x2 %g y2 %g sum %g rslt %g\n",
210 |     //        i, x1, y1, x2, y2, sum, rslt);
211 |     return rslt;
212 | }
213 | 
214 |     
215 | void curve_fitting(Scorealign *sa, bool verbose)
216 | {
217 |     if (verbose)
218 |         printf("Performing line-segment approximation with %gs segments.\n", 
219 |                sa->line_time);
220 |     Curvefit curvefit(sa, verbose);
221 |     double *parameters;
222 |     double *x;
223 |     // how many segments? About total time / line_time:
224 |     int segments = 
225 |         (int) (0.5 + (sa->actual_frame_period_1 * sa->file1_frames) /
226 |                      sa->line_time);
227 |     curvefit.setup(segments);
228 |     curvefit.optimize();
229 |     parameters = curvefit.get_parameters();
230 |     x = curvefit.get_x();
231 |     // now, rewrite pathx and pathy according to segments
232 |     // pathx and pathy are generously allocated, so we can change pathlen
233 |     // each segment goes from x[i], parameters[i] to x[i+1], parameters[i+1]
234 |     int i;
235 |     int j = 0; // index into path
236 |     for (i = 0; i < segments; i++) {
237 |         int x1 = (int) x[i];
238 |         int x2 = (int) x[i+1];
239 |         int y1 = (int) parameters[i];
240 |         int y2 = (int) parameters[i+1];
241 |         int dx = x2 - x1;
242 |         int dy = y2 - y1;
243 |         if (dx >= dy) { // output point at each x
244 |             int x;
245 |             for (x = x1; x < x2; x++) {
246 |                 sa->pathx[j] = x;
247 |                 sa->pathy[j] = (int) (0.5 + interpolate(x1, y1, x2, y2, x));
248 |                 j++;
249 |             }
250 |         } else {
251 |             int y;
252 |             for (y = y1; y < y2; y++) {
253 |                 sa->pathx[j] = (int) (0.5 + interpolate(y1, x1, y2, x2, y));
254 |                 sa->pathy[j] = y;
255 |                 j++;
256 |             }
257 |         }
258 |     }
259 |     // output last point
260 |     sa->pathx[j] = (int) x[segments];
261 |     sa->pathy[j] = (int) (0.5 + parameters[segments]);
262 |     j++;
263 |     sa->set_pathlen(j);
264 | }
265 | 
266 | 
267 | 
268 | 


--------------------------------------------------------------------------------
/fft3/FFT3.cpp:
--------------------------------------------------------------------------------
  1 | /**********************************************************************
  2 | 
  3 |   FFT3.cpp -- see notes in FFT3.h -RBD
  4 | 
  5 |   FFT.cpp
  6 | 
  7 |   Dominic Mazzoni
  8 | 
  9 |   September 2000
 10 | 
 11 |   This file contains a few FFT routines, including a real-FFT
 12 |   routine that is almost twice as fast as a normal complex FFT,
 13 |   and a power spectrum routine when you know you don't care
 14 |   about phase information.
 15 | 
 16 |   Some of this code was based on a free implementation of an FFT
 17 |   by Don Cross, available on the web at:
 18 | 
 19 |     http://www.intersrv.com/~dcross/fft.html
 20 | 
 21 |   The basic algorithm for his code was based on Numerical Recipes
 22 |   in Fortran.  I optimized his code further by reducing array
 23 |   accesses, caching the bit reversal table, and eliminating
 24 |   float-to-float conversions, and I added the routines to
 25 |   calculate a real FFT and a real power spectrum.
 26 | 
 27 | **********************************************************************/
 28 | 
 29 | #include <stdlib.h>
 30 | #include <stdio.h>
 31 | #include <math.h>
 32 | #ifndef __MACH__
 33 |    #include <malloc.h>
 34 | #endif
 35 | #include "FFT3.h"
 36 | 
// Cache of bit-reversal permutations, built by InitFFT(): entry [b - 1]
// is a table of 2^b ints holding ReverseBits(i, b) for every index i.
// It stays NULL until InitFFT() has run (FFT3 calls InitFFT on first use).
int **gFFTBitTable3 = NULL;
// Largest bit count that has a cached table; FastReverseBits3() falls back
// to computing the reversal directly for anything larger.
const int MaxFastBits = 16;
 39 | 
 40 | int IsPowerOfTwo(int x)
 41 | {
 42 |    if (x < 2)
 43 |       return false;
 44 | 
 45 |    if (x & (x - 1))             /* Thanks to 'byang' for this cute trick! */
 46 |       return false;
 47 | 
 48 |    return true;
 49 | }
 50 | 
 51 | int NumberOfBitsNeeded(int PowerOfTwo)
 52 | {
 53 |    int i;
 54 | 
 55 |    if (PowerOfTwo < 2) {
 56 |       fprintf(stderr, "Error: FFT called with size %d\n", PowerOfTwo);
 57 |       exit(1);
 58 |    }
 59 | 
 60 |    for (i = 0;; i++)
 61 |       if (PowerOfTwo & (1 << i))
 62 |          return i;
 63 | }
 64 | 
 65 | int ReverseBits(int index, int NumBits)
 66 | {
 67 |    int i, rev;
 68 | 
 69 |    for (i = rev = 0; i < NumBits; i++) {
 70 |       rev = (rev << 1) | (index & 1);
 71 |       index >>= 1;
 72 |    }
 73 | 
 74 |    return rev;
 75 | }
 76 | 
 77 | void InitFFT()
 78 | {
 79 |    gFFTBitTable3 = (int **) malloc(sizeof(int *) * MaxFastBits);
 80 | 
 81 |    int len = 2;
 82 |    int b;
 83 |    for (b = 1; b <= MaxFastBits; b++) {
 84 | 
 85 |        gFFTBitTable3[b - 1] = (int *) malloc(len * sizeof(int));
 86 |        int i;
 87 |       for (i = 0; i < len; i++)
 88 |          gFFTBitTable3[b - 1][i] = ReverseBits(i, b);
 89 | 
 90 |       len <<= 1;
 91 |    }
 92 | }
 93 | 
 94 | inline int FastReverseBits3(int i, int NumBits)
 95 | {
 96 |    if (NumBits <= MaxFastBits)
 97 |       return gFFTBitTable3[NumBits - 1][i];
 98 |    else
 99 |       return ReverseBits(i, NumBits);
100 | }
101 | 
/*
 * Complex Fast Fourier Transform
 *
 * NumSamples        number of complex points; must be a power of two and
 *                   at least 2, otherwise an error is printed to stderr
 *                   and the program exits with status 1
 * InverseTransform  nonzero negates the twiddle angle and divides every
 *                   output by NumSamples; zero leaves the result unscaled
 * RealIn, ImagIn    input arrays of NumSamples floats; ImagIn may be NULL,
 *                   in which case the imaginary input is taken as zero
 * RealOut, ImagOut  output arrays of NumSamples floats
 *
 * The outputs must not overlap the inputs: the first loop scatters the
 * inputs into bit-reversed output positions while later inputs are still
 * unread. The bit-reversal tables are built on the first call.
 */

void FFT3(int NumSamples,
         int InverseTransform,
         float *RealIn, float *ImagIn, float *RealOut, float *ImagOut)
{
   int NumBits;                 /* Number of bits needed to store indices */
   int i, j, k, n;
   int BlockSize, BlockEnd;

   float angle_numerator = 2.0 * M_PI;
   float tr, ti;                /* temp real, temp imaginary */

   if (!IsPowerOfTwo(NumSamples)) {
      fprintf(stderr, "%d is not a power of two\n", NumSamples);
      exit(1);
   }

   /* build the bit-reversal tables on first use */
   if (!gFFTBitTable3)
      InitFFT();

   if (InverseTransform)
      angle_numerator = -angle_numerator;

   NumBits = NumberOfBitsNeeded(NumSamples);

   /*
    **   Do simultaneous data copy and bit-reversal ordering into outputs...
    */

   for (i = 0; i < NumSamples; i++) {
      j = FastReverseBits3(i, NumBits);
      RealOut[j] = RealIn[i];
      ImagOut[j] = (ImagIn == NULL) ? 0.0F : ImagIn[i];
   }

   /*
    **   Do the FFT itself...
    */

   /* BlockSize doubles on each pass; BlockEnd is always half of BlockSize,
      i.e. the distance between the two elements of a butterfly */
   BlockEnd = 1;
   for (BlockSize = 2; BlockSize <= NumSamples; BlockSize <<= 1) {

      float delta_angle = angle_numerator / (float) BlockSize;

      /* Seed values for the recurrence below: cosine and sine at
         -2*delta_angle and -delta_angle, with w = 2*cos(delta_angle) */
      float sm2 = sin(-2 * delta_angle);
      float sm1 = sin(-delta_angle);
      float cm2 = cos(-2 * delta_angle);
      float cm1 = cos(-delta_angle);
      float w = 2 * cm1;
      float ar0, ar1, ar2, ai0, ai1, ai2;

      for (i = 0; i < NumSamples; i += BlockSize) {
         /* restart the recurrence for each block */
         ar2 = cm2;
         ar1 = cm1;

         ai2 = sm2;
         ai1 = sm1;

         for (j = i, n = 0; n < BlockEnd; j++, n++) {
            /* Advance the twiddle factor (ar0, ai0) by one angle step
               using c[k] = w*c[k-1] - c[k-2]; from the seeds above the
               first value produced is the one for angle 0 */
            ar0 = w * ar1 - ar2;
            ar2 = ar1;
            ar1 = ar0;

            ai0 = w * ai1 - ai2;
            ai2 = ai1;
            ai1 = ai0;

            /* butterfly: (tr, ti) is element k multiplied by the twiddle */
            k = j + BlockEnd;
            tr = ar0 * RealOut[k] - ai0 * ImagOut[k];
            ti = ar0 * ImagOut[k] + ai0 * RealOut[k];

            /*     if(k==NumSamples-1)
                printf("j=NumSamples-1 => %g - %g=",RealOut[j],tr);
            */
            RealOut[k] = RealOut[j] - tr;
            ImagOut[k] = ImagOut[j] - ti;
/*
            if(k==NumSamples-1)
                printf("%g\n",RealOut[k]);
*/
            RealOut[j] += tr;
            ImagOut[j] += ti;
         }
      }

      BlockEnd = BlockSize;
   }

   /*
      **   Need to normalize if inverse transform...
    */

   if (InverseTransform) {
      float denom = (float) NumSamples;

      for (i = 0; i < NumSamples; i++) {
         RealOut[i] /= denom;
         ImagOut[i] /= denom;
      }
   }
}
206 | 
/*
 * Real Fast Fourier Transform
 *
 * This function was based on the code in Numerical Recipes in C.
 * In Num. Rec., the inner loop is based on a single 1-based array
 * of interleaved real and imaginary numbers.  Because we have two
 * separate zero-based arrays, our indices are quite different.
 * Here is the correspondence between Num. Rec. indices and our indices:
 *
 * i1  <->  real[i]
 * i2  <->  imag[i]
 * i3  <->  real[n/2-i]
 * i4  <->  imag[n/2-i]
 *
 * RealIn holds NumSamples real samples. They are packed into NumSamples/2
 * complex values (even samples as real parts, odd samples as imaginary
 * parts), transformed with FFT3, and then post-processed in place.
 * RealOut and ImagOut each receive NumSamples/2 values. Element 0 is
 * special: RealOut[0] becomes real + imaginary of the half-size FFT's
 * element 0 and ImagOut[0] becomes real - imaginary. The element at index
 * NumSamples/4 is left exactly as FFT3 produced it.
 *
 * NumSamples/2 is passed to FFT3, so it must satisfy FFT3's power-of-two
 * requirement. The temporary buffers come from alloca, i.e. the stack.
 */

void RealFFT3(int NumSamples, float *RealIn, float *RealOut, float *ImagOut)
{
   int Half = NumSamples / 2;
   int i;

   float theta = M_PI / Half;

   float *tmpReal = (float *) alloca(sizeof(float) * Half);
   float *tmpImag = (float *) alloca(sizeof(float) * Half);

   /* de-interleave: even samples -> real part, odd samples -> imag part */
   for (i = 0; i < Half; i++) {
      tmpReal[i] = RealIn[2 * i];
      tmpImag[i] = RealIn[2 * i + 1];
   }

   FFT3(Half, 0, tmpReal, tmpImag, RealOut, ImagOut);

   /* (wr, wi) starts at (cos(theta), sin(theta)) and is rotated by theta
      after each loop iteration using the increments (wpr, wpi) */
   float wtemp = (float) (sin(0.5 * theta));

   float wpr = -2.0F * wtemp * wtemp;
   float wpi = (float) (sin(theta));
   float wr = 1.0F + wpr;
   float wi = wpi;

   int i3;

   float h1r, h1i, h2r, h2i;

   /* combine each element i with its mirror element i3 = Half - i */
   for (i = 1; i < Half / 2; i++) {

      i3 = Half - i;

      h1r = 0.5F * (RealOut[i] + RealOut[i3]);
      h1i = 0.5F * (ImagOut[i] - ImagOut[i3]);
      h2r = 0.5F * (ImagOut[i] + ImagOut[i3]);
      h2i = -0.5F * (RealOut[i] - RealOut[i3]);

      RealOut[i] = h1r + wr * h2r - wi * h2i;
      ImagOut[i] = h1i + wr * h2i + wi * h2r;
      RealOut[i3] = h1r - wr * h2r + wi * h2i;
      ImagOut[i3] = -h1i + wr * h2i + wi * h2r;

      /* wtemp keeps the old wr, which the wi update still needs */
      wr = (wtemp = wr) * wpr - wi * wpi + wr;
      wi = wi * wpr + wtemp * wpi + wi;
   }

   /* h1r keeps the old RealOut[0] so both results use the original value */
   RealOut[0] = (h1r = RealOut[0]) + ImagOut[0];
   ImagOut[0] = h1r - ImagOut[0];

   /* nothing to free: the temporaries were obtained with alloca */
   //free(tmpReal);
   //free(tmpImag);
}
274 | 
/*
 * PowerSpectrum
 *
 * This function computes the same as RealFFT, above, but
 * adds the squares of the real and imaginary part of each
 * coefficient, extracting the power and throwing away the
 * phase.
 *
 * For speed, it does not call RealFFT, but duplicates some
 * of its code.
 *
 * In holds NumSamples real samples; Out receives NumSamples/2 values
 * (indices 0 .. NumSamples/2 - 1). Out[0] is the sum of the squares of
 * the two quantities that RealFFT3 stores in RealOut[0] and ImagOut[0].
 * Out[NumSamples/4] is computed straight from the half-size FFT output.
 *
 * NumSamples/2 is passed to FFT3, so it must satisfy FFT3's power-of-two
 * requirement. All four work buffers come from alloca, i.e. the stack.
 */

void PowerSpectrum3(int NumSamples, float *In, float *Out)
{
   int Half = NumSamples / 2;
   int i;

   float theta = M_PI / Half;

   float *tmpReal = (float *) alloca(sizeof(float) * Half);;
   float *tmpImag = (float *) alloca(sizeof(float) * Half);
   float *RealOut = (float *) alloca(sizeof(float) * Half);
   float *ImagOut = (float *) alloca(sizeof(float) * Half);

   /* de-interleave: even samples -> real part, odd samples -> imag part */
   for (i = 0; i < Half; i++) {
      tmpReal[i] = In[2 * i];
      tmpImag[i] = In[2 * i + 1];
   }

   FFT3(Half, 0, tmpReal, tmpImag, RealOut, ImagOut);

   /* (wr, wi) starts at (cos(theta), sin(theta)) and is rotated by theta
      after each loop iteration using the increments (wpr, wpi) */
   float wtemp = (float) (sin(0.5 * theta));

   float wpr = -2.0F * wtemp * wtemp;
   float wpi = (float) (sin(theta));
   float wr = 1.0F + wpr;
   float wi = wpi;

   int i3;

   float h1r, h1i, h2r, h2i, rt, it;

   /* same pairing of element i with its mirror i3 as in RealFFT3, but only
      the squared magnitude of each result is stored */
   for (i = 1; i < Half / 2; i++) {

      i3 = Half - i;

      h1r = 0.5F * (RealOut[i] + RealOut[i3]);
      h1i = 0.5F * (ImagOut[i] - ImagOut[i3]);
      h2r = 0.5F * (ImagOut[i] + ImagOut[i3]);
      h2i = -0.5F * (RealOut[i] - RealOut[i3]);

      rt = h1r + wr * h2r - wi * h2i;
      it = h1i + wr * h2i + wi * h2r;

      Out[i] = rt * rt + it * it;

      rt = h1r - wr * h2r + wi * h2i;
      it = -h1i + wr * h2i + wi * h2r;

      Out[i3] = rt * rt + it * it;

      /* wtemp keeps the old wr, which the wi update still needs */
      wr = (wtemp = wr) * wpr - wi * wpi + wr;
      wi = wi * wpr + wtemp * wpi + wi;
   }

   /* element 0: square and add (real + imag) and (real - imag) */
   rt = (h1r = RealOut[0]) + ImagOut[0];
   it = h1r - ImagOut[0];
   Out[0] = rt * rt + it * it;

   /* middle element: taken directly from the half-size FFT output */
   rt = RealOut[Half / 2];
   it = ImagOut[Half / 2];
   Out[Half / 2] = rt * rt + it * it;

   /* nothing to free: the work buffers were obtained with alloca */
   //free(tmpReal);
   //free(tmpImag);
   //free(RealOut);
   //free(ImagOut);
}
353 | 
354 | /*
355 |  * Windowing Functions
356 |  */
357 | 
// Number of window functions known to WindowFuncName3 and WindowFunc3
// (function numbers 0 through 3).
int NumWindowFuncs3()
{
   const int window_func_count = 4;
   return window_func_count;
}
362 | 
// Returns the display name of window function number whichFunction:
// 0 "Rectangular", 1 "Bartlett", 2 "Hamming", 3 "Hanning". Any other
// number yields "Rectangular".
//
// The names live in writable static arrays because the function returns
// char *, and converting a string literal to char * is not valid C++
// since C++11. Callers should still treat the result as read-only: the
// storage is shared by all calls.
char *WindowFuncName3(int whichFunction)
{
   static char names[][12] = {
      "Rectangular",
      "Bartlett",
      "Hamming",
      "Hanning"
   };
   const int count = (int) (sizeof(names) / sizeof(names[0]));

   // unknown function numbers fall back to the rectangular window's name
   if (whichFunction < 0 || whichFunction >= count)
      whichFunction = 0;

   return names[whichFunction];
}
377 | 
// Multiplies the NumSamples values of `in`, in place, by the window
// selected with whichFunction: 1 Bartlett, 2 Hamming, 3 Hanning. Any other
// number (including 0, the rectangular window) leaves the data unchanged.
void WindowFunc3(int whichFunction, int NumSamples, float *in)
{
   switch (whichFunction) {
   case 1: {
      // Bartlett (triangular) window: ramp up over the first half and
      // down over the second half
      const int half = NumSamples / 2;
      for (int k = 0; k < half; k++) {
         in[k] *= (k / (float) half);
         in[k + half] *= (1.0F - (k / (float) half));
      }
      break;
   }
   case 2:
      // Hamming
      for (int k = 0; k < NumSamples; k++)
         in[k] *= 0.54F - 0.46F * (float) cos(2 * M_PI * k / (NumSamples - 1));
      break;
   case 3:
      // Hanning
      for (int k = 0; k < NumSamples; k++)
         in[k] *= 0.50F - 0.50F * (float) cos(2 * M_PI * k / (NumSamples - 1));
      break;
   default:
      // rectangular or unknown: nothing to apply
      break;
   }
}
403 | 


--------------------------------------------------------------------------------
/score-align.vcproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="Windows-1252"?>
  2 | <VisualStudioProject
  3 | 	ProjectType="Visual C++"
  4 | 	Version="8.00"
  5 | 	Name="score-align"
  6 | 	ProjectGUID="{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}"
  7 | 	RootNamespace="scorealign"
  8 | 	Keyword="Win32Proj"
  9 | 	>
 10 | 	<Platforms>
 11 | 		<Platform
 12 | 			Name="Win32"
 13 | 		/>
 14 | 	</Platforms>
 15 | 	<ToolFiles>
 16 | 	</ToolFiles>
 17 | 	<Configurations>
 18 | 		<Configuration
 19 | 			Name="Debug|Win32"
 20 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
 21 | 			IntermediateDirectory="$(ConfigurationName)"
 22 | 			ConfigurationType="1"
 23 | 			UseOfMFC="2"
 24 | 			CharacterSet="1"
 25 | 			>
 26 | 			<Tool
 27 | 				Name="VCPreBuildEventTool"
 28 | 			/>
 29 | 			<Tool
 30 | 				Name="VCCustomBuildTool"
 31 | 			/>
 32 | 			<Tool
 33 | 				Name="VCXMLDataGeneratorTool"
 34 | 			/>
 35 | 			<Tool
 36 | 				Name="VCWebServiceProxyGeneratorTool"
 37 | 			/>
 38 | 			<Tool
 39 | 				Name="VCMIDLTool"
 40 | 			/>
 41 | 			<Tool
 42 | 				Name="VCCLCompilerTool"
 43 | 				Optimization="0"
 44 | 				AdditionalIncludeDirectories="fft3;snd;..\portsmf"
 45 | 				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
 46 | 				MinimalRebuild="true"
 47 | 				BasicRuntimeChecks="3"
 48 | 				RuntimeLibrary="3"
 49 | 				UsePrecompiledHeader="0"
 50 | 				WarningLevel="3"
 51 | 				Detect64BitPortabilityProblems="true"
 52 | 				DebugInformationFormat="4"
 53 | 				DisableSpecificWarnings="4996"
 54 | 			/>
 55 | 			<Tool
 56 | 				Name="VCManagedResourceCompilerTool"
 57 | 			/>
 58 | 			<Tool
 59 | 				Name="VCResourceCompilerTool"
 60 | 			/>
 61 | 			<Tool
 62 | 				Name="VCPreLinkEventTool"
 63 | 			/>
 64 | 			<Tool
 65 | 				Name="VCLinkerTool"
 66 | 				AdditionalDependencies="wsock32.lib ..\portsmf\Debug\portsmf.lib"
 67 | 				LinkIncremental="2"
 68 | 				AddModuleNamesToAssembly=""
 69 | 				GenerateDebugInformation="true"
 70 | 				SubSystem="1"
 71 | 				TargetMachine="1"
 72 | 			/>
 73 | 			<Tool
 74 | 				Name="VCALinkTool"
 75 | 			/>
 76 | 			<Tool
 77 | 				Name="VCManifestTool"
 78 | 			/>
 79 | 			<Tool
 80 | 				Name="VCXDCMakeTool"
 81 | 			/>
 82 | 			<Tool
 83 | 				Name="VCBscMakeTool"
 84 | 			/>
 85 | 			<Tool
 86 | 				Name="VCFxCopTool"
 87 | 			/>
 88 | 			<Tool
 89 | 				Name="VCAppVerifierTool"
 90 | 			/>
 91 | 			<Tool
 92 | 				Name="VCWebDeploymentTool"
 93 | 			/>
 94 | 			<Tool
 95 | 				Name="VCPostBuildEventTool"
 96 | 			/>
 97 | 		</Configuration>
 98 | 		<Configuration
 99 | 			Name="Release|Win32"
100 | 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
101 | 			IntermediateDirectory="$(ConfigurationName)"
102 | 			ConfigurationType="1"
103 | 			CharacterSet="1"
104 | 			WholeProgramOptimization="1"
105 | 			>
106 | 			<Tool
107 | 				Name="VCPreBuildEventTool"
108 | 			/>
109 | 			<Tool
110 | 				Name="VCCustomBuildTool"
111 | 			/>
112 | 			<Tool
113 | 				Name="VCXMLDataGeneratorTool"
114 | 			/>
115 | 			<Tool
116 | 				Name="VCWebServiceProxyGeneratorTool"
117 | 			/>
118 | 			<Tool
119 | 				Name="VCMIDLTool"
120 | 			/>
121 | 			<Tool
122 | 				Name="VCCLCompilerTool"
123 | 				AdditionalIncludeDirectories="fft3;..\portsmf;snd"
124 | 				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
125 | 				RuntimeLibrary="2"
126 | 				UsePrecompiledHeader="0"
127 | 				WarningLevel="3"
128 | 				Detect64BitPortabilityProblems="true"
129 | 				DebugInformationFormat="3"
130 | 			/>
131 | 			<Tool
132 | 				Name="VCManagedResourceCompilerTool"
133 | 			/>
134 | 			<Tool
135 | 				Name="VCResourceCompilerTool"
136 | 			/>
137 | 			<Tool
138 | 				Name="VCPreLinkEventTool"
139 | 			/>
140 | 			<Tool
141 | 				Name="VCLinkerTool"
142 | 				AdditionalDependencies="wsock32.lib ..\portsmf\Release\portsmf.lib"
143 | 				LinkIncremental="1"
144 | 				GenerateDebugInformation="true"
145 | 				SubSystem="1"
146 | 				OptimizeReferences="2"
147 | 				EnableCOMDATFolding="2"
148 | 				TargetMachine="1"
149 | 			/>
150 | 			<Tool
151 | 				Name="VCALinkTool"
152 | 			/>
153 | 			<Tool
154 | 				Name="VCManifestTool"
155 | 			/>
156 | 			<Tool
157 | 				Name="VCXDCMakeTool"
158 | 			/>
159 | 			<Tool
160 | 				Name="VCBscMakeTool"
161 | 			/>
162 | 			<Tool
163 | 				Name="VCFxCopTool"
164 | 			/>
165 | 			<Tool
166 | 				Name="VCAppVerifierTool"
167 | 			/>
168 | 			<Tool
169 | 				Name="VCWebDeploymentTool"
170 | 			/>
171 | 			<Tool
172 | 				Name="VCPostBuildEventTool"
173 | 			/>
174 | 		</Configuration>
175 | 	</Configurations>
176 | 	<References>
177 | 	</References>
178 | 	<Files>
179 | 		<Filter
180 | 			Name="Source Files"
181 | 			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
182 | 			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
183 | 			>
184 | 			<File
185 | 				RelativePath=".\comp_chroma.cpp"
186 | 				>
187 | 			</File>
188 | 			<File
189 | 				RelativePath=".\curvefit.cpp"
190 | 				>
191 | 			</File>
192 | 			<File
193 | 				RelativePath=".\fft3\FFT.cpp"
194 | 				>
195 | 			</File>
196 | 			<File
197 | 				RelativePath=".\gen_chroma.cpp"
198 | 				>
199 | 			</File>
200 | 			<File
201 | 				RelativePath=".\hillclimb.cpp"
202 | 				>
203 | 			</File>
204 | 			<File
205 | 				RelativePath=".\regression.cpp"
206 | 				>
207 | 			</File>
208 | 			<File
209 | 				RelativePath=".\sautils.cpp"
210 | 				>
211 | 			</File>
212 | 			<File
213 | 				RelativePath=".\scorealign.cpp"
214 | 				>
215 | 			</File>
216 | 			<Filter
217 | 				Name="snd"
218 | 				>
219 | 				<File
220 | 					RelativePath=".\snd\audionone.c"
221 | 					>
222 | 					<FileConfiguration
223 | 						Name="Debug|Win32"
224 | 						>
225 | 						<Tool
226 | 							Name="VCCLCompilerTool"
227 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
228 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
229 | 						/>
230 | 					</FileConfiguration>
231 | 					<FileConfiguration
232 | 						Name="Release|Win32"
233 | 						>
234 | 						<Tool
235 | 							Name="VCCLCompilerTool"
236 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
237 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
238 | 						/>
239 | 					</FileConfiguration>
240 | 				</File>
241 | 				<File
242 | 					RelativePath=".\snd\ieeecvt.c"
243 | 					>
244 | 					<FileConfiguration
245 | 						Name="Debug|Win32"
246 | 						>
247 | 						<Tool
248 | 							Name="VCCLCompilerTool"
249 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
250 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
251 | 						/>
252 | 					</FileConfiguration>
253 | 					<FileConfiguration
254 | 						Name="Release|Win32"
255 | 						>
256 | 						<Tool
257 | 							Name="VCCLCompilerTool"
258 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
259 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
260 | 						/>
261 | 					</FileConfiguration>
262 | 				</File>
263 | 				<File
264 | 					RelativePath=".\snd\snd.c"
265 | 					>
266 | 				</File>
267 | 				<File
268 | 					RelativePath=".\snd\sndcvt.c"
269 | 					>
270 | 					<FileConfiguration
271 | 						Name="Debug|Win32"
272 | 						>
273 | 						<Tool
274 | 							Name="VCCLCompilerTool"
275 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
276 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
277 | 						/>
278 | 					</FileConfiguration>
279 | 					<FileConfiguration
280 | 						Name="Release|Win32"
281 | 						>
282 | 						<Tool
283 | 							Name="VCCLCompilerTool"
284 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
285 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
286 | 						/>
287 | 					</FileConfiguration>
288 | 				</File>
289 | 				<File
290 | 					RelativePath=".\snd\sndfaillinux.c"
291 | 					>
292 | 					<FileConfiguration
293 | 						Name="Debug|Win32"
294 | 						>
295 | 						<Tool
296 | 							Name="VCCLCompilerTool"
297 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
298 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
299 | 						/>
300 | 					</FileConfiguration>
301 | 					<FileConfiguration
302 | 						Name="Release|Win32"
303 | 						>
304 | 						<Tool
305 | 							Name="VCCLCompilerTool"
306 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
307 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
308 | 						/>
309 | 					</FileConfiguration>
310 | 				</File>
311 | 				<File
312 | 					RelativePath=".\snd\sndheader.c"
313 | 					>
314 | 					<FileConfiguration
315 | 						Name="Debug|Win32"
316 | 						>
317 | 						<Tool
318 | 							Name="VCCLCompilerTool"
319 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
320 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
321 | 						/>
322 | 					</FileConfiguration>
323 | 					<FileConfiguration
324 | 						Name="Release|Win32"
325 | 						>
326 | 						<Tool
327 | 							Name="VCCLCompilerTool"
328 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
329 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
330 | 						/>
331 | 					</FileConfiguration>
332 | 				</File>
333 | 				<File
334 | 					RelativePath=".\snd\sndio.c"
335 | 					>
336 | 					<FileConfiguration
337 | 						Name="Debug|Win32"
338 | 						>
339 | 						<Tool
340 | 							Name="VCCLCompilerTool"
341 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
342 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
343 | 						/>
344 | 					</FileConfiguration>
345 | 					<FileConfiguration
346 | 						Name="Release|Win32"
347 | 						>
348 | 						<Tool
349 | 							Name="VCCLCompilerTool"
350 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
351 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
352 | 						/>
353 | 					</FileConfiguration>
354 | 				</File>
355 | 				<File
356 | 					RelativePath=".\snd\sndwin32.c"
357 | 					>
358 | 					<FileConfiguration
359 | 						Name="Debug|Win32"
360 | 						>
361 | 						<Tool
362 | 							Name="VCCLCompilerTool"
363 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
364 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
365 | 						/>
366 | 					</FileConfiguration>
367 | 					<FileConfiguration
368 | 						Name="Release|Win32"
369 | 						>
370 | 						<Tool
371 | 							Name="VCCLCompilerTool"
372 | 							ObjectFile="$(IntDir)\$(InputName)1.obj"
373 | 							XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
374 | 						/>
375 | 					</FileConfiguration>
376 | 				</File>
377 | 			</Filter>
378 | 			<Filter
379 | 				Name="Resource Files"
380 | 				Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
381 | 				UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
382 | 				>
383 | 			</Filter>
384 | 		</Filter>
385 | 		<Filter
386 | 			Name="Header Files"
387 | 			Filter="h;hpp;hxx;hm;inl;inc;xsd"
388 | 			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
389 | 			>
390 | 			<File
391 | 				RelativePath=".\comp_chroma.h"
392 | 				>
393 | 			</File>
394 | 			<File
395 | 				RelativePath=".\curvefit.h"
396 | 				>
397 | 			</File>
398 | 			<File
399 | 				RelativePath=".\fft3\FFT.h"
400 | 				>
401 | 			</File>
402 | 			<File
403 | 				RelativePath=".\gen_chroma.h"
404 | 				>
405 | 			</File>
406 | 			<File
407 | 				RelativePath=".\hillclimb.h"
408 | 				>
409 | 			</File>
410 | 			<File
411 | 				RelativePath=".\regression.h"
412 | 				>
413 | 			</File>
414 | 			<File
415 | 				RelativePath=".\sautils.h"
416 | 				>
417 | 			</File>
418 | 			<File
419 | 				RelativePath=".\scorealign.h"
420 | 				>
421 | 			</File>
422 | 			<File
423 | 				RelativePath=".\snd\snd\switches.h"
424 | 				>
425 | 			</File>
426 | 			<File
427 | 				RelativePath=".\snd\snd\swlogic.h"
428 | 				>
429 | 			</File>
430 | 		</Filter>
431 | 	</Files>
432 | 	<Globals>
433 | 	</Globals>
434 | </VisualStudioProject>
435 | 


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
  1 | /* main.cpp -- the command line interface for scorealign
  2 |  * 
  3 |  * 14-Jul-08 RBD
  4 |  */
  5 | 
  6 | #include "stdio.h"
  7 | #include "main.h"
  8 | #include <fstream>
  9 | #include "allegro.h"
 10 | #include "audioreader.h"
 11 | #include "scorealign.h"
 12 | #include "sautils.h"
 13 | #include "alignfiles.h"
 14 | #include "gen_chroma.h"
 15 | #include "comp_chroma.h"
 16 | 
 17 | // a global object with score alignment parameters and data
 18 | Scorealign sa;
 19 | 
 20 | static void print_usage(char *progname) 
 21 | {
 22 |     printf("\nUsage: %s [-<flags> [<period> <windowsize> <path> <smooth> "
 23 |            "<trans> <midi>]] <file1> [<file2>]\n", progname);
 24 |     printf("   specifying only <file1> simply transcribes MIDI in <file1> "
 25 |            "to\n");
 26 |     printf("   transcription.txt. Otherwise, align <file1> and <file2>.\n");
 27 |     printf("   -h 0.25 indicates a frame period of 0.25 seconds\n");
 28 |     printf("   -w 0.25 indicates a window size of 0.25 seconds. \n");
 29 |     printf("   -r indicates filename to write raw alignment path to "
 30 |            "(default path.data)\n");
 31 |     printf("   -s is filename to write smoothed alignment path(default is "
 32 |            "smooth.data)\n");
 33 |     printf("   -t is filename to write the time aligned transcription "
 34 |            "(default is transcription.txt)\n");
 35 |     printf("   -m is filename to write the time aligned midi file "
 36 |            "(default is midi.mid)\n");
 37 |     printf("   -b is filename to write the time aligned beat times "
 38 |            "(default is beatmap.txt)\n");
 39 |     printf("   -i is filename to write an image of the distance matrix "
 40 |            "(default is distance.pnm)\n");
 41 |     printf("   -o 2.0 indicates a smoothing window time of 2.0s\n");
 42 |     printf("   -p 3.0 indicates presmoothing with a 3s window\n");
 43 |     printf("   -x 6.0 indicates 6s line segment approximation\n");
 44 | #if (defined (_WIN32) || defined (WIN32))
 45 |     printf("   This is a Unix style command line application which\n"
 46 |            "   should be run in a MSDOS box or Command Shell window.\n\n");
 47 |     printf("   Type RETURN to exit.\n") ;
 48 |     getchar();
 49 | #endif
 50 | } /* print_usage */
 51 | 
 52 | 
 53 | /*				SAVE_SMOOTH_FILE 
 54 | 	saves the smooth time map in SMOOTH_FILENAME
 55 | 
 56 | */
 57 | void save_smooth_file(char *smooth_filename, Scorealign &sa) {
 58 |     FILE *smoothf = fopen(smooth_filename, "w");
 59 |     assert(smoothf);
 60 |     for (int i = 0; i < sa.file1_frames; i++) {
 61 |         fprintf(smoothf, "%g \t%g\n", i * sa.actual_frame_period_1,
 62 |                 sa.smooth_time_map[i] * sa.actual_frame_period_2);
 63 |     }
 64 |     fclose(smoothf);
 65 | }
 66 | 
 67 | 
 68 | /*				PRINT_BEAT_MAP
 69 |    prints the allegro beat_map (for debugging) which contain
 70 |    the time, beat pair for a song 
 71 | */
 72 | void print_beat_map(Alg_seq &seq, char *filename) {
 73 |     
 74 |     FILE *beatmap_print = fopen(filename, "w"); 
 75 |     
 76 |     Alg_beats &b = seq.get_time_map()->beats;
 77 |     long num_beats = seq.get_time_map()->length();
 78 |     
 79 |     for(int i = 0; i < num_beats; i++) { 
 80 |         fprintf(beatmap_print," %f  %f \n", b[i].beat, b[i].time); 
 81 |     }	
 82 |     fclose(beatmap_print); 
 83 |     
 84 | }
 85 | 
 86 | 
 87 | /*				EDIT_TRANSCRIPTION
 88 | 	edit the allegro time map structure according
 89 | 	to the warping and output a midi file and transcription
 90 | 	file 
 91 | 
 92 | */
 93 | void edit_transcription(Alg_seq &seq , bool warp, FILE *outf, 
 94 |                         char *midi_filename, char *beat_filename) {
 95 |     int note_x = 1;
 96 |     seq.convert_to_seconds();
 97 |     Alg_iterator iterator(&seq, true);
 98 |     iterator.begin();
 99 | 
100 |     Alg_event_ptr e = iterator.next();
101 | 
102 |     while (e) {
103 |         if (e->is_note()) {
104 |             Alg_note_ptr n = (Alg_note_ptr) e;
105 |             fprintf(outf, "%d %d %d %d ", 
106 |                     note_x++, n->chan, ROUND(n->pitch), ROUND(n->loud));
107 |             // now compute onset time mapped to audio time
108 |             double start = n->time;
109 |             double finish = n->time + n->dur;
110 |             if (warp) {
111 |                 start = sa.map_time(start);
112 |                 finish = sa.map_time(finish);
113 |             }
114 |             fprintf(outf, "%.3f %.3f\n", start, finish-start);
115 |         }
116 |         e = iterator.next();
117 |     }
118 |     iterator.end();
119 |     fclose(outf);
120 |     if (warp) {
121 |         // align the midi file and write out 	
122 |         sa.midi_tempo_align(seq, true);
123 |         seq.smf_write(midi_filename);
124 |         print_beat_map(seq, beat_filename);
125 |     }
126 | }
127 | 
128 | 
129 | // save image of distance matrix
130 | void save_image(char *image_filename, Scorealign &sa)
131 | {
132 |     FILE *outf = fopen(image_filename, "wb");
133 |     float max_d = 0.0;
134 |     float min_d = 999999.0;
135 |     fputs("P5\n", outf);
136 |     fprintf(outf, "%d %d 255\n", sa.file1_frames, sa.file2_frames);
137 |     for (int row = 0; row < sa.file2_frames; row++) {
138 |         for (int col = 0; col < sa.file1_frames; col++) {
139 |             float d = gen_dist(row, col, sa.chrom_energy2, sa.chrom_energy1);
140 |             if (d > max_d) max_d = d;
141 |             if (d < min_d) min_d = d;
142 |             int pixel = (int) (255 * (d / 6.0) + 0.5);
143 |             if (pixel > 255) pixel = 255;
144 |             putc(pixel, outf);
145 |         }
146 |     }
147 |     fclose(outf);
148 |     printf("max distance %g, min distance %g\n", max_d, min_d);
149 | }
150 | 
151 | 
152 | /*		SAVE_TRANSCRIPTION
153 | write note data corresponding to audio file
154 | 
155 | assume audio file is file 1 and midi file is file 2
156 | so pathx is index into audio, pathy is index into MIDI
157 | 
158 | If warp is false, simply write a transcription of the midi file.
159 | 
160 | Every note has 6 fields separated by a space character. The fields are:
161 | <sequence number> <channel> <pitch> <velocity> <onset> <duration> 
162 | Where
163 |    <sequence number> is just an integer note number, e.g. 1, 2, 3, ...
164 |    <channel> is MIDI channel from 0 to 15 
165 |    <pitch> is MIDI key number (60 = middle C)
166 |    <velocity> is MIDI key velocity (1 to 127)
167 |    <onset> is time in seconds, rounded to 3 decimal places (milliseconds)
168 |    <duration> is time in seconds, rounded to 3 decimal places
169 | */
170 | void save_transcription(char *file1, char *file2, 
171 |                         bool warp, char *filename, char *smooth_filename, 
172 |                         char *midi_filename, char *beat_filename)
173 | {
174 |     
175 |     char *midiname; //midi file to be read
176 |     char *audioname; //audio file to be read
177 |     
178 |     if (warp) save_smooth_file(smooth_filename, sa); 
179 |     
180 |     //If either is a midifile
181 |     if (is_midi_file(file1) || is_midi_file(file2)) {
182 | 	
183 |         if (is_midi_file(file1)) {
184 |             midiname=file1;
185 |             audioname=file2;
186 |         } else {
187 |             midiname=file2;
188 |             audioname=file1;
189 |         }
190 | 	
191 |         Alg_seq seq(midiname, true);
192 | 	
193 |         FILE *outf = fopen(filename, "w");
194 |         if (!outf) {
195 |             printf("Error: could not open %s\n", filename);
196 |             return;
197 |         }
198 |         fprintf(outf, "# transcription of %s\n", midiname);
199 |         if (warp) {
200 |             fprintf(outf, "# note times are aligned to %s\n", audioname);
201 |         } else {
202 |             fprintf(outf, "# times are unmodified from those in MIDI file\n");
203 |         }
204 |         fprintf(outf, "# transcription format : <sequence number> "
205 |                 "<channel> <pitch> <velocity> <onset> <duration>\n");
206 |         
207 |         edit_transcription(seq, warp, outf, midi_filename, beat_filename); 
208 |     }
209 | }
210 | 
211 | 
/*		SAVE_PATH
	write the alignment path to FILENAME (a plot file)

	One line "<x> <y>" is written for each of the PATHLEN path points,
	where x is pathx[p] * actual_frame_period_1 and y is
	pathy[p] * actual_frame_period_2.

	If the file cannot be opened, an error is printed and nothing is
	written.
*/
void save_path(char *filename, int pathlen, short* pathx, short *pathy,
               float actual_frame_period_1, float actual_frame_period_2)
{
    FILE *pathf = fopen(filename, "w");
    if (!pathf) {
        // Report the failure explicitly: an assert() is compiled out when
        // NDEBUG is defined and the NULL stream would then be written to.
        printf("Error: could not open %s\n", filename);
        return;
    }
    for (int p = 0; p < pathlen; p++) {
        fprintf(pathf, "%g %g\n", pathx[p] * actual_frame_period_1, 
                pathy[p] * actual_frame_period_2);
    }
    fclose(pathf);
}
228 | 
229 | 
230 | /*			
231 | 	Prints the chroma table (for debugging)
232 | */
233 | 
234 | void print_chroma_table(float *chrom_energy, int frames)
235 | {
236 |     int i, j;
237 |     for (j = 0; j < frames; j++) {
238 |         for (i = 0; i <= CHROMA_BIN_COUNT; i++) {
239 |             printf("%5.2f | ", AREF2(chrom_energy, j, i));
240 |         }
241 |         printf("\n");
242 |     }
243 | }
244 | 
245 | 
246 | int main(int argc, char *argv []) 
247 | {	
248 |     char *progname, *infilename1, *infilename2;
249 |     char *smooth_filename, *path_filename, *trans_filename;
250 |     char *midi_filename, *beat_filename, *image_filename;
251 |     
252 |     //just transcribe if trasncribe == 1
253 |     int transcribe = 0;
254 | 	
255 |     // Default for the user definable parameters
256 |     
257 |     path_filename = "path.data";
258 |     smooth_filename = "smooth.data";
259 |     trans_filename = "transcription.txt";
260 |     midi_filename = "midi.mid";
261 |     beat_filename = "beatmap.txt";
262 |     image_filename = "distance.pnm";
263 | 
264 |     progname = strrchr(argv [0], '/'); 
265 |     progname = progname ? progname + 1 : argv[0] ;
266 | 
267 |     // If no arguments, return usage 
268 |     if (argc < 2) {
269 |         print_usage(progname);
270 |         return 1;
271 |     }
272 | 
273 | 	
274 | 
275 |     /*******PARSING CODE BEGINS*********/
276 |     int i = 1; 
277 |     while (i < argc) {
278 |         //expected flagged argument
279 |         if (argv[i][0] == '-') {
280 |             char flag = argv[i][1];
281 |             if (flag == 'h') {
282 |                 sa.frame_period = atof(argv[i+1]);	
283 |             } else if (flag == 'w') {
284 |                 sa.window_size = atof(argv[i+1]); 
285 |             } else if (flag == 'r') {
286 |                 path_filename = argv[i+1];
287 |             } else if (flag == 's') {
288 |                 smooth_filename = argv[i+1];
289 |             } else if (flag == 't') {
290 |                 trans_filename = argv[i+1]; 
291 |             } else if (flag == 'm') {
292 |                 midi_filename = argv[i+1];
293 |             } else if (flag == 'i') {
294 |                 image_filename = argv[i+1];
295 |             } else if (flag == 'b') {
296 |                 beat_filename = argv[i+1];
297 |             } else if (flag == 'o') {
298 |                 sa.smooth_time = atof(argv[i+1]);
299 |             } else if (flag == 'p') {
300 |                 sa.presmooth_time = atof(argv[i+1]);
301 |             } else if (flag == 'x') {
302 |                 sa.line_time = atof(argv[i+1]);
303 |             }
304 |             i++;
305 |         }
306 |         // When aligning audio to midi we must force file1 to be midi 
307 |         else {			
308 |             // file 1 is midi
309 |             if (transcribe == 0) {
310 |                 infilename1 = argv[i];
311 |                 transcribe++;
312 |             }
313 |             // file 2 is audio or a second midi 
314 |             else {
315 |                 infilename2 = argv[i];
316 |                 transcribe++;
317 |             }	
318 |         }
319 |         i++;
320 |     }
321 |     /**********END PARSING ***********/
322 |     if (sa.presmooth_time > 0 && sa.line_time > 0) {
323 |         printf("WARNING: both -p and -x options selected.\n");
324 |     }
325 | #if DEBUG_LOG
326 |     dbf = fopen("debug-log.txt", "w");
327 |     assert(dbf);
328 | #endif
329 | 
330 |     if (transcribe == 1) {
331 | 	// if only one midi file, just write transcription and exit, 
332 |         // no alignment
333 |         save_transcription(infilename1, "", false, trans_filename,NULL, NULL, NULL);
334 |         printf("Wrote %s\n", trans_filename);
335 |         goto finish;
336 |     }
337 | 
338 | 
339 |     // if midi only in infilename2, make it infilename1
340 |     if (is_midi_file(infilename2) && !is_midi_file(infilename1)) {
341 |         char *temp; 
342 |         temp = infilename1; 
343 |         infilename1 = infilename2;
344 |         infilename2 = temp;
345 |     }
346 | 
347 |     if (!align_files(infilename1, infilename2, sa, true /* verbose */)) {
348 |         printf("An error occurred, not saving path and transcription data\n");
349 |         goto finish;
350 |     }
351 |     if (sa.file1_frames <= 2 || sa.file2_frames <= 2) {
352 |         printf("Error: file frame counts are low: %d (for input 1) and %d "
353 |                "for input 2)\n...not saving path and transcription data\n",
354 |                sa.file1_frames, sa.file2_frames);
355 |     goto finish;
356 |     }
357 |     // save path
358 |     save_path(path_filename, sa.pathlen, sa.pathx, sa.pathy, 
359 |               sa.actual_frame_period_1, sa.actual_frame_period_2);
360 |     // save image of distance matrix
361 |     save_image(image_filename, sa);
362 |     // save smooth, midi, transcription
363 |     save_transcription(infilename1, infilename2, true, trans_filename, 
364 |                        smooth_filename, midi_filename, beat_filename);
365 | 
366 |     // print what the chroma matrix looks like
367 |     /*
368 |       printf("file1 chroma table: \n"); 
369 |       print_chroma_table(chrom_energy1,file1_frames);
370 |       printf("\nfile2 chroma table: \n"); 
371 |       print_chroma_table(chrom_energy2, file2_frames); 
372 |     */
373 | 	
374 |     // only path and smooth are written when aligning two audio files
375 |     if (is_midi_file(infilename1) || is_midi_file(infilename2))
376 |         printf("Wrote %s, %s, %s, and %s.", path_filename, smooth_filename, 
377 |                trans_filename, beat_filename);
378 |     else
379 |         printf("Wrote %s and %s.", path_filename, smooth_filename); 
380 |     
381 | finish:
382 | #if DEBUG_LOG
383 |     fclose(dbf);
384 | #endif
385 | 
386 |     return 0 ;
387 | } /* main */
388 | 
389 | 
/* print_path_range -- debugging output
 *
 * prints "<pathx[k]> <pathy[k]>" on a line of its own for every index k
 * from i through j (both inclusive)
 */
void print_path_range(short *pathx, short *pathy, int i, int j)
{
    for (int k = i; k <= j; k++) {
        printf("%d %d\n", pathx[k], pathy[k]);
    }
}
399 | 
400 | 
401 | 


--------------------------------------------------------------------------------
/gen_chroma.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifdef _WIN32
  3 |     #include "malloc.h"
  4 | #endif
  5 | #include "stdlib.h" // for OSX compatibility, malloc.h -> stdlib.h
  6 | #include "stdio.h"
  7 | #include "assert.h"
  8 | #include "string.h"
  9 | #include "math.h"
 10 | #include <fstream>
 11 | #include "allegro.h"
 12 | #include "fft3/FFT3.h"
 13 | #include "audioreader.h"
 14 | #include "scorealign.h"
 15 | #include "gen_chroma.h"
 16 | #include "comp_chroma.h"
 17 | #include "mfmidi.h"
 18 | #include "sautils.h"
 19 | #ifdef SA_VERBOSE
 20 | #include <iostream> // cout
 21 | #endif
 22 | using namespace std;
 23 | 
 24 | //if 1, causes printing internally
 25 | #define PRINT_BIN_ENERGY 1
 26 | 
 27 | #define p1 0.0577622650466621
 28 | #define p2 2.1011784386926213
 29 | 
 30 | // each row is one chroma vector, 
 31 | // data is stored as an array of chroma vectors:
 32 | // vector 1, vector 2, ...
 33 | #define CHROM(row, column) AREF2((*chrom_energy), row, column)
 34 | 
 35 | float hz_to_step(float hz)
 36 | {
 37 |     return float((log(hz) - p2) / p1);
 38 | }
 39 | 
 40 | /*				GEN_MAGNITUDE
 41 |    given the real and imaginary portions of a complex FFT function, compute 
 42 |    the magnitude of the fft bin.
 43 |    given input of 2 arrays (inR and inI) of length n, takes the ith element
 44 |    from each, squares them, sums them, takes the square root of the sum and
 45 |    puts the output into the ith position in the array out.
 46 |    
 47 |    NOTE: out should be length n
 48 | */
 49 | void gen_Magnitude(float* inR,float* inI, int low, int hi, float* out)
 50 | {
 51 |     int i;
 52 |     for (i = low; i < hi; i++) {
 53 |       float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
 54 |       //printf("   %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
 55 |       out[i]= magVal;
 56 | #ifdef SA_VERBOSE
 57 |       if (i == 1000) printf("gen_Magnitude: %d %g\n", i, magVal);
 58 | #endif
 59 |     }
 60 | }
 61 | 
 62 | 
 63 | /*				PRINT_BINS
 64 |     This function is intended for debugging purposes.
 65 |     pass in an array representing the "mid point"
 66 |     of each bin, and the number of bins.  The
 67 |     function will print out:
 68 |     i value
 69 |     index falue
 70 |     low range of the bin
 71 |     middle of the bin
 72 |     high range of the bin
 73 | */
 74 | void print_Bins(float* bins, int numBins){
 75 |     printf("BINS: \n");
 76 |     int i;
 77 |     for (i=0; i<numBins; i++) {
 78 |       int index = i % numBins;
 79 |       int indexNext = (index + 1) % numBins;
 80 |       int indexPrev = (index - 1) % numBins;
 81 |       
 82 |       float maxValue =(bins[index]+bins[indexNext])/2;
 83 |       float minValue=(bins[index]+bins[indexPrev])/2;
 84 |       
 85 |       if(index == 1)
 86 |         maxValue =bins[index]+(bins[index]-((bins[index]+bins[indexPrev])/2));
 87 |       if(index == 2)
 88 |         minValue =bins[index]-(((bins[index]+bins[indexNext])/2)-bins[index]);
 89 |       
 90 |       printf("%d (%d) %g||%g||%g\n",i,index,minValue,bins[i],maxValue);
 91 |     }		
 92 | }
 93 | 
 94 | /*				MIN_BIN_NUM
 95 |     Returns the index in the array of bins
 96 |     of the "smallest" bin.  aka, the bin
 97 |     whose midpoint is the smallest.
 98 | */
 99 | int min_Bin_Num(float* bins, int numBins){
100 |     
101 |     int i;
102 |     int minIndex=0;
103 |     float minValue=bins[0];
104 |     for (i = 0; i < numBins; i++) {   
105 |       if (minValue > bins[i]) {
106 |         minValue = bins[i];
107 |         minIndex = i;
108 |       }
109 |     }
110 |     return minIndex;
111 | }
112 | 
113 | 
/*				GEN_HAMMING
    given data from reading in a section of a sound file
    applies the hamming function to each sample:
        out[k] = in[k] * (0.54 - 0.46 * cos(2 * pi * k / (n - 1)))
    n specifies the length of in and out.
    A window of length 1 has the single weight 1, so out[0] = in[0].
*/
void gen_Hamming(float* in, int n, float* out)
{
    if (n == 1) {
        // the general formula divides by n - 1, which is 0 here and
        // would turn the only sample into NaN
        out[0] = in[0];
        return;
    }
    int k = 0;
    for(k = 0; k < n; k++) {
      float internalValue = 2.0 * M_PI * k * (1.0 / (n - 1));
      float cosValue = cos(internalValue);
      float hammingValue = 0.54F + (-0.46F * cosValue);
#ifdef SA_VERBOSE
      if (k == 1000) printf("Hamming %g\n", hammingValue);
#endif
      out[k] = hammingValue * in[k];
    }
}
132 | 
/*				NEXTPOWEROF2
    given an int n, returns the smallest power of 2 that is
    larger than or equal to n. The result is 1 for every
    n <= 1.
*/
int nextPowerOf2(int n)
{
    int power = 1;
    // keep doubling until n is reached or passed
    for (; power < n; power <<= 1) {
    }
    return power;
}
143 | 
144 | 
145 | /* GEN_CHROMA_AUDIO -- compute chroma for an audio file 
146 |  */
147 | /*
148 |     generates the chroma energy for a given sequence
149 |     with a low cutoff and high cutoff.  
150 |     The chroma energy is placed in the float *chrom_energy.
151 |     this 2D is an array of pointers.
152 |     The function returns the number of frames 
153 |     (aka the length of the 1st dimention of chrom_energy)
154 | */
155 | int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff, 
156 |         int lcutoff, float **chrom_energy, float *actual_frame_period,
157 |         int id, bool verbose)
158 | {
159 |     int i;
160 |     double sample_rate = reader.get_sample_rate();
161 |     float reg11[CHROMA_BIN_COUNT]; // temp storage1;
162 |     float reg12[CHROMA_BIN_COUNT]; // temp storage2;
163 | 
164 |     if (verbose) {
165 |         printf ("==============FILE %d====================\n", id);
166 |         reader.print_info();
167 |     }
168 |     // this seems like a poor way to set actual_frame_period_1 or _2 in 
169 |     // the Scorealign object, but I'm not sure what would be better:
170 |     *actual_frame_period = reader.actual_frame_period;
171 | 
172 |     for (i = 0; i < CHROMA_BIN_COUNT; i++) {
173 |         reg11[i] = -999;
174 |       }
175 |     for (i = 0; i < CHROMA_BIN_COUNT; i++){
176 |         reg12[i] = 0;
177 |       }
178 | 
179 |    /*=============================================================*/
180 | 
181 |     // allocate some buffers for use in the loop
182 |     int full_data_size = nextPowerOf2(reader.samples_per_frame);
183 |     if (verbose) {
184 |         printf("   samples per frame is %d \n", reader.samples_per_frame);
185 |         printf("   total chroma frames %d\n", reader.frame_count); 
186 |         // printf("   Window size  %g second \n", reader.window_size);
187 |         printf("   hopsize in samples %d \n", reader.hop_samples);
188 |         printf("   fft size %d\n", full_data_size);
189 |     }
190 | 
191 |     float *full_data = ALLOC(float, full_data_size);
192 |     float *fft_dataR = ALLOC(float, full_data_size);
193 |     float *fft_dataI = ALLOC(float, full_data_size);	
194 |     //set to zero
195 |     memset(full_data, 0, full_data_size * sizeof(float));
196 |     memset(fft_dataR, 0, full_data_size * sizeof(float));	
197 |     memset(fft_dataI, 0, full_data_size * sizeof(float));
198 |     //check to see if memory has been allocated
199 |     assert(full_data != NULL);
200 |     assert(fft_dataR != NULL);
201 |     assert(fft_dataI != NULL);
202 |    
203 |     int *bin_map = ALLOC(int, full_data_size);
204 | 	
205 |     //set up the chrom_energy array;
206 |     *chrom_energy = ALLOC(float, reader.frame_count * (CHROMA_BIN_COUNT + 1));
207 |     int cv_index = 0;
208 | 
209 |     // set up mapping from spectral bins to chroma bins
210 |     // ordinarily, we would add 0.5 to round to nearest bin, but we also
211 |     // want to subtract 0.5 because the bin has a width of +/- 0.5. These
212 |     // two cancel out, so we can just round down and get the right answer.
213 |     int num_bins_to_use = (int) (hcutoff * full_data_size / sample_rate);
214 |     // But then we want to add 1 because the loops will only go to 
215 |     // high_bin - 1:
216 |     int high_bin = min(num_bins_to_use + 1, full_data_size);
217 |     //printf("center freq of high bin is %g\n", (high_bin - 1) * sample_rate / 
218 |     //    full_data_size);
219 |     //printf("high freq of high bin is %g\n", 
220 |     //     (high_bin - 1 + 0.5) * sample_rate / full_data_size);
221 |     // If we add 0.5, we'll round to nearest bin center frequency, but
222 |     // bin covers a frequency range that goes 0.5 bin width lower, so we
223 |     // add 1 before rounding.
224 |     int low_bin = (int) (lcutoff * full_data_size / sample_rate);
225 |     //printf("center freq of low bin is %g\n", low_bin * sample_rate / 
226 |     //    full_data_size);
227 |     //printf("low freq of low bin is %g\n", (low_bin - 0.5) * sample_rate / 
228 |     //    full_data_size);
229 |     //printf("frequency spacing of bins is %g\n", 
230 |     //     sample_rate / full_data_size);
231 |     double freq = low_bin * sample_rate / full_data_size;
232 |     for (i = low_bin; i < high_bin; i++) {
233 |         float raw_bin = hz_to_step(freq);
234 |         int round_bin = (int) (raw_bin + 0.5F);
235 |         int mod_bin = round_bin % 12;
236 |         bin_map[i] = mod_bin;
237 |         freq += sample_rate / full_data_size;
238 |     }
239 |     // printf("BIN_COUNT is !!!!!!!!!!!!!   %d\n",CHROMA_BIN_COUNT);
240 | 
241 |     while (reader.read_window(full_data)) {
242 |         //fill out array with 0's till next power of 2
243 | #ifdef SA_VERBOSE
244 |         printf("samples_per_frame %d sample %g\n", reader.samples_per_frame,
245 |                full_data[0]);
246 | #endif
247 |         for (i = reader.samples_per_frame; i < full_data_size; i++) 
248 |             full_data[i] = 0;
249 | 
250 | #ifdef AS_VERBOSE
251 |         printf("preFFT: full_data[1000] %g\n", full_data[1000]);
252 | #endif
253 | 
254 |         //the data from the wave file, each point mult by a hamming value
255 |         gen_Hamming(full_data, full_data_size, full_data);
256 | 
257 | #ifdef SA_VERBOSE
258 |         printf("preFFT: hammingData[1000] %g\n", full_data[1000]);
259 | #endif
260 |         FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
261 |       
262 |         //given the fft, compute the energy of each point
263 |         gen_Magnitude(fft_dataR, fft_dataI, low_bin, high_bin, full_data);
264 |       
265 |         /*-------------------------------------
266 |           GENERATE BINS AND PUT
267 |           THE CORRECT ENERGY IN
268 |           EACH BIN, CORRESPONDING
269 |           TO THE CORRECT PITCH
270 |           -------------------------------------*/
271 | 
272 |         float binEnergy[CHROMA_BIN_COUNT];
273 |         int binCount[CHROMA_BIN_COUNT];
274 | 
275 |         for (i = 0; i < CHROMA_BIN_COUNT; i++) {
276 |             binCount[i] = 0; 
277 |             binEnergy[i] = 0.0;
278 |         }
279 |       
280 |         for (i = low_bin; i < high_bin; i++) {
281 |             int mod_bin = bin_map[i];
282 |             binEnergy[mod_bin] += full_data[i];
283 |             binCount[mod_bin]++;
284 |         }
285 | 
286 |         /*-------------------------------------
287 |           END OF BIN GENERATION
288 |           -------------------------------------*/
289 |         /* THE FOLLOWING LOOKS LIKE SOME OLD CODE TO COMPUTE
290 |          * CHROMA FLUX, BUT IT IS NOT IN USE NOW 
291 |          
292 |         if (PRINT_BIN_ENERGY) {
293 |             float mao1;
294 |             float sum=0.;
295 |          
296 |             for (i = 0; i < CHROMA_BIN_COUNT; i++) {
297 |                 reg12[i]=binEnergy[i] / binCount[i];
298 |             }
299 |        
300 |             if (reg11[0]==-999){
301 |                 printf("Chroma Flux \n\n");
302 |             } else {
303 |                 for (i = 0; i < CHROMA_BIN_COUNT; i++) {
304 |                 }
305 |                 for (int k = 0; k < CHROMA_BIN_COUNT; k++) {
306 |                     float x = reg11[k];
307 |                     float y = reg12[k];
308 |                     float diff = x - y;
309 |                     sum += diff * diff;
310 |                 }
311 |                 mao1 = sqrt(sum);         
312 |                 sequence++;      
313 |                 sum = 0.;
314 |                 mao1 = 0.;
315 |             }
316 |             for (i = 0; i < CHROMA_BIN_COUNT; i++) {
317 |                 reg11[i]=reg12[i];
318 |             }
319 |             //fclose(Pointer);
320 |           }
321 |         */
322 |         //put chrom energy into the returned array
323 | 
324 | #ifdef SA_VERBOSE
325 |         printf("cv_index %d\n", cv_index);
326 | #endif
327 |         assert(cv_index < reader.frame_count);
328 |         for (i = 0;  i < CHROMA_BIN_COUNT; i++)
329 |             CHROM(cv_index, i) = binEnergy[i] / binCount[i];
330 |         cv_index++;
331 |     } // end of while ((readcount = read_mono_floats...
332 | 
333 |     free(fft_dataI);
334 |     free(fft_dataR);
335 |     free(full_data);
336 |     if (verbose)
337 |         printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
338 |     return cv_index;
339 | }
340 | 
341 | 
342 | class Event_list {
343 | public:
344 | 	Alg_note_ptr note;
345 | 	Event_list *next;
346 | 
347 | 	Event_list(Alg_event_ptr event_, Event_list *next_) {
348 | 		note = (Alg_note_ptr) event_;
349 | 		next = next_;
350 | 	}
351 | 
352 | 	~Event_list() {
353 | 	}
354 | };
355 | typedef Event_list *Event_list_ptr;
356 | 
357 | 
358 | /* gen_chroma_midi -- generate chroma vectors for midi file */
359 | /*
360 |     generates the chroma energy for a given sequence
361 |     with a low cutoff and high cutoff.  
362 |     The chroma energy is placed in the float *chrom_energy.
363 |     this 2D is an array of pointers.
364 |     The function returns the number of frames 
365 |     (aka the length of the 1st dimention of chrom_energy)
366 |  *
367 |  *
368 |   Notes: keep a list of notes that are sounding.
369 |   For each frame, 
370 |     zero the vector
371 |     while next note starts before end of frame, insert note in list
372 | 	  for each note in list, compute weight and add to vector. Remove
373 | 	  if note ends before frame start time.	 
374 |   How many frames? 
375 |  */
376 | 
377 | int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff, 
378 |                     float **chrom_energy, float *actual_frame_period,
379 |                     int id, bool verbose)
380 | {	
381 |     if (verbose) {
382 |         printf ("==============FILE %d====================\n", id);
383 |         SA_V(seq.write(cout, true));
384 |     }
385 |     /*=============================================================*/
386 | 
387 |     *actual_frame_period = (frame_period) ; // since we don't quantize to samples
388 | 	
389 |     /*=============================================================*/
390 |     
391 |     seq.convert_to_seconds();
392 |     /* find duration */
393 |     float dur = 0.0F;
394 |     int nnotes = 0;
395 |     nnotes= find_midi_duration(seq, &dur); 
396 | 
397 |     /*================================================================*/
398 | 	
399 |     int frame_count= (int)ceil(((float)dur/ frame_period + 1)); 	
400 | 	
401 |     /*================================================================*/
402 | 	
403 |     if (verbose) {
404 |         printf("   note count = %d\n", nnotes);
405 |         printf("   duration in sec = %f\n", dur); 
406 |         printf("   chroma frames %d\n", frame_count);
407 |     }
408 | 
409 |     //set up the chrom_energy array;
410 |     (*chrom_energy) = ALLOC(float, frame_count * (CHROMA_BIN_COUNT + 1));
411 |     Event_list_ptr list = NULL;
412 |     Alg_iterator iterator(&seq, true);
413 |     iterator.begin();
414 |     Alg_event_ptr event = iterator.next();
415 |     int cv_index;
416 |     for (cv_index = 0; cv_index < frame_count; cv_index++) {
417 | 		
418 |         /*====================================================*/
419 | 
420 |         float frame_begin = max((cv_index * (frame_period)) - 
421 |                                 window_size/2 , 0.0F); //chooses zero if negative
422 | 
423 |         float frame_end= frame_begin +(window_size/2); 	
424 | 	/*============================================================*/
425 |         /* zero the vector */
426 |         for (int i = 0; i < CHROMA_BIN_COUNT; i++) CHROM(cv_index, i) = 0;
427 |         /* add new notes that are in the frame */
428 |         while (event && event->time < frame_end) {
429 |             if (event->is_note()) {
430 |                 list = new Event_list(event, list);
431 |             }
432 |             event = iterator.next();
433 |         }
434 |         /* remove notes that are no longer sounding */
435 |         Event_list_ptr *ptr = &list;
436 |         while (*ptr) {
437 |             while ((*ptr) && 
438 |                    (*ptr)->note->time + (*ptr)->note->dur < frame_begin) {
439 |                 Event_list_ptr temp = *ptr;
440 |                 *ptr = (*ptr)->next;
441 |                 delete temp;
442 |             }
443 |             if (*ptr) ptr = &((*ptr)->next);
444 |         }
445 |         for (Event_list_ptr item = list; item; item = item->next) {
446 |             /* compute duration of overlap */
447 |             float overlap = 
448 |                 min(frame_end, (float) (item->note->time + item->note->dur)) - 
449 |                 max(frame_begin, (float) item->note->time);
450 |             float velocity = item->note->loud;
451 |             float weight = overlap * velocity;
452 | #if DEBUG_LOG
453 |             fprintf(dbf, "%3d pitch %g key %d overlap %g velocity %g\n", 
454 |                     cv_index, item->note->pitch, item->note->get_identifier(), 
455 |                     overlap, velocity);
456 | #endif
457 |             CHROM(cv_index, (int)item->note->pitch % 12) += weight;
458 |         }
459 | #if DEBUG_LOG
460 |         for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
461 |             fprintf(dbf, "%d:%g ", i, CHROM(cv_index, i));
462 |         }
463 |         fprintf(dbf, "\n\n");
464 | #endif
465 |     }
466 |     while (list) {
467 |         Event_list_ptr temp = list;
468 |         list = list->next;
469 |         delete temp;
470 |     }
471 |     iterator.end();
472 |     if (verbose)
473 |         printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
474 |     return frame_count;
475 | }
476 | 


--------------------------------------------------------------------------------
/scorealign.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
  1 | // !$*UTF8*$!
  2 | {
  3 | 	archiveVersion = 1;
  4 | 	classes = {
  5 | 	};
  6 | 	objectVersion = 42;
  7 | 	objects = {
  8 | 
  9 | /* Begin PBXBuildFile section */
 10 | 		3D6E91710E351D8300FE12E2 /* FFT3.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D6E916F0E351D8300FE12E2 /* FFT3.cpp */; };
 11 | 		3D6E91720E351D8300FE12E2 /* FFT3.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D6E91700E351D8300FE12E2 /* FFT3.h */; };
 12 | 		3D848DC60E2C391300EDB5E3 /* audiofilereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */; };
 13 | 		3D848DC90E2C392600EDB5E3 /* audioreader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848DC80E2C392600EDB5E3 /* audioreader.cpp */; };
 14 | 		3D848E540E2CE2B300EDB5E3 /* alignfiles.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */; };
 15 | 		3D848E7A0E2CEC4200EDB5E3 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848E790E2CEC4200EDB5E3 /* main.cpp */; };
 16 | 		3D9037AD0CC7CB3C00F46FD5 /* comp_chroma.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */; };
 17 | 		3D9037AE0CC7CB3C00F46FD5 /* comp_chroma.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */; };
 18 | 		3D9037B10CC7CB9C00F46FD5 /* gen_chroma.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */; };
 19 | 		3D9037B20CC7CB9C00F46FD5 /* gen_chroma.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */; };
 20 | 		3D9037B80CC7D16200F46FD5 /* regression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037B70CC7D16200F46FD5 /* regression.cpp */; };
 21 | 		3D9037BA0CC7D18400F46FD5 /* regression.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037B90CC7D18400F46FD5 /* regression.h */; };
 22 | 		3D9037BD0CC7D18F00F46FD5 /* scorealign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */; };
 23 | 		3D9037BE0CC7D18F00F46FD5 /* scorealign.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037BC0CC7D18F00F46FD5 /* scorealign.h */; };
 24 | 		3D9037E20CC7E2E000F46FD5 /* allegro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E10CC7E2E000F46FD5 /* allegro.cpp */; };
 25 | 		3D9037E40CC7E2F700F46FD5 /* allegro.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037E30CC7E2F700F46FD5 /* allegro.h */; };
 26 | 		3D9037E60CC7E30600F46FD5 /* allegrord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E50CC7E30600F46FD5 /* allegrord.cpp */; };
 27 | 		3D9037E80CC7E31300F46FD5 /* allegrosmfrd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */; };
 28 | 		3D9037EA0CC7E31C00F46FD5 /* allegrowr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */; };
 29 | 		3D9037EC0CC7E32400F46FD5 /* allegrosmfwr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */; };
 30 | 		3D9037EF0CC7E33200F46FD5 /* mfmidi.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */; };
 31 | 		3D9037F30CC7E33D00F46FD5 /* strparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037F10CC7E33D00F46FD5 /* strparse.cpp */; };
 32 | 		3D9037F40CC7E33D00F46FD5 /* strparse.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037F20CC7E33D00F46FD5 /* strparse.h */; };
 33 | 		3D9037FF0CC7E46700F46FD5 /* mfmidi.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037FE0CC7E46700F46FD5 /* mfmidi.h */; };
 34 | 		3D9038800CC9B54000F46FD5 /* curvefit.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D90387E0CC9B54000F46FD5 /* curvefit.h */; };
 35 | 		3D9038810CC9B54000F46FD5 /* curvefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D90387F0CC9B54000F46FD5 /* curvefit.cpp */; };
 36 | 		3D9038880CCA25E100F46FD5 /* hillclimb.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9038860CCA25E100F46FD5 /* hillclimb.h */; };
 37 | 		3D9038890CCA25E100F46FD5 /* hillclimb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9038870CCA25E100F46FD5 /* hillclimb.cpp */; };
 38 | 		3D9038DE0CCA815D00F46FD5 /* sautils.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9038DC0CCA815D00F46FD5 /* sautils.h */; };
 39 | 		3D9038DF0CCA815D00F46FD5 /* sautils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9038DD0CCA815D00F46FD5 /* sautils.cpp */; };
 40 | 		8DD76F6A0486A84900D96B5E /* scorealign.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* scorealign.1 */; };
 41 | /* End PBXBuildFile section */
 42 | 
 43 | /* Begin PBXCopyFilesBuildPhase section */
 44 | 		8DD76F690486A84900D96B5E /* CopyFiles */ = {
 45 | 			isa = PBXCopyFilesBuildPhase;
 46 | 			buildActionMask = 8;
 47 | 			dstPath = /usr/share/man/man1/;
 48 | 			dstSubfolderSpec = 0;
 49 | 			files = (
 50 | 				8DD76F6A0486A84900D96B5E /* scorealign.1 in CopyFiles */,
 51 | 				3D9037AE0CC7CB3C00F46FD5 /* comp_chroma.h in CopyFiles */,
 52 | 				3D9037B20CC7CB9C00F46FD5 /* gen_chroma.h in CopyFiles */,
 53 | 				3D9037BA0CC7D18400F46FD5 /* regression.h in CopyFiles */,
 54 | 				3D9037BE0CC7D18F00F46FD5 /* scorealign.h in CopyFiles */,
 55 | 				3D9037E40CC7E2F700F46FD5 /* allegro.h in CopyFiles */,
 56 | 				3D9037F40CC7E33D00F46FD5 /* strparse.h in CopyFiles */,
 57 | 				3D9037FF0CC7E46700F46FD5 /* mfmidi.h in CopyFiles */,
 58 | 				3D9038800CC9B54000F46FD5 /* curvefit.h in CopyFiles */,
 59 | 				3D9038880CCA25E100F46FD5 /* hillclimb.h in CopyFiles */,
 60 | 				3D9038DE0CCA815D00F46FD5 /* sautils.h in CopyFiles */,
 61 | 				3D6E91720E351D8300FE12E2 /* FFT3.h in CopyFiles */,
 62 | 			);
 63 | 			runOnlyForDeploymentPostprocessing = 1;
 64 | 		};
 65 | /* End PBXCopyFilesBuildPhase section */
 66 | 
 67 | /* Begin PBXFileReference section */
 68 | 		3D6E916F0E351D8300FE12E2 /* FFT3.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = FFT3.cpp; path = fft3/FFT3.cpp; sourceTree = "<group>"; };
 69 | 		3D6E91700E351D8300FE12E2 /* FFT3.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = FFT3.h; path = fft3/FFT3.h; sourceTree = "<group>"; };
 70 | 		3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = audiofilereader.cpp; sourceTree = "<group>"; };
 71 | 		3D848DC80E2C392600EDB5E3 /* audioreader.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = audioreader.cpp; sourceTree = "<group>"; };
 72 | 		3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = alignfiles.cpp; sourceTree = "<group>"; };
 73 | 		3D848E790E2CEC4200EDB5E3 /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = "<group>"; };
 74 | 		3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = comp_chroma.cpp; sourceTree = "<group>"; };
 75 | 		3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = comp_chroma.h; sourceTree = "<group>"; };
 76 | 		3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = gen_chroma.cpp; sourceTree = "<group>"; };
 77 | 		3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = gen_chroma.h; sourceTree = "<group>"; };
 78 | 		3D9037B70CC7D16200F46FD5 /* regression.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = regression.cpp; sourceTree = "<group>"; };
 79 | 		3D9037B90CC7D18400F46FD5 /* regression.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = regression.h; sourceTree = "<group>"; };
 80 | 		3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = scorealign.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
 81 | 		3D9037BC0CC7D18F00F46FD5 /* scorealign.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = scorealign.h; sourceTree = "<group>"; };
 82 | 		3D9037E10CC7E2E000F46FD5 /* allegro.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegro.cpp; path = ../portsmf/allegro.cpp; sourceTree = SOURCE_ROOT; };
 83 | 		3D9037E30CC7E2F700F46FD5 /* allegro.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = allegro.h; path = ../portsmf/allegro.h; sourceTree = SOURCE_ROOT; };
 84 | 		3D9037E50CC7E30600F46FD5 /* allegrord.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrord.cpp; path = ../portsmf/allegrord.cpp; sourceTree = SOURCE_ROOT; };
 85 | 		3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrosmfrd.cpp; path = ../portsmf/allegrosmfrd.cpp; sourceTree = SOURCE_ROOT; };
 86 | 		3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrowr.cpp; path = ../portsmf/allegrowr.cpp; sourceTree = SOURCE_ROOT; };
 87 | 		3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrosmfwr.cpp; path = ../portsmf/allegrosmfwr.cpp; sourceTree = SOURCE_ROOT; };
 88 | 		3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = mfmidi.cpp; path = ../portsmf/mfmidi.cpp; sourceTree = SOURCE_ROOT; };
 89 | 		3D9037F10CC7E33D00F46FD5 /* strparse.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = strparse.cpp; path = ../portsmf/strparse.cpp; sourceTree = SOURCE_ROOT; };
 90 | 		3D9037F20CC7E33D00F46FD5 /* strparse.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = strparse.h; path = ../portsmf/strparse.h; sourceTree = SOURCE_ROOT; };
 91 | 		3D9037FE0CC7E46700F46FD5 /* mfmidi.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = mfmidi.h; path = ../portsmf/mfmidi.h; sourceTree = SOURCE_ROOT; };
 92 | 		3D90387E0CC9B54000F46FD5 /* curvefit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = curvefit.h; sourceTree = "<group>"; };
 93 | 		3D90387F0CC9B54000F46FD5 /* curvefit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = curvefit.cpp; sourceTree = "<group>"; };
 94 | 		3D9038860CCA25E100F46FD5 /* hillclimb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hillclimb.h; sourceTree = "<group>"; };
 95 | 		3D9038870CCA25E100F46FD5 /* hillclimb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hillclimb.cpp; sourceTree = "<group>"; };
 96 | 		3D9038DC0CCA815D00F46FD5 /* sautils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sautils.h; sourceTree = "<group>"; };
 97 | 		3D9038DD0CCA815D00F46FD5 /* sautils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sautils.cpp; sourceTree = "<group>"; };
 98 | 		8DD76F6C0486A84900D96B5E /* scorealign */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = scorealign; sourceTree = BUILT_PRODUCTS_DIR; };
 99 | 		C6859E8B029090EE04C91782 /* scorealign.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = scorealign.1; sourceTree = "<group>"; };
100 | /* End PBXFileReference section */
101 | 
102 | /* Begin PBXFrameworksBuildPhase section */
103 | 		8DD76F660486A84900D96B5E /* Frameworks */ = {
104 | 			isa = PBXFrameworksBuildPhase;
105 | 			buildActionMask = 2147483647;
106 | 			files = (
107 | 			);
108 | 			runOnlyForDeploymentPostprocessing = 0;
109 | 		};
110 | /* End PBXFrameworksBuildPhase section */
111 | 
112 | /* Begin PBXGroup section */
113 | 		08FB7794FE84155DC02AAC07 /* scorealign */ = {
114 | 			isa = PBXGroup;
115 | 			children = (
116 | 				08FB7795FE84155DC02AAC07 /* Source */,
117 | 				C6859E8C029090F304C91782 /* Documentation */,
118 | 				1AB674ADFE9D54B511CA2CBB /* Products */,
119 | 			);
120 | 			name = scorealign;
121 | 			sourceTree = "<group>";
122 | 		};
123 | 		08FB7795FE84155DC02AAC07 /* Source */ = {
124 | 			isa = PBXGroup;
125 | 			children = (
126 | 				3D6E916F0E351D8300FE12E2 /* FFT3.cpp */,
127 | 				3D6E91700E351D8300FE12E2 /* FFT3.h */,
128 | 				3D9038DC0CCA815D00F46FD5 /* sautils.h */,
129 | 				3D9038DD0CCA815D00F46FD5 /* sautils.cpp */,
130 | 				3D9038860CCA25E100F46FD5 /* hillclimb.h */,
131 | 				3D9038870CCA25E100F46FD5 /* hillclimb.cpp */,
132 | 				3D90387E0CC9B54000F46FD5 /* curvefit.h */,
133 | 				3D90387F0CC9B54000F46FD5 /* curvefit.cpp */,
134 | 				3D9037F10CC7E33D00F46FD5 /* strparse.cpp */,
135 | 				3D9037F20CC7E33D00F46FD5 /* strparse.h */,
136 | 				3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */,
137 | 				3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */,
138 | 				3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */,
139 | 				3D848DC80E2C392600EDB5E3 /* audioreader.cpp */,
140 | 				3D848E790E2CEC4200EDB5E3 /* main.cpp */,
141 | 				3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */,
142 | 				3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */,
143 | 				3D9037E50CC7E30600F46FD5 /* allegrord.cpp */,
144 | 				3D9037E30CC7E2F700F46FD5 /* allegro.h */,
145 | 				3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */,
146 | 				3D9037E10CC7E2E000F46FD5 /* allegro.cpp */,
147 | 				3D9037BC0CC7D18F00F46FD5 /* scorealign.h */,
148 | 				3D9037B90CC7D18400F46FD5 /* regression.h */,
149 | 				3D9037B70CC7D16200F46FD5 /* regression.cpp */,
150 | 				3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */,
151 | 				3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */,
152 | 				3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */,
153 | 				3D9037FE0CC7E46700F46FD5 /* mfmidi.h */,
154 | 				3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */,
155 | 				3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */,
156 | 			);
157 | 			name = Source;
158 | 			sourceTree = "<group>";
159 | 		};
160 | 		1AB674ADFE9D54B511CA2CBB /* Products */ = {
161 | 			isa = PBXGroup;
162 | 			children = (
163 | 				8DD76F6C0486A84900D96B5E /* scorealign */,
164 | 			);
165 | 			name = Products;
166 | 			sourceTree = "<group>";
167 | 		};
168 | 		C6859E8C029090F304C91782 /* Documentation */ = {
169 | 			isa = PBXGroup;
170 | 			children = (
171 | 				C6859E8B029090EE04C91782 /* scorealign.1 */,
172 | 			);
173 | 			name = Documentation;
174 | 			sourceTree = "<group>";
175 | 		};
176 | /* End PBXGroup section */
177 | 
178 | /* Begin PBXNativeTarget section */
179 | 		8DD76F620486A84900D96B5E /* scorealign */ = {
180 | 			isa = PBXNativeTarget;
181 | 			buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "scorealign" */;
182 | 			buildPhases = (
183 | 				8DD76F640486A84900D96B5E /* Sources */,
184 | 				8DD76F660486A84900D96B5E /* Frameworks */,
185 | 				8DD76F690486A84900D96B5E /* CopyFiles */,
186 | 			);
187 | 			buildRules = (
188 | 			);
189 | 			dependencies = (
190 | 			);
191 | 			name = scorealign;
192 | 			productInstallPath = "$(HOME)/bin";
193 | 			productName = scorealign;
194 | 			productReference = 8DD76F6C0486A84900D96B5E /* scorealign */;
195 | 			productType = "com.apple.product-type.tool";
196 | 		};
197 | /* End PBXNativeTarget section */
198 | 
199 | /* Begin PBXProject section */
200 | 		08FB7793FE84155DC02AAC07 /* Project object */ = {
201 | 			isa = PBXProject;
202 | 			buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "scorealign" */;
203 | 			hasScannedForEncodings = 1;
204 | 			mainGroup = 08FB7794FE84155DC02AAC07 /* scorealign */;
205 | 			projectDirPath = "";
206 | 			targets = (
207 | 				8DD76F620486A84900D96B5E /* scorealign */,
208 | 			);
209 | 		};
210 | /* End PBXProject section */
211 | 
212 | /* Begin PBXSourcesBuildPhase section */
213 | 		8DD76F640486A84900D96B5E /* Sources */ = {
214 | 			isa = PBXSourcesBuildPhase;
215 | 			buildActionMask = 2147483647;
216 | 			files = (
217 | 				3D9037AD0CC7CB3C00F46FD5 /* comp_chroma.cpp in Sources */,
218 | 				3D9037B10CC7CB9C00F46FD5 /* gen_chroma.cpp in Sources */,
219 | 				3D9037B80CC7D16200F46FD5 /* regression.cpp in Sources */,
220 | 				3D9037BD0CC7D18F00F46FD5 /* scorealign.cpp in Sources */,
221 | 				3D9037E20CC7E2E000F46FD5 /* allegro.cpp in Sources */,
222 | 				3D9037E60CC7E30600F46FD5 /* allegrord.cpp in Sources */,
223 | 				3D9037E80CC7E31300F46FD5 /* allegrosmfrd.cpp in Sources */,
224 | 				3D9037EA0CC7E31C00F46FD5 /* allegrowr.cpp in Sources */,
225 | 				3D9037EC0CC7E32400F46FD5 /* allegrosmfwr.cpp in Sources */,
226 | 				3D9037EF0CC7E33200F46FD5 /* mfmidi.cpp in Sources */,
227 | 				3D9037F30CC7E33D00F46FD5 /* strparse.cpp in Sources */,
228 | 				3D9038890CCA25E100F46FD5 /* hillclimb.cpp in Sources */,
229 | 				3D9038810CC9B54000F46FD5 /* curvefit.cpp in Sources */,
230 | 				3D9038DF0CCA815D00F46FD5 /* sautils.cpp in Sources */,
231 | 				3D848DC60E2C391300EDB5E3 /* audiofilereader.cpp in Sources */,
232 | 				3D848DC90E2C392600EDB5E3 /* audioreader.cpp in Sources */,
233 | 				3D848E540E2CE2B300EDB5E3 /* alignfiles.cpp in Sources */,
234 | 				3D848E7A0E2CEC4200EDB5E3 /* main.cpp in Sources */,
235 | 				3D6E91710E351D8300FE12E2 /* FFT3.cpp in Sources */,
236 | 			);
237 | 			runOnlyForDeploymentPostprocessing = 0;
238 | 		};
239 | /* End PBXSourcesBuildPhase section */
240 | 
241 | /* Begin XCBuildConfiguration section */
242 | 		1DEB923208733DC60010E9CD /* Debug */ = {
243 | 			isa = XCBuildConfiguration;
244 | 			buildSettings = {
245 | 				COPY_PHASE_STRIP = NO;
246 | 				GCC_DYNAMIC_NO_PIC = NO;
247 | 				GCC_ENABLE_FIX_AND_CONTINUE = YES;
248 | 				GCC_MODEL_TUNING = G5;
249 | 				GCC_OPTIMIZATION_LEVEL = 0;
250 | 				INSTALL_PATH = "$(HOME)/bin";
251 | 				OTHER_LDFLAGS = (
252 | 					"-L/usr/local/lib",
253 | 					"-lsndfile",
254 | 				);
255 | 				PRODUCT_NAME = scorealign;
256 | 				USER_HEADER_SEARCH_PATHS = "../portsmf/ /usr/local/include";
257 | 				ZERO_LINK = YES;
258 | 			};
259 | 			name = Debug;
260 | 		};
261 | 		1DEB923308733DC60010E9CD /* Release */ = {
262 | 			isa = XCBuildConfiguration;
263 | 			buildSettings = {
264 | 				ARCHS = (
265 | 					ppc,
266 | 					i386,
267 | 				);
268 | 				GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
269 | 				GCC_MODEL_TUNING = G5;
270 | 				INSTALL_PATH = "$(HOME)/bin";
271 | 				PRODUCT_NAME = scorealign;
272 | 				USER_HEADER_SEARCH_PATHS = ../portsmf/;
273 | 			};
274 | 			name = Release;
275 | 		};
276 | 		1DEB923608733DC60010E9CD /* Debug */ = {
277 | 			isa = XCBuildConfiguration;
278 | 			buildSettings = {
279 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
280 | 				GCC_WARN_UNUSED_VARIABLE = YES;
281 | 				PREBINDING = NO;
282 | 				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
283 | 				USER_HEADER_SEARCH_PATHS = ../portsmf/;
284 | 			};
285 | 			name = Debug;
286 | 		};
287 | 		1DEB923708733DC60010E9CD /* Release */ = {
288 | 			isa = XCBuildConfiguration;
289 | 			buildSettings = {
290 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
291 | 				GCC_WARN_UNUSED_VARIABLE = YES;
292 | 				PREBINDING = NO;
293 | 				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
294 | 			};
295 | 			name = Release;
296 | 		};
297 | /* End XCBuildConfiguration section */
298 | 
299 | /* Begin XCConfigurationList section */
300 | 		1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "scorealign" */ = {
301 | 			isa = XCConfigurationList;
302 | 			buildConfigurations = (
303 | 				1DEB923208733DC60010E9CD /* Debug */,
304 | 				1DEB923308733DC60010E9CD /* Release */,
305 | 			);
306 | 			defaultConfigurationIsVisible = 0;
307 | 			defaultConfigurationName = Release;
308 | 		};
309 | 		1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "scorealign" */ = {
310 | 			isa = XCConfigurationList;
311 | 			buildConfigurations = (
312 | 				1DEB923608733DC60010E9CD /* Debug */,
313 | 				1DEB923708733DC60010E9CD /* Release */,
314 | 			);
315 | 			defaultConfigurationIsVisible = 0;
316 | 			defaultConfigurationName = Release;
317 | 		};
318 | /* End XCConfigurationList section */
319 | 	};
320 | 	rootObject = 08FB7793FE84155DC02AAC07 /* Project object */;
321 | }
322 | 


--------------------------------------------------------------------------------
/scorealign.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <ctype.h>
  5 | #include <math.h>
  6 | #ifndef __MACH__
  7 | #include <malloc.h>
  8 | #endif
  9 | #include <fstream>
 10 | #include "allegro.h"
 11 | #include "audioreader.h"
 12 | #include "scorealign.h"
 13 | #include "gen_chroma.h"
 14 | #include "comp_chroma.h"
 15 | #include "curvefit.h"
 16 | #include "mfmidi.h"
 17 | #include "regression.h"
 18 | #include "sautils.h"
 19 | 
 20 | #if (defined (WIN32) || defined (_WIN32))
 21 | #define	snprintf	_snprintf
 22 | #endif
 23 | 
 24 | #define	LOW_CUTOFF  40
 25 | #define HIGH_CUTOFF 2000
 26 | 
 27 | // Note: There are "verbose" flags passed as parameters that 
 28 | // enable some printing. The SA_VERBOSE compiler flag causes a
 29 | // lot more debugging output, so it could be called VERY_VERBOSE
 30 | // as opposed to the quieter verbose flags.
 31 | 
 32 | #ifdef SA_VERBOSE
 33 | #include "main.h"
 34 | #endif
 35 | 
 36 | // for presmoothing, how near does a point have to be to be "on the line"
 37 | #define NEAR 1.5
 38 | 
// path is a file1_frames by file2_frames array stored as a sequence of
// rows, so the first index (row, i) is in [0 .. file1_frames - 1] and
// the second index (column, j) is in [0 .. file2_frames - 1]
 42 | #define PATH(i,j) (path[(i) * file2_frames + (j)])
 43 | 
 44 | /*===========================================================================*/
 45 | 
 46 | #if DEBUG_LOG
 47 | FILE *dbf = NULL;
 48 | #endif
 49 | 
 50 | 
 51 | /*			MAP_TIME  
 52 |     lookup time of file1 in smooth_time_map and interpolate
 53 |     to get time in file2 
 54 | */
 55 | 
 56 | float Scorealign::map_time(float t1)
 57 | {
 58 |     t1 /= actual_frame_period_1; // convert from seconds to frames
 59 |     int i = (int) t1; // round down
 60 |     if (i < 0) i = 0;
 61 |     if (i >= file1_frames - 1) i = file1_frames - 2;
 62 |     // interpolate to get time
 63 |     return actual_frame_period_2 * 
 64 |         interpolate(i, smooth_time_map[i], i+1, smooth_time_map[i+1],
 65 |                     t1);
 66 | }
 67 | 
 68 | 
 69 | /*				FIND_MIDI_DURATION 
 70 |     Finds the duration of a midi song where the end
 71 |     is defined by where the last note off occurs. Duration
 72 |     in seconds is given in DUR, and returns in int the number
 73 |     of notes in the song
 74 | */
 75 | 
 76 | int find_midi_duration(Alg_seq &seq, float *dur) 
 77 | {
 78 |     *dur = 0.0F;
 79 |     int nnotes = 0;
 80 |     int i, j;
 81 |     seq.convert_to_seconds();
 82 |     for (j = 0; j < seq.track_list.length(); j++) {
 83 |         Alg_events &notes = (seq.track_list[j]);
 84 |             
 85 |         for (i = 0; i < notes.length(); i++) {
 86 |             Alg_event_ptr e = notes[i];
 87 |             if (e->is_note()) {
 88 |                 Alg_note_ptr n = (Alg_note_ptr) e;
 89 |                 float note_end = n->time + n->dur;
 90 |                 if (note_end > *dur) *dur = note_end;
 91 |                 nnotes++;
 92 |             }
 93 |         }
 94 |     }
 95 |     return nnotes; 
 96 | }
 97 |     
 98 |     
 99 |     
/* min3 -- returns the smallest of x, y and z */
double min3(double x, double y, double z)
{
    // pick the smaller of the first two, then compare it with the third
    double smaller = (x < y) ? x : y;
    if (smaller < z) {
        return smaller;
    }
    return z;
}
107 | 
108 | 
109 | void save_frames(char *name, int frames, float **chrom_energy)
110 | {
111 |     FILE *outf = fopen(name, "w");
112 |     int i,j;
113 |     for (j=0; j < frames; j++) {
114 |         float *chrom_energy_frame = chrom_energy[j];
115 |         for (i = 0;  i <= CHROMA_BIN_COUNT; i++) {
116 |             fprintf(outf, "%g ", chrom_energy_frame[i]);
117 |         }
118 |         fprintf(outf, "\n");
119 |     }
120 |     fclose(outf);
121 | }
122 | 
123 | 
124 | /* steps through the dynamic programming path
125 | */
126 | void Scorealign::path_step(int i, int j)
127 | {
128 | #if DEBUG_LOG
129 |     fprintf(dbf, "(%i,%i) ", i, j);
130 | 	if (++path_count % 5 == 0 ||
131 | 		(i == 0 && j == 0)) 
132 | 		fprintf(dbf, "\n");
133 | #endif
134 |     pathx[pathlen] = i; 
135 |     pathy[pathlen] = j;
136 |     pathlen++;
137 | }        
138 | 
139 | 
140 | /* path_reverse -- path is computed from last to first, flip it */
141 | /**/
142 | void Scorealign::path_reverse()
143 | {
144 |     int i = 0;
145 |     int j = pathlen - 1;
146 |     while (i < j) {
147 |         short tempx = pathx[i]; short tempy = pathy[i];
148 |         pathx[i] = pathx[j]; pathy[i] = pathy[j];
149 |         pathx[j] = tempx; pathy[j] = tempy;
150 |         i++; j--;
151 |     }
152 | }
153 |  
154 | /*
155 |   Sees if the chroma energy vector is silent (indicated by the 12th element being one)
156 |   Returns true if it is silent.  False if it is not silent 
157 | */
158 |  bool silent( int i, float *chrom_energy)
159 |  {
160 |      if (AREF2(chrom_energy, i,CHROMA_BIN_COUNT) == 1.0F)
161 |          return true;
162 |      else 
163 |          return false; 
164 |      
165 | }
166 | 
167 | /*
168 | returns the first index in pathy where the element is bigger than sec 
169 | */
170 | int Scorealign::sec_to_pathy_index(float sec) 
171 | {
172 |     for (int i = 0 ; i < (file1_frames + file2_frames); i++) {
173 |         if (smooth_time_map[i] * actual_frame_period_2 >= sec) {
174 |             return i; 
175 |         }
176 |         //printf("%i\n" ,pathy[i]);
177 |     }
178 |     return -1; 
179 | }
180 | 
181 | 
182 | /*	
183 | given a chrom_energy vector, sees how many 
184 | of the inital frames are designated as silent 
185 | */
186 | 
187 | int frames_of_init_silence( float *chrom_energy, int frame_count)
188 | {
189 |     bool silence = true;
190 |     int frames=0; 
191 |     while (silence) {
192 |         if (silent(frames, chrom_energy)) 
193 |             frames++; 
194 |         else
195 |             silence=false; 
196 |     }
197 |     
198 |     return frames; 
199 | }
200 | 
201 | 
202 | /*		COMPARE_CHROMA
203 | Perform Dynamic Programming to find optimal alignment
204 | */
205 | void Scorealign::compare_chroma(bool verbose)
206 | {
207 |     float *path;
208 |     int x = 0;
209 |     int y = 0;
210 |     
211 |     /* Allocate the distance matrix */
212 |     path = (float *) calloc(file1_frames * file2_frames, sizeof(float));
213 |     
214 |     /* Initialize first row and column */
215 | 
216 |     /* allow free skip over initial silence in either signal, but not both */
217 |     /* silence is indicated by a run of zeros along the first row and or 
218 |      * column, starting at the origin (0,0). After computing these runs, we
219 |      * put the proper value at (0,0)
220 |      */
221 |     if (verbose) printf("Performing silent skip DP \n"); 
222 |     PATH(0, 0) = (silent(0, chrom_energy1) ? 0 :
223 |                   gen_dist(0, 0, chrom_energy1, chrom_energy2));
224 |     for (int i = 1; i < file1_frames; i++)
225 |         PATH(i, 0) = (PATH(i-1, 0) == 0 && silent(i, chrom_energy1) ? 0 :
226 |                       gen_dist(i, 0, chrom_energy1, chrom_energy2) + 
227 |                       PATH(i-1, 0));
228 |     PATH(0, 0) = (silent(0, chrom_energy2) ? 0 :
229 |                   gen_dist(0, 0, chrom_energy1, chrom_energy2));
230 |     for (int j = 1; j < file2_frames; j++)
231 |         PATH(0, j) = (PATH(0, j-1) == 0 && silent(j, chrom_energy2) ? 0 :
232 |                       gen_dist(0, j, chrom_energy1, chrom_energy2) + 
233 |                       PATH(0, j-1));
234 |     /* first row and first column are done, put proper value at (0,0) */
235 |     PATH(0, 0) = (!silent(0, chrom_energy1) || !silent(0, chrom_energy2) ?
236 |                   gen_dist(0, 0, chrom_energy1, chrom_energy2) : 0);
237 |     
238 |     /* Perform DP for the rest of the matrix */
239 |     for (int i = 1; i < file1_frames; i++)
240 |         for (int j = 1; j < file2_frames; j++)
241 |             PATH(i, j) = gen_dist(i, j, chrom_energy1, chrom_energy2) +
242 |                 min3(PATH(i-1, j-1), PATH(i-1, j), PATH(i, j-1)); 
243 |     
244 |     if (verbose) printf("Completed Dynamic Programming.\n");
245 |     
246 |     
247 |     x = file1_frames - 1;
248 |     y = file2_frames - 1;
249 | 
250 |     //x and y are the ending points, it can end at either the end of midi, 
251 |     // or end of audio but not both
252 |     pathx = ALLOC(short, (x + y + 2));
253 |     pathy = ALLOC(short, (x + y + 2));
254 | 	
255 |     assert(pathx != NULL);
256 |     assert(pathy != NULL);
257 | 	 
258 |     // map from file1 time to file2 time
259 |     time_map = ALLOC(float, file1_frames);
260 |     smooth_time_map = ALLOC(float, file1_frames);
261 | 	
262 | #if DEBUG_LOG
263 |     fprintf(dbf, "\nOptimal Path: ");
264 | #endif
265 |     while (1) {
266 |         /* Check for stopping */
267 |         if (x ==  0 & y == 0) {
268 |             path_step(0, 0);
269 |             path_reverse();
270 |             break;
271 |         }
272 | 		
273 |         /* Print the current coordinate in the path*/
274 |         path_step(x, y);
275 | 
276 |         /* Check for the optimal path backwards*/
277 |         if (x > 0 && y > 0 && PATH(x-1, y-1) <= PATH(x-1, y) &&
278 |             PATH(x-1, y-1) <= PATH(x, y-1)) {
279 |             x--;
280 |             y--;
281 |         } else if (x > 0 && y > 0 && PATH(x-1, y) <= PATH(x, y-1)) {
282 |             x--;
283 |         } else if (y > 0) {
284 |             y--;
285 |         } else if (x > 0) {
286 |             x--;
287 |         }
288 |     }
289 |     free(path);
290 | }
291 | 
292 | 
293 | 
294 | void Scorealign::linear_regression(int n, int width, float &a, float &b)
295 | {
296 |     int hw = (width - 1) / 2; // a more convenient form: 1/2 width
297 |     // compute average of x = avg of time_map[i]
298 |     float xsum = 0;
299 |     float ysum = 0;
300 |     float xavg, yavg;
301 |     int i;
302 |     for (i = n - hw; i <= n + hw; i++) {
303 |         xsum += i;
304 |         ysum += time_map[i];
305 |     }
306 |     xavg = xsum / width;
307 |     yavg = ysum / width;
308 |     float num = 0;
309 |     float den = 0;
310 |     for (i = n - hw; i <= n + hw; i++) {
311 |         num += (i - xavg) * (time_map[i] - yavg);
312 |         den += (i - xavg) * (i - xavg);
313 |     }
314 |     b = num / den;
315 |     a = yavg - b * xavg;
316 | }
317 | 
318 | 
319 | 
320 | 
321 | 
322 | /*			COMPUTE_SMOOTH_TIME_MAP 
323 | 	 compute regression line and estimate point at i
324 |  
325 | 	 Number of points in regression is smooth (an odd number). First
326 | 	 index to compute is (smooth-1)/2. Use that line for the first
327 | 	 (smooth+1)/2 points. The last index to compute is 
328 | 	 (file1_frames - (smooth+1)/2). Use that line for the last 
329 | 	 (smooth+1)/2 points.
330 | */
331 | void Scorealign::compute_smooth_time_map()
332 | {
333 |     // do the first points:
334 |     float a, b;
335 |     linear_regression((smooth - 1) / 2, smooth, a, b);
336 |     int i;
337 |     for (i = 0; i < (smooth + 1) / 2; i++) {
338 |         smooth_time_map[i] = a + b*i;
339 |     }
340 |     
341 |     // do the middle points:
342 |     for (i = (smooth + 1) / 2; i < file1_frames - (smooth + 1) / 2; i++) {
343 |         linear_regression(i, smooth, a, b);
344 |         smooth_time_map[i] = a + b*i;
345 |         
346 | #if DEBUG_LOG
347 |         fprintf(dbf, "time_map[%d] = %g, smooth_time_map[%d] = %g\n", 
348 |                 i, time_map[i], i, a + b*i);
349 | #endif
350 |         
351 |     }
352 |     
353 |     // do the last points
354 |     linear_regression(file1_frames - (smooth + 1) / 2, smooth, a, b);
355 |     for (i = file1_frames - (smooth + 1) / 2; i < file1_frames; i++) {
356 |         smooth_time_map[i] = a + b*i;
357 |     }
358 |     
359 |     
360 | }
361 |  
362 | 
363 | /* near_line -- see if point is near line */
364 | /**/
365 | bool near_line(float x1, float y1, float x2, float y2, float x, float y)
366 | {
367 |     float exact_y;
368 |     if (x1 == x) {
369 |         exact_y = y1;
370 |     } else {
371 |         assert(x1 != x2);
372 |         exact_y = y1 + (y2 - y1) * ((x - x1) / (x2 - x1));
373 |     }
374 |     y = y - exact_y;
375 |     return y < NEAR && y > -NEAR;
376 | }
377 | 
378 | 
379 | // path_copy -- copy a path for debugging
380 | short *path_copy(short *path, int len)
381 | {
382 |     short *new_path = ALLOC(short, len);
383 |     memcpy(new_path, path, len * sizeof(path[0]));
384 |     return new_path;
385 | }
386 | 
387 | 
388 | /* presmooth -- try to remove typical dynamic programming errors
389 |  * 
390 |  * A common problem is that the best path wanders off track a ways
391 |  * and then comes back. The idea of presmoothing is to see if the
392 |  * path is mostly a straight line. If so, adjust the points off of
393 |  * the line to fall along the line. The variable presmooth_time is
394 |  * the duration of the line. It is drawn between every pair of 
395 |  * points presmooth_time apart. If 25% of the first half of the line
396 |  * falls within one frame of the path, and 25% of the second half of
397 |  * the line falls within one frame of the path, then find the best
398 |  * fit of the line to the points within 1 frame. Then adjust the middle
399 |  * part of the line (from 25% to 75%) to fall along the line.
400 |  * Note that all this curve fitting is done on integer coordinates.
401 |  */
402 | void Scorealign::presmooth()
403 | {
404 |     int n = ROUND(presmooth_time / actual_frame_period_2);
405 |     n = (n + 3) & ~3; // round up to multiple of 4
406 |     int i = 0;
407 |     while (pathx[i] + n < file2_frames) {
408 |         /* line goes from i to i+n-1 */
409 |         int x1 = pathx[i];
410 |         int xmid = x1 + n/2;
411 |         int x2 = x1 + n;
412 |         int y1 = pathy[i];
413 |         int y2;
414 |         int j;
415 |         /* search for y2 = pathy[j] s.t. pathx[j] == x2 */
416 |         for (j = i + n; j < pathlen; j++) {
417 |             if (pathx[j] == x2) {
418 |                 y2 = pathy[j];
419 |                 break;
420 |             }
421 |         }
422 |         Regression regr;
423 |         /* see if line fits the data */
424 |         int k = i;
425 |         int count = 0;
426 |         while (pathx[k] < xmid) { // search first half
427 |             if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) {
428 |                 count++;
429 |                 regr.point(pathx[k], pathy[k]);
430 |             }
431 |             k++;
432 |         }
433 |         /* see if points were close to line */
434 |         if (count < n/4) {
435 |             i++;
436 |             continue;
437 |         }
438 |         /* see if line fits top half of the data */
439 |         while (pathx[k] < x2) {
440 |             if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) {
441 |                 count++;
442 |                 regr.point(pathx[k], pathy[k]);
443 |             }
444 |             k++;
445 |         }
446 |         /* see if points were close to line */
447 |         if (count < n/4) {
448 |             i++;
449 |             continue;
450 |         }
451 |         /* debug: */
452 |         SA_V(printf("presmoothing path from %d to %d:\n", i, j);)
453 |         SA_V(print_path_range(pathx, pathy, i, j);)
454 |         /* fit line to nearby points */
455 |         regr.regress();
456 |         /* adjust points to fall along line */
457 |         // basically reconstruct pathx and pathy from i to j
458 |         short x = pathx[i];
459 |         short y = pathy[i];
460 |         k = i + 1;
461 |         SA_V(printf("start loop: j %d, pathx %d, pathy %d\n",
462 |                  j, pathx[j], pathy[j]);)
463 |         while (x < pathx[j] || y < pathy[j]) {
464 |             SA_V(printf("top of loop: x %d, y %d\n", x, y);)
465 |             // iteratively make an optional move in the +y direction
466 |             // then make a move in the x direction
467 |             // check y direction: want to move to y+1 if either we are below
468 |             // the desired y coordinate or we are below the maximum slope
469 |             // line (if y is too low, we'll have to go at sharper than 2:1
470 |             // slope to get to pathx[j], pathy[j], which is bad
471 |             int target_y = ROUND(regr.f(x));
472 |             SA_V(printf("target_y@%d %d, r %g, ", x, target_y, regr.f(x));)
473 |             // but what if the line goes way below the last point?
474 |             // we don't want to go below a diagonal through the last point
475 |             int dist_to_last_point = pathx[j] - x;
476 |             int minimum_y = pathy[j] - 2 * dist_to_last_point;
477 |             if (target_y < minimum_y) {
478 |                 target_y = minimum_y;
479 |                 SA_V(printf("minimum_y %d, ", minimum_y);)
480 |             }
481 |             // alternatively, if line goes too high:
482 |             int maximum_y = pathy[j] - dist_to_last_point / 2;
483 |             if (target_y > maximum_y) {
484 |                 target_y = maximum_y;
485 |                 SA_V(printf("maximum y %d, ", maximum_y);)
486 |             }
487 |             // now advance to target_y
488 |             if (target_y > y) {
489 |                 pathx[k] = x;
490 |                 pathy[k] = y + 1;
491 |                 SA_V(printf("up: pathx[%d] %d, pathy[%d] %d\n", 
492 |                          k, pathx[k], k, pathy[k]);)
493 |                 k++;
494 |                 y++;
495 |             }
496 |             if (x < pathx[j]) {
497 |                 // now advance x
498 |                 x++;
499 |                 // y can either go horizontal or diagonal, i.e. y either
500 |                 // stays the same or increments by one
501 |                 target_y = ROUND(regr.f(x));
502 |                 SA_V(printf("target_y@%d %d, r %g, ", x, target_y, regr.f(x));)
503 |                 if (target_y > y) y++;
504 |                 pathx[k] = x;
505 |                 pathy[k] = y;
506 |                 SA_V(printf("pathx[%d] %d, pathy[%d] %d\n", 
507 |                          k, pathx[k], k, pathy[k]);)
508 |                 k++;
509 |             }
510 |         }
511 |         // make sure new path is no longer than original path
512 |         // the last point we wrote was k - 1
513 |         k = k - 1; // the last point we wrote is now k
514 |         // DEBUG
515 |         if (k > j) {
516 |             printf("oops: k %d, j %d\n", k, j);
517 |             SA_V(print_path_range(pathx, pathy, i, k);)
518 |         }
519 |         assert(k <= j);
520 |         // if new path is shorter than original, then fix up path
521 |         if (k < j) {
522 |             memmove(&pathx[k], &pathx[j], sizeof(pathx[0]) * (pathlen - j));
523 |             memmove(&pathy[k], &pathy[j], sizeof(pathy[0]) * (pathlen - j));
524 |             pathlen -= (j - k);
525 |         }
526 |         /* debug */
527 |         SA_V(printf("after presmoothing:\n");)
528 |         SA_V(print_path_range(pathx, pathy, i, k);)
529 |         /* since we adjusted the path, skip by 3/4 of n */
530 |         i = i + 3 * n/4;
531 |     }
532 | }
533 | 
534 | 
535 | /*				COMPUTE_REGRESSION_LINES
536 | 	computes the smooth time map from the path computed
537 | 	by dynamic programming
538 | 
539 | */
540 | void Scorealign::compute_regression_lines()
541 | {
542 |     // first, compute the y value of the path at
543 |     // each x value. If the path has multiple values
544 |     // on x, take the average.
545 |     int p = 0;
546 |     int i;
547 |     int upper, lower;
548 |     for (i = 0; i < file1_frames; i++) {
549 |         lower = pathy[p];
550 |         while (p < pathlen && pathx[p] == i) {
551 |             upper = pathy[p];
552 |             p = p + 1;
553 |         }
554 |         time_map[i] = (lower + upper) * 0.5;
555 |     }
556 |     // now fit a line to the nearest WINDOW points and record the 
557 |     // line's y value for each x.
558 |     compute_smooth_time_map();
559 | }
560 | 
561 | 
562 | void Scorealign::midi_tempo_align(Alg_seq &seq, bool verbose)
563 | {
564 |     // We create a new time map out of the alignment, and replace
565 |     // the original time map in the Alg_seq sequence
566 |     Alg_seq new_time_map_seq;
567 | 
568 |     /** align at all integer beats **/
569 |     int totalbeats; 
570 |     float dur_in_sec; 
571 |     find_midi_duration(seq, &dur_in_sec); 
572 |     // totalbeat = lastbeat + 1 and round up the beat
573 |     totalbeats = (int) (seq.get_time_map()->time_to_beat(dur_in_sec) + 2);
574 |     if (verbose)
575 |         printf("midi duration = %f, totalbeats=%i \n", dur_in_sec, totalbeats);   
576 |     
577 |     for (int i = 0; i < totalbeats; i++) {
578 |         double newtime = map_time(seq.get_time_map()->beat_to_time(i));
579 |         if (newtime > 0) 
580 |             new_time_map_seq.insert_beat(newtime, (double) i);
581 |     }
582 |     seq.convert_to_beats();
583 |     seq.set_time_map(new_time_map_seq.get_time_map());
584 | }
585 | 
586 | 
// align_midi_to_audio -- performs an alignment by adjusting midi to
// match audio
//
// seq: the MIDI sequence, used as file 1
// reader: the audio source, used as file 2
// verbose: passed on to the chroma generators and to align_chromagrams
//
// The frame counts returned by the chroma generators are stored in
// file1_frames and file2_frames; the generators are handed the addresses
// of chrom_energy1/2 and actual_frame_period_1/2 (presumably to fill
// them in -- see gen_chroma.h). align_chromagrams then does the alignment.
void Scorealign::align_midi_to_audio(Alg_seq &seq, Audio_reader &reader, 
                                    bool verbose)
{
    /* Generate the chroma for file 1 
     * This will always be the MIDI File when aligning midi with audio.
     */
    file1_frames = gen_chroma_midi(seq, HIGH_CUTOFF, LOW_CUTOFF, 
            &chrom_energy1, &actual_frame_period_1, 1, verbose);

    /* Generate the chroma for file 2 */
    file2_frames = gen_chroma_audio(reader, HIGH_CUTOFF, LOW_CUTOFF, 
            &chrom_energy2, &actual_frame_period_2, 2, verbose);

    /* normalize, compare and smooth the two chromagrams */
    align_chromagrams(verbose);
}
604 | 
// align_audio_to_audio -- performs an alignment between two audio sources
//
// reader1: the audio source used as file 1
// reader2: the audio source used as file 2
// verbose: passed on to the chroma generators and to align_chromagrams
//
// The frame counts returned by gen_chroma_audio are stored in
// file1_frames and file2_frames before align_chromagrams is called.
void Scorealign::align_audio_to_audio(Audio_reader &reader1, 
                                     Audio_reader &reader2, bool verbose)
{
    /* Generate the chroma for file 1 */
    file1_frames = gen_chroma_audio(reader1, HIGH_CUTOFF, LOW_CUTOFF, 
                    &chrom_energy1, &actual_frame_period_1, 1, verbose);
    /* Generate the chroma for file 2 */
    file2_frames = gen_chroma_audio(reader2, HIGH_CUTOFF, LOW_CUTOFF, 
                    &chrom_energy2, &actual_frame_period_2, 2, verbose);
    /* normalize, compare and smooth the two chromagrams */
    align_chromagrams(verbose);
}
614 | 
615 | 
// align_midi_to_midi -- performs an alignment between two MIDI sequences
//
// seq1: the sequence used as file 1
// seq2: the sequence used as file 2
// verbose: passed on to the chroma generators and to align_chromagrams
//
// The frame counts returned by gen_chroma_midi are stored in
// file1_frames and file2_frames before align_chromagrams is called.
void Scorealign::align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2, 
                                   bool verbose)
{
    /* Generate the chroma for file 1 */
    file1_frames = gen_chroma_midi(seq1, HIGH_CUTOFF, LOW_CUTOFF, 
            &chrom_energy1, &actual_frame_period_1, 1, verbose);

    /* Generate the chroma for file 2 */
    file2_frames = gen_chroma_midi(seq2, HIGH_CUTOFF, LOW_CUTOFF, 
            &chrom_energy2, &actual_frame_period_2, 2, verbose);

    /* normalize, compare and smooth the two chromagrams */
    align_chromagrams(verbose);
}
627 | 
628 | void Scorealign::align_chromagrams(bool verbose)
629 | {
630 |     if (verbose)
631 |         printf("\nGenerated Chroma.\n");
632 |     /* now that we have actual_frame_period_2, we can compute smooth */
633 |     // smooth is an odd number of frames that spans about smooth_time
634 |     smooth = ROUND(smooth_time / actual_frame_period_2);
635 |     if (smooth < 3) smooth = 3;
636 |     if (!(smooth & 1)) smooth++; // must be odd
637 |     if (verbose) {
638 |         printf("smoothing time is %g\n", smooth_time);
639 |         printf("smooth count is %d\n", smooth);
640 |     }
641 |     /* Normalize the chroma frames */
642 |     norm_chroma(file1_frames, chrom_energy1);
643 |     SA_V(printf("Chromagram data for file 1:\n");)
644 |     SA_V(print_chroma_table(chrom_energy1, file1_frames);)
645 |     norm_chroma(file2_frames, chrom_energy2);
646 |     SA_V(printf("Chromagram data for file 2:\n");)
647 |     SA_V(print_chroma_table(chrom_energy2, file2_frames);)
648 |     if (verbose)
649 |         printf("Normalized Chroma.\n");
650 | 
651 |     /* Compare the chroma frames */
652 |     compare_chroma(verbose);
653 |     /* Compute the smooth time map now for use by curve-fitting */	
654 |     compute_regression_lines();
655 |     /* if line_time is set, do curve-fitting */
656 |     if (line_time > 0.0) {
657 |         curve_fitting(this, verbose);
658 |         /* Redo the smooth time map after curve fitting or smoothing */	
659 |         compute_regression_lines();
660 |     }
661 |     /* if presmooth_time is set, do presmoothing */
662 |     if (presmooth_time > 0.0) {
663 |         presmooth();
664 |         /* Redo the smooth time map after curve fitting or smoothing */	
665 |         compute_regression_lines();
666 |     }
667 | }
668 | 


--------------------------------------------------------------------------------