├── trace.h ├── test ├── cde.gro ├── cde.mid ├── cde-11khz.wav └── save.sh ├── changelog.txt ├── gen_chroma.h ├── sautils.cpp ├── sautils.h ├── audiofilereader.h ├── main.h ├── trace.cpp ├── comp_chroma.h ├── alignfiles.h ├── regression.cpp ├── score-align.sln ├── audioreader.h ├── regression.h ├── compare_transcripts ├── compare_transcripts.sln └── compare.cpp ├── hillclimb.h ├── license.txt ├── curvefit.h ├── comp_chroma.cpp ├── audiofilereader.cpp ├── Makefile.osx ├── Makefile.linux ├── alignfiles.cpp ├── audioreader.cpp ├── fft3 ├── FFT3.h └── FFT3.cpp ├── scorealign.h ├── compare_transcripts.vcproj ├── hillclimb.cpp ├── README.txt ├── curvefit.cpp ├── score-align.vcproj ├── main.cpp ├── gen_chroma.cpp ├── scorealign.xcodeproj └── project.pbxproj └── scorealign.cpp /trace.h: -------------------------------------------------------------------------------- 1 | void trace(char *format, ...); 2 | 3 | -------------------------------------------------------------------------------- /test/cde.gro: -------------------------------------------------------------------------------- 1 | T0 -tempor:120.0 2 | c4 q 3 | d4 4 | e4 5 | -------------------------------------------------------------------------------- /test/cde.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cataska/scorealign/HEAD/test/cde.mid -------------------------------------------------------------------------------- /test/cde-11khz.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cataska/scorealign/HEAD/test/cde-11khz.wav -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | Change Log for scorealign 2 | 3 | 03-Jul-2008 RBD modified to use latest changes to portsmf 4 | added this file and license.txt 5 | 6 | 
/*
 * interpolate -- evaluate, at x, the straight line that passes through
 * the points (x1, y1) and (x2, y2)
 *
 * Values of x outside the interval between x1 and x2 are extrapolated
 * along the same line. No check is made for x1 == x2.
 */
double interpolate(double x1, double y1, double x2, double y2, double x)
{
    double rise = y2 - y1;     /* change in y between the two points */
    double run = x2 - x1;      /* change in x between the two points */
    double offset = x - x1;    /* how far x lies beyond the first point */

    return y1 + rise * offset / run;
}
7 | * 8 | */ 9 | 10 | #define ALLOC(t, n) (t *) malloc(sizeof(t) * (n)) 11 | 12 | #define ROUND(x) ((int) (0.5 + (x))) 13 | 14 | double interpolate(double x1, double y1, double x2, double y2, double x); 15 | 16 | 17 | -------------------------------------------------------------------------------- /audiofilereader.h: -------------------------------------------------------------------------------- 1 | #define MAX_NAME_LEN 255 2 | 3 | class Audio_file_reader : public Audio_reader { 4 | public: 5 | virtual long read(float *data, long n); 6 | SNDFILE *sf; 7 | SF_INFO sf_info; 8 | char name[MAX_NAME_LEN + 1]; 9 | int bytes_per_frame; 10 | long total_frames; 11 | bool open(char *filename, Scorealign &sa, bool verbose); 12 | void close(); 13 | double get_sample_rate(); 14 | long get_frames(); 15 | void print_info(); 16 | }; 17 | 18 | -------------------------------------------------------------------------------- /main.h: -------------------------------------------------------------------------------- 1 | /* main.h -- declarations of some command-line functions 2 | * 3 | * If VERBOSE is on in some files, some print functions are called. 4 | * Since these are only appropriate for the command-line interface, 5 | * there are some print functions declared in main.cpp. main.h 6 | * declares these functions for use in scorealign.cpp (and maybe others) 7 | * 8 | * 14-Jul-08 RBD 9 | */ 10 | 11 | void print_path_range(short *pathx, short *pathy, int i, int j); 12 | void print_chroma_table(float *chrom_energy, int frames); 13 | -------------------------------------------------------------------------------- /trace.cpp: -------------------------------------------------------------------------------- 1 | #include "stdarg.h" 2 | #include "stdio.h" 3 | 4 | #ifdef __linux__ 5 | #define _vsnprintf vsnprintf 6 | #elif defined(__MACH__) 7 | #define _vsnprintf vsnprintf 8 | #else 9 | #include "crtdbg.h" 10 | #endif 11 | 12 | void trace(char *format, ...) 
13 | { 14 | char msg[256]; 15 | va_list args; 16 | va_start(args, format); 17 | _vsnprintf(msg, 256, format, args); 18 | va_end(args); 19 | 20 | #if defined(_DEBUG) && !defined(__linux__) 21 | _CrtDbgReport(_CRT_WARN, NULL, NULL, NULL, msg); 22 | #else 23 | printf(msg); 24 | #endif 25 | } 26 | -------------------------------------------------------------------------------- /comp_chroma.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define SILENCE_THRESHOLD 0.001 8 | #define SILENCE_DISTANCE 16.0 9 | 10 | /* NORM_CHROMA 11 | * 12 | * This function normalizes the chroma for each frame of the 13 | * chrom_energy to mean 0 and std. dev. 1. 14 | */ 15 | void norm_chroma( int len, float *chrom_energy ); 16 | 17 | /* GEN_DIST 18 | * 19 | * This function generates the Euclidean distance for points i 20 | * and j in two chroma vectors for use with dynamic time warping of 21 | * the chroma vectors. 22 | */ 23 | float gen_dist(int i, int j, float *chrom_energy1, 24 | float *chrom_energy2 ); 25 | -------------------------------------------------------------------------------- /alignfiles.h: -------------------------------------------------------------------------------- 1 | /* alignfiles.h -- perform alignment given filenames 2 | * 3 | * this module is an intermediate between the command-line interface 4 | * main.cpp and the alignment code in scorealign.cpp. The scorealign.cpp 5 | * module is supposed to work on data from any source, e.g. it could be 6 | * a file, or it could be an object that sucks samples out of an 7 | * Audacity wave track. This module is supposed to not assume a command 8 | * line, type-script based interface, but *does* assume that you want 9 | * to read data from files, so you pass filenames into this module and 10 | * it reads the files and calls scorealign.cpp to do the alignment work. 
11 | * 12 | * 14-Jul-08 RBD 13 | */ 14 | 15 | bool align_files(char *infilename1, char *infilename2, 16 | Scorealign &sa, bool verbose); 17 | 18 | bool is_midi_file(char *filename); 19 | -------------------------------------------------------------------------------- /regression.cpp: -------------------------------------------------------------------------------- 1 | // Regression is a class to compute a linear regression 2 | // 3 | #include "regression.h" 4 | 5 | 6 | Regression::Regression() 7 | { 8 | init(); 9 | } 10 | 11 | void Regression::init() 12 | { 13 | sumxx = 0; 14 | sumyy = 0; 15 | sumxy = 0; 16 | sumx = 0; 17 | sumy = 0; 18 | n = 0; 19 | } 20 | 21 | void Regression::point(float x, float y) 22 | { 23 | sumx = sumx + x; 24 | sumy = sumy + y; 25 | sumxx = sumxx + x * x; 26 | sumyy = sumyy + y * y; 27 | sumxy = sumxy + x * y; 28 | n = n + 1; 29 | } 30 | 31 | void Regression::regress() 32 | { 33 | float sxx = sumxx - sumx * sumx / n; 34 | float sxy = sumxy - sumx * sumy / n; 35 | b = sxy / sxx; 36 | a = (sumy - b * sumx) / n; 37 | } 38 | 39 | 40 | float Regression::f(float x) 41 | { 42 | return a + b * x; 43 | } 44 | 45 | 46 | float Regression::f_inv(float y) 47 | { 48 | return (y - a) / b; 49 | } 50 | -------------------------------------------------------------------------------- /score-align.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 9.00 3 | # Visual C++ Express 2005 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "score-align", "score-align.vcproj", "{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Release|Win32 = Release|Win32 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Debug|Win32.ActiveCfg = Debug|Win32 13 | 
{5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Debug|Win32.Build.0 = Debug|Win32 14 | {5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Release|Win32.ActiveCfg = Release|Win32 15 | {5F04DE5D-0A34-496E-8A34-BE30BA9C70A1}.Release|Win32.Build.0 = Release|Win32 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- /audioreader.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | class Scorealign; 5 | 6 | class Audio_reader { 7 | public: 8 | long samples_per_frame; 9 | long hop_samples; 10 | double actual_frame_period; 11 | long frame_count; // number of chroma vectors (analysis windows) 12 | virtual void print_info() = 0; 13 | long read_window(float *data); 14 | virtual long read(float *data, long n) = 0; 15 | virtual double get_sample_rate() = 0; 16 | virtual long get_frames() = 0; // returns frames of input audio 17 | // i.e. 
(samples/channels) 18 | void calculate_parameters(Scorealign &sa, bool verbose); 19 | Audio_reader() { 20 | reading_first_window = true; 21 | reading_last_window = false; 22 | temp_data = NULL; 23 | } 24 | ~Audio_reader() { 25 | if (temp_data) free(temp_data); 26 | } 27 | protected: 28 | bool reading_first_window; 29 | bool reading_last_window; 30 | float *temp_data; 31 | }; 32 | 33 | -------------------------------------------------------------------------------- /regression.h: -------------------------------------------------------------------------------- 1 | // Regression is a class to compute a linear regression 2 | // 3 | // call point(x, y) to add a data point 4 | // call regress() to compute the regression 5 | // call f(x) to evaluate the linear regression at x 6 | // call f_inv(y) to evaluate the inverse linear regression at y 7 | // to restart with new points, call init() 8 | // regress() can be called after each point() 9 | // 10 | // other forms of regression should be added. 
// Regression -- accumulates (x, y) data points and fits a standard
// least-squares line a + b*x to them
class Regression {
public:
    Regression();
    void init();                   // restart with new points
    void point(float x, float y);  // add a data point
    void regress();                // compute the regression
    float f(float x);              // evaluate the linear regression at x
    float f_inv(float y);          // evaluate the inverse regression at y

private:
    float sumxx; // sum of x^2
    float sumyy; // sum of y^2
    float sumxy; // sum of xy
    float sumx;  // sum of x
    float sumy;  // sum of y
    int n;       // number of points
    float a;     // regression line is a + b*x
    float b;
};
7 | * 8 | * USAGE: 9 | * Subclass and define evaluate, a function of n parameters. 10 | * Set parameters, step_size, min, max, and n. Parameters must 11 | * be near a local maximum and step_size defines the grid that 12 | * is searched. Search will not go outside of min and max. 13 | * Call optimize() 14 | * retrieve optimized parameter values with get_parameters. 15 | * 16 | */ 17 | 18 | class Hillclimb { 19 | protected: 20 | double *parameters; // parameters to optimize 21 | double *step_size; // step size for each parameter (these are 22 | // provided by the user and remain fixed) 23 | double *min_param; // minimum parameter values 24 | double *max_param; // maximum parameter values 25 | int n; // number of parameters 26 | public: 27 | void set_parameters(double *parameters_, double *step_size_, 28 | double *min_, double *max_, int n_); 29 | // retrieve parameters after optimization: 30 | double *get_parameters() { return parameters; } 31 | virtual double evaluate() = 0; 32 | double optimize(); 33 | }; 34 | 35 | 36 | -------------------------------------------------------------------------------- /compare_transcripts/compare.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "stdlib.h" 3 | #include "string.h" 4 | #include "math.h" 5 | 6 | bool read_stuff(FILE *f, int &p, float &start, float &dur) 7 | { 8 | int n, chan, vel; 9 | int c; 10 | while ((c = getc(f)) == '#') { 11 | while ((c = getc(f)) != '\n' && c != EOF) ; 12 | } 13 | ungetc(c, f); 14 | int fields = fscanf(f, "%d %d %d %d %f %f", &n, &chan, &p, &vel, &start, &dur); 15 | if (fields == EOF) { 16 | return false; 17 | } else if (fields != 6) { 18 | printf("Error scanning file\n"); 19 | exit(1); 20 | } 21 | while ((c = getc(f)) != '\n' && c != EOF) ; 22 | return true; 23 | } 24 | 25 | 26 | void print_usage(char *progname) 27 | { 28 | printf("%s file1 file2\n", progname); 29 | } 30 | 31 | 32 | int main(int argc, char *argv[]) 33 | { 34 
| char *file1; 35 | char *file2; 36 | char *progname = strrchr(argv[0], '/'); 37 | progname = progname ? progname + 1 : argv[0]; 38 | if (argc < 3) { 39 | print_usage(progname); 40 | return 1; 41 | } 42 | file1 = argv[1]; 43 | file2 = argv[2]; 44 | FILE *f1 = fopen(file1, "r"); 45 | FILE *f2 = fopen(file2, "r"); 46 | int count = 0; 47 | float sum = 0.0; 48 | float sumsqr = 0.0; 49 | while (true) { 50 | int p1, p2; 51 | float start1, start2; 52 | float dur1, dur2; 53 | bool ok1 = read_stuff(f1, p1, start1, dur1); 54 | bool ok2 = read_stuff(f2, p2, start2, dur2); 55 | if (ok1 != ok2 || p1 != p2) { 56 | printf("Transcripts are not compatible\n"); 57 | exit(1); 58 | } 59 | if (!ok1) break; 60 | count++; 61 | float diff = start2 - start1; 62 | sum += diff; 63 | sumsqr += diff * diff; 64 | } 65 | float avg = sum / count; 66 | float stddev = sqrt((sumsqr / count) - (avg * avg)); 67 | printf("average error = %g\nstandard deviation = %g\n", avg, stddev); 68 | 69 | return 0; 70 | } -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * scorealign: audio/score alignment program 3 | * 4 | * license.txt -- a copy of the scorealign copyright notice and license 5 | * information 6 | * 7 | * Latest version available at: http://sourceforge.net/projects/portmedia 8 | * 9 | * Copyright (c) 1999-2000 Ross Bencina and Phil Burk 10 | * Copyright (c) 2001-2006 Roger B. 
Dannenberg 11 | * 12 | * Permission is hereby granted, free of charge, to any person obtaining 13 | * a copy of this software and associated documentation files 14 | * (the "Software"), to deal in the Software without restriction, 15 | * including without limitation the rights to use, copy, modify, merge, 16 | * publish, distribute, sublicense, and/or sell copies of the Software, 17 | * and to permit persons to whom the Software is furnished to do so, 18 | * subject to the following conditions: 19 | * 20 | * The above copyright notice and this permission notice shall be 21 | * included in all copies or substantial portions of the Software. 22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 26 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR 27 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 28 | * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 29 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 30 | */ 31 | 32 | /* 33 | * The text above constitutes the entire Portsmf license; however, 34 | * the PortMusic community also makes the following non-binding requests: 35 | * 36 | * Any person wishing to distribute modifications to the Software is 37 | * requested to send the modifications to the original developer so that 38 | * they can be incorporated into the canonical version. It is also 39 | * requested that these non-binding requests be included along with the 40 | * license above. 41 | */ 42 | -------------------------------------------------------------------------------- /curvefit.h: -------------------------------------------------------------------------------- 1 | /* 2 | * curvefit.h 3 | * scorealign 4 | * 5 | * Created by Roger B. Dannenberg on 10/20/07. 6 | * Copyright 2007 by Roger B. Dannenberg. 
All rights reserved. 7 | * 8 | * Dynamic programming does a good job of getting a rough alignment 9 | * that is very good in a global sense, but there are often short-term 10 | * "digressions" where the optimal path wanders off the "true" path. 11 | * These digressions are hard to correct with simple smoothing. This 12 | * module is intended to assert a "steady tempo" constraint to improve 13 | * the path. It starts with the dynamic programming path, which is likely 14 | * to be close to the correct path. The DP path (in pathx[] and pathy[]) 15 | * is divided evenly into segments of approximately line_time seconds 16 | * along the x axis. For a segment from x1 to x2, linear regression is 17 | * performed on the DP path from x1 to x2. This specifies an initial 18 | * line segment. Next, the end-points are joined by averaging: if 19 | * the segment from x1 to x2 ends at y-end and the segment from x2 to x3 20 | * starts at y-start, then the end of line x1--x2 and the beginning of 21 | * line x2--x3 are adjusted to (y-end + y-start)/2. Now the fun starts: 22 | * the endpoints of all the lines are adjusted up and down in order to 23 | * minimize a distance function. The distance function estimates the 24 | * integral of the distance matrix value along the line. Since the line 25 | * falls between discrete points in the matrix, interpolation is used. 26 | * The end result is converted back into a discrete path. (Maybe in the 27 | * future, the pathx[]/pathy[] representation should be generalized to 28 | * allow for non-integer path coordinates.) The resulting path will 29 | * have steady tempo at least within each segment. What I hope will 30 | * happen is that when there are chord changes or melody changes, there 31 | * will be "narrow" pathways in the distance matrix. Getting the 32 | * alignment wrong at these transitions will cost a lot. Other places 33 | * are not so critical, which is why I think DP wanders off the true 34 | * path. 
The straight-line path will ensure that for the most part, the 35 | * score alignment is determined by the transitions, and where alignment 36 | * is not critical, the alignment will avoid any rubato or over-fitting. 37 | */ 38 | 39 | void curve_fitting(Scorealign *sa, bool verbose); 40 | 41 | -------------------------------------------------------------------------------- /comp_chroma.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "allegro.h" 4 | #include "audioreader.h" 5 | #include "gen_chroma.h" 6 | #include "comp_chroma.h" 7 | 8 | using namespace std; 9 | 10 | /* NORM_CHROMA 11 | * 12 | * This function normalizes the chroma for each frame of the 13 | * chrom_energy to mean 0 and std. dev. 1. But if this is a 14 | * "silent frame", set the 13th element to 1. 15 | */ 16 | void norm_chroma( int len, float *chrom_energy ) { 17 | 18 | float avg = 0; 19 | float dev = 0; 20 | float sum = 0; 21 | 22 | for( int i = 0; i < len; i++ ) { 23 | 24 | /* Calculate avg for this frame */ 25 | sum = 0; 26 | for ( int j = 0; j < 12; j++ ) 27 | sum += AREF2(chrom_energy, i, j); 28 | avg = sum / 12.0; 29 | 30 | /* Silence detection: */ 31 | float silence = 0.0F; 32 | if (avg < SILENCE_THRESHOLD) { /* assume silent */ 33 | silence = 1.0F; 34 | } 35 | AREF2(chrom_energy, i, 12) = silence; 36 | 37 | // printf("avg at %g: %g\n", i * 0.25, avg); 38 | 39 | /* Normalize this frame to avg. 0 */ 40 | for ( int j = 0; j < 12; j++ ) 41 | AREF2(chrom_energy, i, j) -= avg; 42 | 43 | /* Calculate std. dev. for this frame */ 44 | sum = 0; 45 | for ( int j = 0; j < 12; j++ ) { 46 | float x = AREF2(chrom_energy, i, j); 47 | sum += x * x; 48 | } 49 | dev = sqrt( sum / 12.0 ); 50 | if (dev == 0.0) dev = 1.0F; /* don't divide by zero */ 51 | 52 | /* Normalize this frame to std. dev. 
1*/ 53 | for ( int j = 0; j < 12; j++ ) 54 | AREF2(chrom_energy, i, j) /= dev; 55 | } 56 | } 57 | 58 | /* Returns the minimum of two values */ 59 | double min2( double x, double y ) { 60 | return (x < y ? x : y); 61 | } 62 | 63 | /* GEN_DIST 64 | * 65 | * This function generates the Euclidean distance for points i 66 | * and j in two chroma vectors for use with dynamic time warping of 67 | * the chroma vectors. 68 | */ 69 | float gen_dist( int i, int j, float *chrom_energy1, 70 | float *chrom_energy2 ) { 71 | 72 | float sum = 0; 73 | float MAX = 12.0; 74 | 75 | if (AREF2(chrom_energy1, i, CHROMA_BIN_COUNT) != 76 | AREF2(chrom_energy2, j, CHROMA_BIN_COUNT)) { 77 | //printf("gd%g ", SILENCE_DISTANCE); // print result 78 | return SILENCE_DISTANCE; 79 | } 80 | /* Determine the distance between these vectors 81 | chroma1[i] and chroma2[j] to return */ 82 | for (int k = 0; k < 12; k++) { 83 | float x = AREF2(chrom_energy1, i, k); 84 | float y = AREF2(chrom_energy2, j, k); 85 | float diff = x - y; 86 | 87 | sum += diff*diff ; 88 | } 89 | sum = min2( sqrt( sum ), MAX ); 90 | //printf("gd%g ", sum); // print the result 91 | return sum; 92 | } 93 | -------------------------------------------------------------------------------- /audiofilereader.cpp: -------------------------------------------------------------------------------- 1 | /* audiofilereader.cpp -- implements a class to read samples 2 | * 3 | * 14-Jun-08 RBD 4 | * 16-Jun-08 RBD revised to use libsndfile 5 | */ 6 | #include "assert.h" 7 | #include "stdio.h" 8 | #include "string.h" 9 | #include "sndfile.h" 10 | #include "audioreader.h" 11 | #include "audiofilereader.h" 12 | 13 | double Audio_file_reader::get_sample_rate() 14 | { 15 | return sf_info.samplerate; 16 | } 17 | 18 | 19 | long Audio_file_reader::get_frames() 20 | { 21 | return total_frames; 22 | } 23 | 24 | 25 | long Audio_file_reader::read(float *data, long n) 26 | { 27 | // note that "samples_per_frame" is really "frames_per_window" in this 28 | // 
context, so we're computing bytes per window 29 | float *input_data = (float *) alloca(bytes_per_frame * samples_per_frame); 30 | assert(input_data != NULL) ; 31 | 32 | // read into the end of data 33 | long frames_read = sf_readf_float(sf, input_data, n); 34 | long chans = sf_info.channels; 35 | // now convert and move to beginning of data 36 | if (chans > 1) { 37 | for (int frame = 0; frame < frames_read; frame++) { 38 | float sum = 0; 39 | for (int chan = 0; chan < sf_info.channels; chan++) { 40 | // sum over channels within a frame 41 | sum += input_data[frame * chans + chan]; 42 | } 43 | // write the frame sum to result array 44 | data[frame] = sum; 45 | } 46 | } 47 | return frames_read; 48 | } 49 | 50 | 51 | bool Audio_file_reader::open(char *filename, Scorealign &sa, bool verbose) 52 | { 53 | bytes_per_frame = 0; // initialize now in case an error occurs 54 | name[0] = 0; 55 | bzero(&sf_info, sizeof(sf_info)); 56 | sf = sf_open(filename, SFM_READ, &sf_info); 57 | if (!sf) return false; 58 | strncpy(name, filename, MAX_NAME_LEN); 59 | name[MAX_NAME_LEN] = 0; // just in case 60 | total_frames = (long) sf_seek(sf, 0, SEEK_END); 61 | sf_seek(sf, 0, SEEK_SET); 62 | // we're going to read floats, but they might be multi-channel... 
63 | bytes_per_frame = sf_info.channels * sizeof(float); 64 | calculate_parameters(sa, verbose); 65 | return true; 66 | } 67 | 68 | 69 | void Audio_file_reader::close() 70 | { 71 | sf_close(sf); 72 | } 73 | 74 | 75 | void Audio_file_reader::print_info() 76 | { 77 | printf(" file name = %s\n", name); 78 | double sample_rate = sf_info.samplerate; 79 | printf(" sample rate = %g\n", sample_rate); 80 | printf(" channels = %d\n", sf_info.channels); 81 | /*=============================================================*/ 82 | printf(" total frames number is = %d\n", total_frames); 83 | printf(" audio duration = %g seconds\n", total_frames / sample_rate); 84 | /*=============================================================*/ 85 | } 86 | -------------------------------------------------------------------------------- /Makefile.osx: -------------------------------------------------------------------------------- 1 | # OS X Makefile for scorealign 2 | # NOTE: This is out of date. See the score-align.vcproj -OR- Makefile.linux 3 | 4 | # profile = -pg 5 | profile = 6 | 7 | DEBUGFLAGS = 8 | 9 | #optimize = -g $(DEBUGFLAGS) $(profile) 10 | optimize = -O 11 | 12 | #PORTSMF LIBRARY PATH: 13 | # (set this to the location of allegro.cpp, allegrosmfrd.cpp, allegrosmfwr.cpp, etc.) 
14 | # (if you do not have this directory, look for portsmf in the portmedia project 15 | # on sourceforge) 16 | PORTSMF = ../portsmf 17 | 18 | INCLUDES = -Isnd -Ifft3 -I$(PORTSMF) 19 | 20 | #OPTIONS FOR LINUX: 21 | c_opts = $(optimize) -DLINUX $(INCLUDES) 22 | 23 | cc = gcc $(c_opts) 24 | c++ = g++ $(c_opts) 25 | cclink = gcc 26 | c++link = g++ 27 | syslibs = -lm 28 | 29 | scorealign_o = comp_chroma.o gen_chroma.o scorealign.o snd/audionone.o snd/sndcvt.o snd/sndheader.o snd/sndio.o snd/snd.o snd/ieeecvt.o snd/sndlinux.o snd/sndfaillinux.o fft3/FFT.o mfmidi.o $(PORTSMF)/allegro.o $(PORTSMF)/allegrosmfrd.o $(PORTSMF)/allegrosmfwr.o $(PORTSMF)/allegrord.o $(PORTSMF)/strparse.o trace.o regression.o 30 | 31 | .SUFFIXES: .cpp .c .h .aur .o 32 | 33 | .cpp.o: 34 | $(c++) -c $< -o $*.o 35 | 36 | .c.o: 37 | $(cc) -c $< -o $*.o 38 | 39 | all: scorealign 40 | 41 | header: 42 | echo "**********************************************************" 43 | echo "**********************************************************" 44 | 45 | clean: 46 | echo "removing objects" 47 | find . -name "*~" | xargs rm -f 48 | rm -f $(scorealign_o) 49 | rm -f core* 50 | 51 | cleaner: clean 52 | rm -f TAGS 53 | 54 | cleanest: cleaner 55 | rm -f scorealign 56 | rm -f scorealign.ncb 57 | rm -f scorealign.opt 58 | rm -f scorealign.plg 59 | 60 | TAGS: 61 | find . \( -name "*.cpp" -o -name "*.h" \) -print | etags - 62 | 63 | tags: TAGS 64 | 65 | backup: cleanest 66 | tar cvf ../scorealign.tar . 
67 | gzip ../scorealign.tar 68 | echo "please rename ../scorealign.tar.gz" 69 | 70 | 71 | scorealign: $(scorealign_o) 72 | $(c++link) $(scorealign_o) -o scorealign $(syslibs) 73 | 74 | test0: 75 | ./scorealign /home/rbd/wdh/music/nyquist-temp.wav /home/rbd/wdh/music/chrom_nyquist-temp.wav > output.txt 76 | diff output.txt /home/rbd/wdh/music/output.txt | less 77 | 78 | test: 79 | ./scorealign /home/rbd/scorealign/data/live-beeth-s5m1-25s-mono.wav /home/rbd/scorealign/data/midi-beeth-s5m1-25s-mono.wav > output.txt 80 | diff output.txt /home/rbd/wdh/music/output.txt | less 81 | 82 | bigtest: 83 | ./scorealign data/live-Beethoven_Symphony_No5_Mvt1.wav data/midi-Beethoven_Symphony_No5_Mvt1.wav 84 | 85 | bigtest2: 86 | ./scorealign data/midi-Beethoven_Symphony_No5_Mvt1.wav data/live-Beethoven_Symphony_No5_Mvt1.wav 87 | 88 | 89 | # DEPENDENCIES 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /Makefile.linux: -------------------------------------------------------------------------------- 1 | # Linux makefile for scorealign 2 | # please use either make -f Makfile.linux 3 | # -OR- ln -s Makfile.linux Makefile 4 | 5 | # profile = -pg 6 | profile = 7 | 8 | DEBUGFLAGS = 9 | 10 | optimize = -g $(DEBUGFLAGS) $(profile) 11 | # optimize = -O 12 | 13 | #PORTSMF LIBRARY PATH: 14 | # (set this to the location of allegro.cpp, allegrosmfrd.cpp, allegrosmfwr.cpp, etc.) 
15 | # (if you do not have this directory, look for portsmf in the portmedia project 16 | # on sourceforge) 17 | PORTSMF = ../portsmf 18 | 19 | INCLUDES = -Isnd -Ifft3 -I$(PORTSMF) 20 | 21 | #OPTIONS FOR LINUX: 22 | c_opts = $(optimize) -DLINUX $(INCLUDES) 23 | 24 | cc = gcc $(c_opts) 25 | c++ = g++ $(c_opts) 26 | cclink = gcc 27 | c++link = g++ 28 | syslibs = -lsndfile -lm 29 | 30 | scorealign_o = comp_chroma.o gen_chroma.o scorealign.o \ 31 | fft3/FFT3.o $(PORTSMF)/mfmidi.o $(PORTSMF)/allegro.o \ 32 | $(PORTSMF)/allegrosmfrd.o $(PORTSMF)/allegrosmfwr.o \ 33 | $(PORTSMF)/allegrord.o $(PORTSMF)/strparse.o trace.o \ 34 | regression.o sautils.o curvefit.o hillclimb.o main.o \ 35 | audioreader.o audiofilereader.o alignfiles.o 36 | 37 | .SUFFIXES: .cpp .c .h .aur .o 38 | 39 | .cpp.o: 40 | $(c++) -c $< -o $*.o 41 | 42 | .c.o: 43 | $(cc) -c $< -o $*.o 44 | 45 | all: scorealign 46 | 47 | header: 48 | echo "**********************************************************" 49 | echo "**********************************************************" 50 | 51 | clean: 52 | echo "removing objects" 53 | find . -name "*~" | xargs rm -f 54 | rm -f $(scorealign_o) 55 | rm -f core* 56 | 57 | cleaner: clean 58 | rm -f TAGS 59 | 60 | cleanest: cleaner 61 | rm -f scorealign 62 | rm -f scorealign.ncb 63 | rm -f scorealign.opt 64 | rm -f scorealign.plg 65 | 66 | TAGS: 67 | find . \( -name "*.cpp" -o -name "*.h" \) -print | etags - 68 | 69 | tags: TAGS 70 | 71 | backup: cleanest 72 | tar cvf ../scorealign.tar . 
73 | gzip ../scorealign.tar 74 | echo "please rename ../scorealign.tar.gz" 75 | 76 | 77 | scorealign: $(scorealign_o) 78 | $(c++link) $(scorealign_o) -o scorealign $(syslibs) 79 | 80 | test0: 81 | ./scorealign /home/rbd/wdh/music/nyquist-temp.wav /home/rbd/wdh/music/chrom_nyquist-temp.wav > output.txt 82 | diff output.txt /home/rbd/wdh/music/output.txt | less 83 | 84 | test: 85 | ./scorealign /home/rbd/scorealign/data/live-beeth-s5m1-25s-mono.wav /home/rbd/scorealign/data/midi-beeth-s5m1-25s-mono.wav > output.txt 86 | diff output.txt /home/rbd/wdh/music/output.txt | less 87 | 88 | bigtest: 89 | ./scorealign data/live-Beethoven_Symphony_No5_Mvt1.wav data/midi-Beethoven_Symphony_No5_Mvt1.wav 90 | 91 | bigtest2: 92 | ./scorealign data/midi-Beethoven_Symphony_No5_Mvt1.wav data/live-Beethoven_Symphony_No5_Mvt1.wav 93 | 94 | 95 | # DEPENDENCIES 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /alignfiles.cpp: -------------------------------------------------------------------------------- 1 | /* alignfiles.cpp -- do alignment on files 2 | * 3 | * 14-Jul-08 RBD 4 | * 5 | * This is an intermediate layer between main.cpp, the client, and 6 | * scorealign.cpp, where the real work is done. 
7 | */ 8 | 9 | #include "stdio.h" 10 | #include "string.h" 11 | #include "sndfile.h" 12 | #include 13 | #include "allegro.h" 14 | #include "audioreader.h" 15 | #include "audiofilereader.h" 16 | #include "alignfiles.h" 17 | #include "scorealign.h" 18 | 19 | using namespace std; 20 | 21 | /* is_midi_file -- see if file name ends in .mid */ 22 | /**/ 23 | bool is_midi_file(char *filename) 24 | { 25 | size_t len = strlen(filename); 26 | return (len > 4 && strcmp(filename + len - 4, ".mid") == 0); 27 | } 28 | 29 | 30 | bool align_files(char *infilename1, char *infilename2, 31 | Scorealign &sa, bool verbose) 32 | { 33 | if (verbose) printf("opening %s\n", infilename1); 34 | if (is_midi_file(infilename1)) { 35 | // get sequence from infilename1 36 | Alg_seq seq(infilename1, true); 37 | if (seq.get_read_error()) { // error opening file 38 | if (verbose) 39 | printf ("Error: Not able to open input file %s\n", 40 | infilename1); 41 | return false; 42 | } 43 | if (verbose) printf("opening %s\n", infilename2); 44 | if (is_midi_file(infilename2)) { 45 | // get sequence from infilename2 46 | Alg_seq seq2(infilename2, true); 47 | if (seq2.get_read_error()) { // error opening file 48 | if (verbose) 49 | printf ("Error: Not able to open input file %s\n", 50 | infilename2); 51 | return false; 52 | } 53 | sa.align_midi_to_midi(seq, seq2, verbose); 54 | return true; 55 | } else { 56 | // get audio from infilename2 57 | Audio_file_reader reader; 58 | if (!reader.open(infilename2, sa, verbose)) { 59 | if (verbose) 60 | printf ("Error: Not able to open input file %s\n", 61 | infilename2); 62 | return false; 63 | } 64 | sa.align_midi_to_audio(seq, reader, verbose); 65 | return true; 66 | } 67 | } else { // if first file is audio, so is second file 68 | Audio_file_reader reader1; 69 | if (!reader1.open(infilename1, sa, verbose)) { 70 | if (verbose) 71 | printf ("Error: Not able to open input file %s\n", 72 | infilename1); 73 | return false; 74 | } 75 | if (verbose) printf("opening %s\n", 
infilename2); 76 | Audio_file_reader reader2; 77 | if (!reader2.open(infilename2, sa, verbose)) { 78 | if (verbose) 79 | printf ("Error: Not able to open input file %s\n", 80 | infilename2); 81 | return false; 82 | } 83 | sa.align_audio_to_audio(reader1, reader2, verbose); 84 | return true; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /audioreader.cpp: -------------------------------------------------------------------------------- 1 | /* audioreader.cpp -- reads sequence of overlapping windows 2 | * 3 | * 14-Jul-08 RBD 4 | */ 5 | 6 | #include "assert.h" 7 | #include "stdio.h" 8 | #include "math.h" 9 | #include "sautils.h" 10 | #include "string.h" 11 | #include 12 | #include "allegro.h" 13 | #include "audioreader.h" 14 | #include "scorealign.h" 15 | 16 | using namespace std; 17 | 18 | long Audio_reader::read_window(float *data) 19 | // reads the next window of samples 20 | // the first time, fill half the window with zeros and the second half 21 | // with data from the file 22 | // after that, shift the window by hop_size and fill the end of the window 23 | // with hop_size new samples 24 | // the window is actually constructed in temp_data, then copied to data. 25 | // That way, the caller can apply a smoothing function to data and we'll 26 | // still have a copy. 27 | // the function returns false on the next call when detecting that there 28 | // is no more samples, 29 | // data -- the window to be returned 30 | // temp_data -- since we destroy data by windowing, temp_data saves 31 | // overlapping samples so we don't have to read them again 32 | // samples_per_frame -- must be even, note that first window is padded 33 | // half-full with zeros 34 | // hop_samples -- additional samples read each time after the first window 35 | { 36 | int frames_read; // how many frames did we read? 
37 | 38 | int hop = hop_samples; 39 | if (reading_first_window) { 40 | hop = samples_per_frame / 2; // first time we read more data 41 | // zero end of temp_data, which will shift to beginning 42 | memset(temp_data + hop, 0, 43 | sizeof(float) * (samples_per_frame - hop)); 44 | reading_first_window = false; 45 | } 46 | 47 | // before reading in new sounds, shift temp_data by hop_size 48 | memmove(temp_data, temp_data + hop, 49 | (samples_per_frame - hop) * sizeof(float)); 50 | 51 | frames_read = read(temp_data + samples_per_frame - hop, hop); 52 | // zero any leftovers (happens at last frame): 53 | //printf("check fr %i hs %i ws %i ",frames_read,hop_size,window_size); 54 | memset(temp_data + samples_per_frame - hop + frames_read, 0, 55 | sizeof(float) * (hop - frames_read)); 56 | assert(samples_per_frame - frames_read >= 0); 57 | 58 | // now copy temp_data to data 59 | memcpy(data, temp_data, sizeof(float) * samples_per_frame); 60 | 61 | if (frames_read != hop && reading_last_window == false) { 62 | reading_last_window = true; 63 | return true; 64 | } else if (reading_last_window == true) { 65 | return false; 66 | } else { 67 | return true; 68 | } 69 | } 70 | 71 | 72 | void Audio_reader::calculate_parameters(Scorealign &sa, bool verbose) 73 | { 74 | double sample_rate = get_sample_rate(); 75 | long pcm_frames = get_frames(); 76 | // we want to make sure samples_per_frame is even, to keep things 77 | // consistent we'll change hopsize_samples the same way 78 | samples_per_frame = (int) (sa.window_size * sample_rate + 0.5); 79 | if (samples_per_frame % 2 == 1) 80 | samples_per_frame = samples_per_frame + 1; 81 | 82 | /*=============================================================*/ 83 | 84 | hop_samples = (int)(sa.frame_period * sample_rate + 0.5); 85 | if (hop_samples % 2 == 1) 86 | hop_samples = hop_samples + 1; 87 | actual_frame_period = (hop_samples / sample_rate); 88 | 89 | // this is stored back in a field in sa as well as here in the reader 90 | frame_count= 
(int) ceil(((float) pcm_frames / hop_samples + 1)); 91 | this->frame_count = frame_count; 92 | temp_data = ALLOC(float, samples_per_frame); 93 | memset(temp_data, 0, samples_per_frame * sizeof(temp_data[0])); 94 | assert(temp_data); 95 | } 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /fft3/FFT3.h: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | 3 | FFT3.h -- FFT routines, based on the following but renamed with "3" 4 | to avoid naming problems. This early implementation from Audacity has 5 | been updated and the current Audacity version imports wxChar, but if I 6 | adapt the latest code into the scorealign library, then scorealign 7 | will depend upon wxWindows. On the other hand, if I don't update to 8 | the latest, then there are name conflicts when scorealign is used 9 | within Audacity. For now, at least, I'm just renaming functions, 10 | which has no real impact on scorealign (other than function names 11 | with the mystery "3" appended), but will result in duplicated code 12 | in Audacity. -RBD 13 | 14 | FFT.h 15 | 16 | Dominic Mazzoni 17 | 18 | September 2000 19 | 20 | This file contains a few FFT routines, including a real-FFT 21 | routine that is almost twice as fast as a normal complex FFT, 22 | and a power spectrum routine which is more convenient when 23 | you know you don't care about phase information. It now also 24 | contains a few basic windowing functions. 25 | 26 | Some of this code was based on a free implementation of an FFT 27 | by Don Cross, available on the web at: 28 | 29 | http://www.intersrv.com/~dcross/fft.html 30 | 31 | The basic algorithm for his code was based on Numerical Recipes 32 | in Fortran. 
I optimized his code further by reducing array 33 | accesses, caching the bit reversal table, and eliminating 34 | float-to-double conversions, and I added the routines to 35 | calculate a real FFT and a real power spectrum. 36 | 37 | Note: all of these routines use single-precision floats. 38 | I have found that in practice, floats work well until you 39 | get above 8192 samples. If you need to do a larger FFT, 40 | you need to use doubles. 41 | 42 | **********************************************************************/ 43 | 44 | #ifndef M_PI 45 | #define M_PI 3.14159265358979323846 /* pi */ 46 | #endif 47 | 48 | #define false 0 49 | #define true 1 50 | 51 | /* 52 | * This is the function you will use the most often. 53 | * Given an array of floats, this will compute the power 54 | * spectrum by doing a Real FFT and then computing the 55 | * sum of the squares of the real and imaginary parts. 56 | * Note that the output array is half the length of the 57 | * input array, and that NumSamples must be a power of two. 58 | */ 59 | 60 | void PowerSpectrum3(int NumSamples, float *In, float *Out); 61 | 62 | /* 63 | * Computes an FFT when the input data is real but you still 64 | * want complex data as output. The output arrays are half 65 | * the length of the input, and NumSamples must be a power of 66 | * two. 67 | */ 68 | 69 | void RealFFT3(int NumSamples, 70 | float *RealIn, float *RealOut, float *ImagOut); 71 | 72 | /* 73 | * Computes a FFT of complex input and returns complex output. 74 | * Currently this is the only function here that supports the 75 | * inverse transform as well. 
76 | */ 77 | 78 | void FFT3(int NumSamples, 79 | int InverseTransform, 80 | float *RealIn, float *ImagIn, float *RealOut, float *ImagOut); 81 | 82 | /* 83 | * Applies a windowing function to the data in place 84 | * 85 | * 0: Rectangular (no window) 86 | * 1: Bartlett (triangular) 87 | * 2: Hamming 88 | * 3: Hanning 89 | */ 90 | 91 | void WindowFunc3(int whichFunction, int NumSamples, float *data); 92 | 93 | /* 94 | * Returns the name of the windowing function (for UI display) 95 | */ 96 | 97 | char *WindowFuncName3(int whichFunction); 98 | 99 | /* 100 | * Returns the number of windowing functions supported 101 | */ 102 | 103 | int NumWindowFuncs3(); 104 | -------------------------------------------------------------------------------- /scorealign.h: -------------------------------------------------------------------------------- 1 | /* scorealign.h 2 | * 3 | * RBD 4 | */ 5 | 6 | // turn on lots of debugging, comment this line out to disable 7 | // #define SA_VERBOSE 1 8 | 9 | #ifdef SA_VERBOSE 10 | #define SA_V(stmt) stmt 11 | #else 12 | #define SA_V(stmt) 13 | #endif 14 | 15 | 16 | class Scorealign { 17 | public: 18 | float frame_period; // time in seconds 19 | float window_size; 20 | float presmooth_time; 21 | float line_time; 22 | float smooth_time; // duration of smoothing window 23 | int smooth; // number of points used to compute the smooth time map 24 | 25 | Scorealign() { 26 | frame_period = 0.25; 27 | window_size = 0.25; 28 | presmooth_time = 0.0; 29 | line_time = 0.0; 30 | smooth_time = 1.75; 31 | pathlen = 0; 32 | path_count = 0; 33 | pathx = NULL; 34 | pathy = NULL; 35 | } 36 | 37 | ~Scorealign() { 38 | if (pathx) free(pathx); 39 | if (pathy) free(pathy); 40 | } 41 | 42 | // chromagrams and lengths, path data 43 | float *chrom_energy1; 44 | int file1_frames; // number of frames in file1 45 | float *chrom_energy2; 46 | int file2_frames; //number of frames in file2 47 | short *pathx; //for midi (when aligning midi and audio) 48 | short *pathy; //for 
audio (when aligning midi and audio) 49 | int pathlen; 50 | void set_pathlen(int p) { pathlen = p; } 51 | float *time_map; 52 | float *smooth_time_map; 53 | 54 | // chroma vectors are calculated from an integer number of samples 55 | // that approximates the nominal frame_period. Actual frame period 56 | // is calculated and stored here: 57 | // time in seconds for midi (when aligning midi and audio) 58 | float actual_frame_period_1; 59 | // time in seconds for audio (when aligning midi and audio) 60 | float actual_frame_period_2; 61 | 62 | /* gen_chroma.cpp stuff: 63 | generates the chroma energy for a given file 64 | with a low cutoff and high cutoff. 65 | The chroma energy is placed in the float** chrom_energy. 66 | this 2D is an array of pointers. the pointers point to an array 67 | of length 12, representing the 12 chroma bins 68 | The function returns the number of frames 69 | (i.e. the length of the 1st dimention of chrom_energy 70 | */ 71 | int gen_chroma_audio(Audio_reader &reader, int hcutoff, int lcutoff, 72 | float **chrom_energy, float *actual_frame_period, 73 | int id, bool verbose); 74 | 75 | int gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff, 76 | float **chrom_energy, float *actual_frame_period, 77 | int id, bool verbose); 78 | 79 | /* scorealign.cpp stuff: */ 80 | float map_time(float t1); 81 | void midi_tempo_align(Alg_seq &seq , char *midiname, char *beatname); 82 | void align_midi_to_audio(Alg_seq &seq, Audio_reader &reader, 83 | bool verbose); 84 | void align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2, bool verbose); 85 | void align_audio_to_audio(Audio_reader &reader1, 86 | Audio_reader &reader2, bool verbose); 87 | void align_chromagrams(bool verbose); 88 | 89 | int path_count; // for debug log formatting 90 | void path_step(int i, int j); 91 | void path_reverse(); 92 | int sec_to_pathy_index(float sec); 93 | void compare_chroma(bool verbose); 94 | void linear_regression(int n, int width, float &a, float &b); 95 | void 
compute_smooth_time_map(); 96 | void presmooth(); 97 | void compute_regression_lines(); 98 | void midi_tempo_align(Alg_seq &seq, bool verbose); 99 | }; 100 | 101 | #define DEBUG_LOG 1 102 | #if DEBUG_LOG 103 | extern FILE *dbf; 104 | #endif 105 | 106 | int find_midi_duration(Alg_seq &seq, float *dur); 107 | -------------------------------------------------------------------------------- /compare_transcripts.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 25 | 28 | 31 | 34 | 37 | 40 | 52 | 55 | 58 | 61 | 68 | 71 | 74 | 77 | 80 | 83 | 86 | 89 | 92 | 93 | 101 | 104 | 107 | 110 | 113 | 116 | 125 | 128 | 131 | 134 | 143 | 146 | 149 | 152 | 155 | 158 | 161 | 164 | 167 | 168 | 169 | 170 | 171 | 172 | 177 | 180 | 181 | 182 | 187 | 188 | 193 | 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /hillclimb.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * hillclimb.cpp 3 | * scorealign 4 | * 5 | * Created by Roger Dannenberg on 10/20/07. 6 | * Copyright 2007 __MyCompanyName__. All rights reserved. 7 | * 8 | * Hillclimb is an abstract class for optimization. It models problems where 9 | * you have a vector of parameters (stored as an array), a corresponding set 10 | * of step sizes, and a non-linear function. The function is a virtual 11 | * member function that subclasses must implement. 12 | * 13 | * The optimization algorithm is as follows: 14 | * An initial set of parameters and step sizes is given. 15 | * 16 | * Estimate the partial derivatives with respect to each parameter value 17 | * by taking a step along that dimension (use step sizes to determine 18 | * how far to go) and calling the evaluate virtual function. 19 | * Find the parameter that causes the maximum absolute change. If the 20 | * change is positive for that parameter, take the step along that 21 | * dimension. 
If the change is negative, take a negative step along that 22 | * dimension. 23 | * 24 | * Repeat the previous paragraph as long as the result of evaluate is 25 | * increasing. When it stops, you are at the top of a hill, a local 26 | * maximum. 27 | */ 28 | 29 | #include "hillclimb.h" 30 | #include "stdio.h" 31 | 32 | #define HC_VERBOSE 0 33 | #define V if (HC_VERBOSE) 34 | 35 | void Hillclimb::set_parameters(double *p, double *ss, 36 | double *min_, double *max_, int plen) 37 | { 38 | parameters = p; 39 | step_size = ss; 40 | min_param = min_; 41 | max_param = max_; 42 | n = plen; 43 | } 44 | 45 | /* this optimize assumes that the surface is smooth enought that if the 46 | * function decreases when parameter[i] increases, then the function will 47 | * increase when parameter[i] decreases. The alternative version does more 48 | * evaluation, but checks in both directions to find the best overall move. 49 | 50 | double Hillclimb::optimize() 51 | { 52 | double best = evaluate(); 53 | while (true) { 54 | printf("best %g ", best); 55 | // eval partial derivatives 56 | int i; 57 | // variables to search for max partial derivative 58 | double max = 0; // max of |dy| so far 59 | int max_i; // index where max was found 60 | int max_sign = 1; // sign of dy 61 | double max_y; // value of evaluate() at max_i 62 | // now search over all parameters for max change 63 | for (i = 0; i < n; i++) { 64 | int sign = 1; // sign of derivative in the +step direction 65 | int step_direction = 1; // how to undo parameter variation 66 | parameters[i] += step_size[i]; 67 | if (parameters[i] > max_param[i]) { 68 | // try stepping in the other direction 69 | parameters[i] -= step_size[i] * 2; 70 | sign = -1; 71 | step_direction = -1; 72 | } 73 | 74 | double y = evaluate(); 75 | // restore parameter i 76 | parameters[i] -= step_size[i] * step_direction; 77 | 78 | double dy = y - best; 79 | if (dy < 0) { 80 | dy = -dy; 81 | sign = -sign; 82 | } 83 | // is this the best yet and legal move? 
84 | double proposal = parameters[i] + step_size[i] * sign; 85 | if (dy > max && proposal <= max_param[i] && 86 | proposal >= min_param[i]) { 87 | max = dy; 88 | max_i = i; 89 | max_y = y; 90 | max_sign = sign; 91 | } 92 | } 93 | // best move is parameter max_i in max_sign direction 94 | parameters[max_i] += step_size[max_i] * max_sign; 95 | printf("moved %d to %g", max_i, parameters[max_i]); 96 | // what's the value now? put it in max_y 97 | if (max_sign == -1) max_y = evaluate(); 98 | printf(" to get %g (vs. best %g)\n", max_y, best); 99 | // otherwise, max_y already has the new value 100 | if (max_y <= best) { // no improvement, we're done 101 | parameters[max_i] -= step_size[max_i] * max_sign; 102 | printf("\nCompleted hillclimbing, best %g\n", best); 103 | return best; 104 | } 105 | // improvement because max_y higher than best: 106 | best = max_y; 107 | } 108 | } 109 | */ 110 | 111 | double Hillclimb::optimize() 112 | { 113 | double best = evaluate(); 114 | while (true) { 115 | V printf("best %g ", best); 116 | // eval partial derivatives 117 | int i; 118 | // variables to search for max partial derivative 119 | double max_y = best; // max of evaluate() so far 120 | int max_i; // index where best max was found 121 | double max_parameter; // the good parameter value for max_i 122 | // now search over all parameters for best improvement 123 | for (i = 0; i < n; i++) { 124 | V printf("optimize at %d param %g ", i, parameters[i]); 125 | double save_param = parameters[i]; 126 | parameters[i] = save_param + step_size[i]; 127 | if (parameters[i] <= max_param[i]) { 128 | double y = evaluate(); 129 | V printf("up->%g ", y); 130 | if (y > max_y) { 131 | V printf("NEW MAX! "); 132 | max_y = y; 133 | max_i = i; 134 | max_parameter = parameters[i]; 135 | } 136 | } 137 | parameters[i] = save_param - step_size[i]; 138 | if (parameters[i] >= min_param[i]) { 139 | double y = evaluate(); 140 | V printf("dn->%g ", y); 141 | if (y > max_y) { 142 | V printf("NEW MAX! 
"); 143 | max_y = y; 144 | max_i = i; 145 | max_parameter = parameters[i]; 146 | } 147 | } 148 | parameters[i] = save_param; 149 | V printf("\n"); 150 | } 151 | if (max_y <= best) { // no improvement, we're done 152 | V printf("\nCompleted hillclimbing, best %g\n", best); 153 | return best; 154 | } 155 | // improvement because max_y higher than best: 156 | parameters[max_i] = max_parameter; 157 | best = max_y; 158 | } 159 | } 160 | 161 | 162 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | scorealign -- a program for audio-to-audio and audio-to-midi alignment 2 | 3 | Last updated July 17, 2008 by RBD 4 | 5 | Contributors include: 6 | Ning Hu 7 | Roger B. Dannenberg 8 | Joshua Hailpern 9 | Umpei Kurokawa 10 | Greg Wakefield 11 | Mark Bartsch 12 | 13 | scorealign works by computing chromagrams of the two sources. Midi chromagrams 14 | are estimated directly from pitch data without synthesis. A similarity matrix 15 | is constructed and dynamic programming finds the lowest-cost path through the 16 | matrix. 17 | 18 | (some more details should be added here about handling boundaries) 19 | 20 | Output includes a map from one version to the other. If one file is MIDI, 21 | output also includes (1) an estimated transcript in ASCII format with time, 22 | pitch, MIDI channel, and duration of each notes in the audio file, (2) a 23 | time-aligned midi file, and (3) a text file with beat times. 24 | 25 | scorealign uses libsndfile (http://www.mega-nerd.com/libsndfile/). You must 26 | install libsndfile to build scorealign. 
27 | 28 | For Macintosh OS X, use Xcode to open scorealign.xcodeproj 29 | For Linux, use "make -f Makefile.linux" 30 | For Windows, open score-align.vcproj (probably out of date now -- please 31 | update the project following Makefile.linux, or contact rbd at cs.cmu.edu) 32 | 33 | Command line parameters: 34 | 35 | scorealign [-<flags> [<flag-values>]] 36 | <file1> [<file2>] 37 | specifying only <file1> simply transcribes MIDI in <file1> to 38 | transcription.txt. Otherwise, align <file1> and <file2>. 39 | -h 0.25 indicates a frame period of 0.25 seconds 40 | -w 0.25 indicates a window size of 0.25 seconds. 41 | -r indicates filename to write raw alignment path to (default path.data) 42 | -s is filename to write smoothed alignment path (default is smooth.data) 43 | -t is filename to write the time aligned transcription 44 | (default is transcription.txt) 45 | -m is filename to write the time aligned midi file (default is midi.mid) 46 | -b is filename to write the time aligned beat times (default is beatmap.txt) 47 | -o 2.0 indicates a smoothing window of 2.0s 48 | -p 3.0 means pre-smooth with a 3s window 49 | -x 6.0 indicates 6s line segment approximation 50 | 51 | A bit more detail: 52 | 53 | The -o flag (smoothing) controls a post-process on the path. Since the 54 | path is discrete, it will have small jumps ahead or pauses whenever it 55 | differs from the diagonal. A linear regression is performed at each frame 56 | using a set of points whose size is determined by the -o parameter, and the 57 | discrete time indicated by the path is replaced by a continuous time estimated 58 | from neighboring points. This smooths out local irregularities in the time 59 | map. 60 | 61 | The -p flag (presmoothing) operates on the discrete path. It tries to fit a 62 | straight line segment (length is set by -p) to the path. If the path fits 63 | well to the first half of the path and the second half of the path, the 64 | entire path is replaced with a straight line approximation. 
To "fit well", 65 | half of the path points must fall very close to the straight line (currently, 66 | within 1.5 frames). For example, if the line segment spans 40 frames, then 10 67 | path points must be close to the first 20 frames and 10 path points must be 68 | close to the last 20 frames. The step is repeated on overlapping windows 69 | through the whole piece. This presmoothing step is designed to detect 70 | places where dynamic programming "wanders off" from the true path and then 71 | realigns to the true path. The off-track points are replaced, so they do not 72 | adversely affect the smoothing step. This approach does not seem to be 73 | robust, but sometimes works well. 74 | 75 | The -x flag is another approach to deal with dynamic programming errors. It 76 | divides the entire piece into segments whose lengths are about equal and about 77 | the length specified by the -x parameter. The line segments are fit to the 78 | path by linear regression, and their endpoints are joined by averaging their 79 | linear regression values. Next, a hill-climbing search is performed to 80 | minimize the total distance along the path. This is like dynamic programming 81 | except that each line spans many frames, so the resulting path is forced to 82 | be fairly straight. Linear interpolation is used to estimate chroma distance 83 | since the lines do not always pass through integer frame locations. This approach 84 | is probably good when the audio is known to have a steady tempo or be 85 | performed with tempo changes that match those in the midi file. 86 | 87 | Some notes on the software architecture of scorealign: 88 | 89 | scorealign was originally implemented as a fairly monolithic program 90 | in MatLab. It was ported to C++. To incorporate this code into Audacity, 91 | the code was restructured so that audio input is obtained from 92 | Audio_reader, an abstract class that calls on a subclass to implement 93 | read(). 
The subclass just copies floats into the provided buffer. It is 94 | responsible for sample format conversion, stereo-to-mono conversion, etc. 95 | The Audio_reader returns possibly overlapping buffers of floats. The 96 | Audio_file_reader subclass uses libsndfile to read in samples and convert 97 | them to float. It does its own conversion to mono. 98 | 99 | When scorealign is used in Audacity, a different subclass of Audio_reader 100 | will call into Audacity using a Mixer object to retrieve samples from 101 | selected tracks. 102 | 103 | For use from the command line, scorealign has a module main.cpp that 104 | parses command line arguments. A lot of parameters and options that 105 | were formerly globals are now stored in a Scorealign object that is 106 | passed around to many routines and methods. main.cpp creates a (global) 107 | Scorealign object and uses code in the module alignfiles.cpp to do the 108 | work. The purpose of alignfiles is to provide an API that does not 109 | depend upon a command line interface, but which assumes you are aligning 110 | files. Finally, alignfiles.cpp uses an Audio_file_reader to offer 111 | samples to the main score alignment algorithm. 112 | 113 | To summarize: 114 | scorealign.cpp and gen_chroma.cpp do most of the pure alignment work 115 | audioreader.cpp abstracts the source of audio, whether it comes from 116 | a file or some other source 117 | alignfiles.cpp opens files and invokes the modules above 118 | main.cpp parses the command line and invokes alignfiles. 119 | 120 | -------------------------------------------------------------------------------- /curvefit.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * curvefit.cpp 3 | * scorealign 4 | * 5 | * Created by Roger Dannenberg on 10/20/07. 6 | * Copyright 2007 __MyCompanyName__. All rights reserved. 
7 | * 8 | */ 9 | 10 | #include "assert.h" 11 | #include "comp_chroma.h" 12 | #include "sautils.h" 13 | // the following are needed to get Scorealign 14 | #include 15 | #include "allegro.h" 16 | #include "audioreader.h" 17 | #include "scorealign.h" 18 | #include "hillclimb.h" 19 | #include "curvefit.h" 20 | 21 | void save_path(char *filename); 22 | 23 | /* Curvefit class: do hill-climbing to fit lines to data 24 | * 25 | * This class implements the algorithm described above. 26 | * The problem is partitioned into the general search algorithm 27 | * (implemented in Hillclimb::optimize) and the evaluation function 28 | * (implemented in Curvefit::evaluate). A brute-force evaluation 29 | * would simply recompute the cost of the entire path every time, 30 | * but note that the search algorithm works by adjusting one parameter 31 | * at a time. This affects at most two line segments, so the rest 32 | * contribute a cost that does not need to be recomputed. Thus the 33 | * total cost can be computed incrementally. It is hard to see how 34 | * to use this optimization within the general Hillclimb:optimize 35 | * method, so to avoid making that algorithm very specific and ugly, 36 | * I decided to hide the incremental nature of evaluate inside 37 | * the evaluate function itself. The way this works is that evaluate 38 | * keeps a cache of the coordinates of each line segment and the 39 | * resulting cost of the segment. Before recomputing any segment, 40 | * the cache is consulted. If the end points have not moved, the 41 | * cached value is retrieved. Ideally, there should be a 3-element 42 | * cache because endpoints are moved and then restored. (The three 43 | * elements would hold the results of the original, changed left, 44 | * and changed right endpoints.) The bigger cache would eliminate 45 | * 1/3 of the computation, but the simple cache already eliminates 46 | * about (n-2)/n of the work, so that should help a lot. 
47 | */ 48 | 49 | class Curvefit : public Hillclimb { 50 | public: 51 | Curvefit(Scorealign *sa_, bool verbose_) { sa = sa_; verbose = verbose_; } 52 | virtual double evaluate(); 53 | void setup(int n); 54 | double *get_x() { return x; } 55 | private: 56 | Scorealign *sa; 57 | bool verbose; 58 | double line_dist(int i); // get cost of line segment i 59 | double compute_dist(int i); // compute cost of line segment i 60 | double distance_rc(int row, int col); 61 | double distance_xy(double x, double y); 62 | 63 | double *p1_cache; // left endpoint y values 64 | double *p2_cache; // right endpoint y values 65 | double *d_cache; // cached cost of line segment 66 | double *x; // the x values of line segment endpoints 67 | // (the y values are in parameters[]) 68 | }; 69 | 70 | 71 | double Curvefit::evaluate() 72 | { 73 | double sum = 0; 74 | // why does this loop go to n-2? Because i represents the left endpoint 75 | // of the line segment. There are n parameters, but only n-1 segments. 76 | for (int i = 0; i < n-1; i++) { 77 | sum += line_dist(i); // look up in cache or recompute each segment 78 | } 79 | return -sum; // return negative of distance so that bigger will be better 80 | } 81 | 82 | 83 | double Curvefit::line_dist(int i) 84 | { 85 | if (p1_cache[i] == parameters[i] && 86 | p2_cache[i] == parameters[i+1]) { 87 | // endpoints have not changed: 88 | return d_cache[i]; 89 | } 90 | // otherwise, we need to recompute and save dist in cache 91 | double d = compute_dist(i); 92 | p1_cache[i] = parameters[i]; 93 | p2_cache[i] = parameters[i+1]; 94 | d_cache[i] = d; 95 | return d; 96 | } 97 | 98 | 99 | void Curvefit::setup(int segments) 100 | { 101 | // number of parameters is greater than segments because the left 102 | // col of segment i is parameter i, so the right col of 103 | // the last segment == parameter[segments]. 
104 | n = segments + 1; 105 | parameters = ALLOC(double, n); 106 | p1_cache = ALLOC(double, n); 107 | p2_cache = ALLOC(double, n); 108 | d_cache = ALLOC(double, n); 109 | x = ALLOC(double, n); 110 | step_size = ALLOC(double, n); 111 | min_param = ALLOC(double, n); 112 | max_param = ALLOC(double, n); 113 | int i; 114 | // ideal frames per segment 115 | float seg_length = ((float) (sa->file1_frames - 1)) / segments; 116 | for (i = 0; i < n; i++) { // initialize cache keys to garbage 117 | p1_cache[i] = p2_cache[i] = -999999.99; 118 | // initialize x values 119 | x[i] = ROUND(i * seg_length); 120 | // now initialize parameters based on pathx/pathy/time_map 121 | // time_map has y values for each x 122 | parameters[i] = sa->time_map[(int) x[i]]; 123 | if (verbose) 124 | printf("initial x[%d] = %g, parameters[%d] = %g\n", 125 | i, x[i], i, parameters[i]); 126 | step_size[i] = 0.5; 127 | min_param[i] = 0; 128 | max_param[i] = sa->file2_frames - 1; 129 | } 130 | } 131 | 132 | 133 | // distance_rc -- look up or compute distance between chroma vectors 134 | // at row, col in similarity matrix 135 | // 136 | // Note: in current implementation, there is no stored representation 137 | // of the matrix, so we have to recompute every time. It would be 138 | // possible to store the whole matrix, but it's large and it would 139 | // double the memory requirements (we already allocate the large 140 | // PATH array in compare_chroma to compute the optimal path. 141 | // 142 | // Since distance can be computed relatively quickly, a better plan 143 | // would be to cache values along the path. Here's a brief design 144 | // (for the future, assuming this routine is actually a hot spot): 145 | // Allocate a matrix that is, say, 20 x file1_frames to contain distances 146 | // that are +/- 10 frames from the path. Initialize cells to -1. 147 | // Allocate an array of integer offsets of size file1_frames. 
148 | // Fill in the integer offsets with the column number (pathy) value of 149 | // the path. 150 | // Now, to get distance_rc(row, col): 151 | // offset = offsets[row] 152 | // i = 10 + col - offset; 153 | // if (i < 0 || i > 20) /* not in cache */ return compute_distance(...); 154 | // dist = distances[20 * row + i]; 155 | // if (dist == -1) { return distances[20 * row + i] = compute_distance...} 156 | // return dist; 157 | // 158 | double Curvefit::distance_rc(int row, int col) 159 | { 160 | return gen_dist(row, col, sa->chrom_energy1, sa->chrom_energy2); 161 | } 162 | 163 | 164 | // compute distance from distance matrix using interpolation. A least 165 | // one of x, y should be an integer value so interpolation is only 2-way 166 | double Curvefit::distance_xy(double x, double y) 167 | { 168 | int xi = (int) x; 169 | int yi = (int) y; 170 | if (xi == x) { // x is integer, interpolate along y axis 171 | double d1 = distance_rc(xi, yi); 172 | double d2 = distance_rc(xi, yi + 1); 173 | return interpolate(yi, d1, yi + 1, d2, y); 174 | } else if (yi == y) { // y is integer, interpolate along x axis 175 | double d1 = distance_rc(xi, yi); 176 | double d2 = distance_rc(xi + 1, yi); 177 | return interpolate(xi, d1, xi + 1, d2, x); 178 | } else { 179 | printf("FATAL INTERNAL ERROR IN distance_xy: neither x nor y is " 180 | "an integer\n"); 181 | assert(false); 182 | } 183 | } 184 | 185 | 186 | double Curvefit::compute_dist(int i) 187 | { 188 | double x1 = x[i], x2 = x[i+1]; 189 | double y1 = parameters[i], y2 = parameters[i+1]; 190 | double dx = x2 - x1, dy = y2 - y1; 191 | double sum = 0; 192 | int n; 193 | if (dx > dy) { // evauate at each x 194 | n = (int) dx; 195 | for (int x = (int) x1; x < x2; x++) { 196 | double y = interpolate(x1, y1, x2, y2, x); 197 | sum += distance_xy(x, y); 198 | } 199 | } else { // evaluate at each y 200 | n = (int) dy; 201 | for (int y = (int) y1; y < y2; y++) { 202 | double x = interpolate(y1, x1, y2, x2, y); 203 | sum += distance_xy(x, 
y); 204 | } 205 | } 206 | // normalize using line length: sum/n is average distance. Multiply 207 | // avg. distance (cost per unit length) by length to get total cost: 208 | double rslt = sqrt(dx*dx + dy*dy) * sum / n; 209 | // printf("compute_dist %d: x1 %g y1 %g x2 %g y2 %g sum %g rslt %g\n", 210 | // i, x1, y1, x2, y2, sum, rslt); 211 | return rslt; 212 | } 213 | 214 | 215 | void curve_fitting(Scorealign *sa, bool verbose) 216 | { 217 | if (verbose) 218 | printf("Performing line-segment approximation with %gs segments.\n", 219 | sa->line_time); 220 | Curvefit curvefit(sa, verbose); 221 | double *parameters; 222 | double *x; 223 | // how many segments? About total time / line_time: 224 | int segments = 225 | (int) (0.5 + (sa->actual_frame_period_1 * sa->file1_frames) / 226 | sa->line_time); 227 | curvefit.setup(segments); 228 | curvefit.optimize(); 229 | parameters = curvefit.get_parameters(); 230 | x = curvefit.get_x(); 231 | // now, rewrite pathx and pathy according to segments 232 | // pathx and pathy are generously allocated, so we can change pathlen 233 | // each segment goes from x[i], parameters[i] to x[i+1], parameters[i+1] 234 | int i; 235 | int j = 0; // index into path 236 | for (i = 0; i < segments; i++) { 237 | int x1 = (int) x[i]; 238 | int x2 = (int) x[i+1]; 239 | int y1 = (int) parameters[i]; 240 | int y2 = (int) parameters[i+1]; 241 | int dx = x2 - x1; 242 | int dy = y2 - y1; 243 | if (dx >= dy) { // output point at each x 244 | int x; 245 | for (x = x1; x < x2; x++) { 246 | sa->pathx[j] = x; 247 | sa->pathy[j] = (int) (0.5 + interpolate(x1, y1, x2, y2, x)); 248 | j++; 249 | } 250 | } else { 251 | int y; 252 | for (y = y1; y < y2; y++) { 253 | sa->pathx[j] = (int) (0.5 + interpolate(y1, x1, y2, x2, y)); 254 | sa->pathy[j] = y; 255 | j++; 256 | } 257 | } 258 | } 259 | // output last point 260 | sa->pathx[j] = (int) x[segments]; 261 | sa->pathy[j] = (int) (0.5 + parameters[segments]); 262 | j++; 263 | sa->set_pathlen(j); 264 | } 265 | 266 | 267 | 
268 | -------------------------------------------------------------------------------- /fft3/FFT3.cpp: -------------------------------------------------------------------------------- 1 | /********************************************************************** 2 | 3 | FFT3.cpp -- see notes in FFT3.h -RBD 4 | 5 | FFT.cpp 6 | 7 | Dominic Mazzoni 8 | 9 | September 2000 10 | 11 | This file contains a few FFT routines, including a real-FFT 12 | routine that is almost twice as fast as a normal complex FFT, 13 | and a power spectrum routine when you know you don't care 14 | about phase information. 15 | 16 | Some of this code was based on a free implementation of an FFT 17 | by Don Cross, available on the web at: 18 | 19 | http://www.intersrv.com/~dcross/fft.html 20 | 21 | The basic algorithm for his code was based on Numerican Recipes 22 | in Fortran. I optimized his code further by reducing array 23 | accesses, caching the bit reversal table, and eliminating 24 | float-to-float conversions, and I added the routines to 25 | calculate a real FFT and a real power spectrum. 26 | 27 | **********************************************************************/ 28 | 29 | #include 30 | #include 31 | #include 32 | #ifndef __MACH__ 33 | #include 34 | #endif 35 | #include "FFT3.h" 36 | 37 | int **gFFTBitTable3 = NULL; 38 | const int MaxFastBits = 16; 39 | 40 | int IsPowerOfTwo(int x) 41 | { 42 | if (x < 2) 43 | return false; 44 | 45 | if (x & (x - 1)) /* Thanks to 'byang' for this cute trick! 
*/ 46 | return false; 47 | 48 | return true; 49 | } 50 | 51 | int NumberOfBitsNeeded(int PowerOfTwo) 52 | { 53 | int i; 54 | 55 | if (PowerOfTwo < 2) { 56 | fprintf(stderr, "Error: FFT called with size %d\n", PowerOfTwo); 57 | exit(1); 58 | } 59 | 60 | for (i = 0;; i++) 61 | if (PowerOfTwo & (1 << i)) 62 | return i; 63 | } 64 | 65 | int ReverseBits(int index, int NumBits) 66 | { 67 | int i, rev; 68 | 69 | for (i = rev = 0; i < NumBits; i++) { 70 | rev = (rev << 1) | (index & 1); 71 | index >>= 1; 72 | } 73 | 74 | return rev; 75 | } 76 | 77 | void InitFFT() 78 | { 79 | gFFTBitTable3 = (int **) malloc(sizeof(int *) * MaxFastBits); 80 | 81 | int len = 2; 82 | int b; 83 | for (b = 1; b <= MaxFastBits; b++) { 84 | 85 | gFFTBitTable3[b - 1] = (int *) malloc(len * sizeof(int)); 86 | int i; 87 | for (i = 0; i < len; i++) 88 | gFFTBitTable3[b - 1][i] = ReverseBits(i, b); 89 | 90 | len <<= 1; 91 | } 92 | } 93 | 94 | inline int FastReverseBits3(int i, int NumBits) 95 | { 96 | if (NumBits <= MaxFastBits) 97 | return gFFTBitTable3[NumBits - 1][i]; 98 | else 99 | return ReverseBits(i, NumBits); 100 | } 101 | 102 | /* 103 | * Complex Fast Fourier Transform 104 | */ 105 | 106 | void FFT3(int NumSamples, 107 | int InverseTransform, 108 | float *RealIn, float *ImagIn, float *RealOut, float *ImagOut) 109 | { 110 | int NumBits; /* Number of bits needed to store indices */ 111 | int i, j, k, n; 112 | int BlockSize, BlockEnd; 113 | 114 | float angle_numerator = 2.0 * M_PI; 115 | float tr, ti; /* temp real, temp imaginary */ 116 | 117 | if (!IsPowerOfTwo(NumSamples)) { 118 | fprintf(stderr, "%d is not a power of two\n", NumSamples); 119 | exit(1); 120 | } 121 | 122 | if (!gFFTBitTable3) 123 | InitFFT(); 124 | 125 | if (InverseTransform) 126 | angle_numerator = -angle_numerator; 127 | 128 | NumBits = NumberOfBitsNeeded(NumSamples); 129 | 130 | /* 131 | ** Do simultaneous data copy and bit-reversal ordering into outputs... 
132 | */ 133 | 134 | for (i = 0; i < NumSamples; i++) { 135 | j = FastReverseBits3(i, NumBits); 136 | RealOut[j] = RealIn[i]; 137 | ImagOut[j] = (ImagIn == NULL) ? 0.0F : ImagIn[i]; 138 | } 139 | 140 | /* 141 | ** Do the FFT itself... 142 | */ 143 | 144 | BlockEnd = 1; 145 | for (BlockSize = 2; BlockSize <= NumSamples; BlockSize <<= 1) { 146 | 147 | float delta_angle = angle_numerator / (float) BlockSize; 148 | 149 | float sm2 = sin(-2 * delta_angle); 150 | float sm1 = sin(-delta_angle); 151 | float cm2 = cos(-2 * delta_angle); 152 | float cm1 = cos(-delta_angle); 153 | float w = 2 * cm1; 154 | float ar0, ar1, ar2, ai0, ai1, ai2; 155 | 156 | for (i = 0; i < NumSamples; i += BlockSize) { 157 | ar2 = cm2; 158 | ar1 = cm1; 159 | 160 | ai2 = sm2; 161 | ai1 = sm1; 162 | 163 | for (j = i, n = 0; n < BlockEnd; j++, n++) { 164 | ar0 = w * ar1 - ar2; 165 | ar2 = ar1; 166 | ar1 = ar0; 167 | 168 | ai0 = w * ai1 - ai2; 169 | ai2 = ai1; 170 | ai1 = ai0; 171 | 172 | k = j + BlockEnd; 173 | tr = ar0 * RealOut[k] - ai0 * ImagOut[k]; 174 | ti = ar0 * ImagOut[k] + ai0 * RealOut[k]; 175 | 176 | /* if(k==NumSamples-1) 177 | printf("j=NumSamples-1 => %g - %g=",RealOut[j],tr); 178 | */ 179 | RealOut[k] = RealOut[j] - tr; 180 | ImagOut[k] = ImagOut[j] - ti; 181 | /* 182 | if(k==NumSamples-1) 183 | printf("%g\n",RealOut[k]); 184 | */ 185 | RealOut[j] += tr; 186 | ImagOut[j] += ti; 187 | } 188 | } 189 | 190 | BlockEnd = BlockSize; 191 | } 192 | 193 | /* 194 | ** Need to normalize if inverse transform... 195 | */ 196 | 197 | if (InverseTransform) { 198 | float denom = (float) NumSamples; 199 | 200 | for (i = 0; i < NumSamples; i++) { 201 | RealOut[i] /= denom; 202 | ImagOut[i] /= denom; 203 | } 204 | } 205 | } 206 | 207 | /* 208 | * Real Fast Fourier Transform 209 | * 210 | * This function was based on the code in Numerical Recipes in C. 211 | * In Num. Rec., the inner loop is based on a single 1-based array 212 | * of interleaved real and imaginary numbers. 
Because we have two 213 | * separate zero-based arrays, our indices are quite different. 214 | * Here is the correspondence between Num. Rec. indices and our indices: 215 | * 216 | * i1 <-> real[i] 217 | * i2 <-> imag[i] 218 | * i3 <-> real[n/2-i] 219 | * i4 <-> imag[n/2-i] 220 | */ 221 | 222 | void RealFFT3(int NumSamples, float *RealIn, float *RealOut, float *ImagOut) 223 | { 224 | int Half = NumSamples / 2; 225 | int i; 226 | 227 | float theta = M_PI / Half; 228 | 229 | float *tmpReal = (float *) alloca(sizeof(float) * Half); 230 | float *tmpImag = (float *) alloca(sizeof(float) * Half); 231 | 232 | for (i = 0; i < Half; i++) { 233 | tmpReal[i] = RealIn[2 * i]; 234 | tmpImag[i] = RealIn[2 * i + 1]; 235 | } 236 | 237 | FFT3(Half, 0, tmpReal, tmpImag, RealOut, ImagOut); 238 | 239 | float wtemp = (float) (sin(0.5 * theta)); 240 | 241 | float wpr = -2.0F * wtemp * wtemp; 242 | float wpi = (float) (sin(theta)); 243 | float wr = 1.0F + wpr; 244 | float wi = wpi; 245 | 246 | int i3; 247 | 248 | float h1r, h1i, h2r, h2i; 249 | 250 | for (i = 1; i < Half / 2; i++) { 251 | 252 | i3 = Half - i; 253 | 254 | h1r = 0.5F * (RealOut[i] + RealOut[i3]); 255 | h1i = 0.5F * (ImagOut[i] - ImagOut[i3]); 256 | h2r = 0.5F * (ImagOut[i] + ImagOut[i3]); 257 | h2i = -0.5F * (RealOut[i] - RealOut[i3]); 258 | 259 | RealOut[i] = h1r + wr * h2r - wi * h2i; 260 | ImagOut[i] = h1i + wr * h2i + wi * h2r; 261 | RealOut[i3] = h1r - wr * h2r + wi * h2i; 262 | ImagOut[i3] = -h1i + wr * h2i + wi * h2r; 263 | 264 | wr = (wtemp = wr) * wpr - wi * wpi + wr; 265 | wi = wi * wpr + wtemp * wpi + wi; 266 | } 267 | 268 | RealOut[0] = (h1r = RealOut[0]) + ImagOut[0]; 269 | ImagOut[0] = h1r - ImagOut[0]; 270 | 271 | //free(tmpReal); 272 | //free(tmpImag); 273 | } 274 | 275 | /* 276 | * PowerSpectrum 277 | * 278 | * This function computes the same as RealFFT, above, but 279 | * adds the squares of the real and imaginary part of each 280 | * coefficient, extracting the power and throwing away the 281 | * phase. 
282 | * 283 | * For speed, it does not call RealFFT, but duplicates some 284 | * of its code. 285 | */ 286 | 287 | void PowerSpectrum3(int NumSamples, float *In, float *Out) 288 | { 289 | int Half = NumSamples / 2; 290 | int i; 291 | 292 | float theta = M_PI / Half; 293 | 294 | float *tmpReal = (float *) alloca(sizeof(float) * Half);; 295 | float *tmpImag = (float *) alloca(sizeof(float) * Half); 296 | float *RealOut = (float *) alloca(sizeof(float) * Half); 297 | float *ImagOut = (float *) alloca(sizeof(float) * Half); 298 | 299 | for (i = 0; i < Half; i++) { 300 | tmpReal[i] = In[2 * i]; 301 | tmpImag[i] = In[2 * i + 1]; 302 | } 303 | 304 | FFT3(Half, 0, tmpReal, tmpImag, RealOut, ImagOut); 305 | 306 | float wtemp = (float) (sin(0.5 * theta)); 307 | 308 | float wpr = -2.0F * wtemp * wtemp; 309 | float wpi = (float) (sin(theta)); 310 | float wr = 1.0F + wpr; 311 | float wi = wpi; 312 | 313 | int i3; 314 | 315 | float h1r, h1i, h2r, h2i, rt, it; 316 | 317 | for (i = 1; i < Half / 2; i++) { 318 | 319 | i3 = Half - i; 320 | 321 | h1r = 0.5F * (RealOut[i] + RealOut[i3]); 322 | h1i = 0.5F * (ImagOut[i] - ImagOut[i3]); 323 | h2r = 0.5F * (ImagOut[i] + ImagOut[i3]); 324 | h2i = -0.5F * (RealOut[i] - RealOut[i3]); 325 | 326 | rt = h1r + wr * h2r - wi * h2i; 327 | it = h1i + wr * h2i + wi * h2r; 328 | 329 | Out[i] = rt * rt + it * it; 330 | 331 | rt = h1r - wr * h2r + wi * h2i; 332 | it = -h1i + wr * h2i + wi * h2r; 333 | 334 | Out[i3] = rt * rt + it * it; 335 | 336 | wr = (wtemp = wr) * wpr - wi * wpi + wr; 337 | wi = wi * wpr + wtemp * wpi + wi; 338 | } 339 | 340 | rt = (h1r = RealOut[0]) + ImagOut[0]; 341 | it = h1r - ImagOut[0]; 342 | Out[0] = rt * rt + it * it; 343 | 344 | rt = RealOut[Half / 2]; 345 | it = ImagOut[Half / 2]; 346 | Out[Half / 2] = rt * rt + it * it; 347 | 348 | //free(tmpReal); 349 | //free(tmpImag); 350 | //free(RealOut); 351 | //free(ImagOut); 352 | } 353 | 354 | /* 355 | * Windowing Functions 356 | */ 357 | 358 | int NumWindowFuncs3() 359 | { 360 | 
return 4; 361 | } 362 | 363 | char *WindowFuncName3(int whichFunction) 364 | { 365 | switch (whichFunction) { 366 | default: 367 | case 0: 368 | return "Rectangular"; 369 | case 1: 370 | return "Bartlett"; 371 | case 2: 372 | return "Hamming"; 373 | case 3: 374 | return "Hanning"; 375 | } 376 | } 377 | 378 | void WindowFunc3(int whichFunction, int NumSamples, float *in) 379 | { 380 | int i; 381 | 382 | if (whichFunction == 1) { 383 | // Bartlett (triangular) window 384 | for (i = 0; i < NumSamples / 2; i++) { 385 | in[i] *= (i / (float) (NumSamples / 2)); 386 | in[i + (NumSamples / 2)] *= 387 | (1.0F - (i / (float) (NumSamples / 2))); 388 | } 389 | } 390 | 391 | if (whichFunction == 2) { 392 | // Hamming 393 | for (i = 0; i < NumSamples; i++) 394 | in[i] *= 0.54F - 0.46F * (float) cos(2 * M_PI * i / (NumSamples - 1)); 395 | } 396 | 397 | if (whichFunction == 3) { 398 | // Hanning 399 | for (i = 0; i < NumSamples; i++) 400 | in[i] *= 0.50F - 0.50F * (float) cos(2 * M_PI * i / (NumSamples - 1)); 401 | } 402 | } 403 | -------------------------------------------------------------------------------- /score-align.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 26 | 29 | 32 | 35 | 38 | 41 | 55 | 58 | 61 | 64 | 73 | 76 | 79 | 82 | 85 | 88 | 91 | 94 | 97 | 98 | 106 | 109 | 112 | 115 | 118 | 121 | 131 | 134 | 137 | 140 | 150 | 153 | 156 | 159 | 162 | 165 | 168 | 171 | 174 | 175 | 176 | 177 | 178 | 179 | 184 | 187 | 188 | 191 | 192 | 195 | 196 | 199 | 200 | 203 | 204 | 207 | 208 | 211 | 212 | 215 | 216 | 219 | 222 | 225 | 230 | 231 | 234 | 239 | 240 | 241 | 244 | 247 | 252 | 253 | 256 | 261 | 262 | 263 | 266 | 267 | 270 | 273 | 278 | 279 | 282 | 287 | 288 | 289 | 292 | 295 | 300 | 301 | 304 | 309 | 310 | 311 | 314 | 317 | 322 | 323 | 326 | 331 | 332 | 333 | 336 | 339 | 344 | 345 | 348 | 353 | 354 | 355 | 358 | 361 | 366 | 367 | 370 | 375 | 376 | 377 | 378 | 383 | 384 | 385 | 390 | 393 | 394 | 
397 | 398 | 401 | 402 | 405 | 406 | 409 | 410 | 413 | 414 | 417 | 418 | 421 | 422 | 425 | 426 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* main.cpp -- the command line interface for scorealign 2 | * 3 | * 14-Jul-08 RBD 4 | */ 5 | 6 | #include "stdio.h" 7 | #include "main.h" 8 | #include 9 | #include "allegro.h" 10 | #include "audioreader.h" 11 | #include "scorealign.h" 12 | #include "sautils.h" 13 | #include "alignfiles.h" 14 | #include "gen_chroma.h" 15 | #include "comp_chroma.h" 16 | 17 | // a global object with score alignment parameters and data 18 | Scorealign sa; 19 | 20 | static void print_usage(char *progname) 21 | { 22 | printf("\nUsage: %s [- [ " 23 | " ]] []\n", progname); 24 | printf(" specifying only simply transcribes MIDI in " 25 | "to\n"); 26 | printf(" transcription.txt. Otherwise, align and .\n"); 27 | printf(" -h 0.25 indicates a frame period of 0.25 seconds\n"); 28 | printf(" -w 0.25 indicates a window size of 0.25 seconds. 
\n"); 29 | printf(" -r indicates filename to write raw alignment path to " 30 | "(default path.data)\n"); 31 | printf(" -s is filename to write smoothed alignment path(default is " 32 | "smooth.data)\n"); 33 | printf(" -t is filename to write the time aligned transcription " 34 | "(default is transcription.txt)\n"); 35 | printf(" -m is filename to write the time aligned midi file " 36 | "(default is midi.mid)\n"); 37 | printf(" -b is filename to write the time aligned beat times " 38 | "(default is beatmap.txt)\n"); 39 | printf(" -i is filename to write an image of the distance matrix " 40 | "(default is distance.pnm)\n"); 41 | printf(" -o 2.0 indicates a smoothing window time of 2.0s\n"); 42 | printf(" -p 3.0 indicates presmoothing with a 3s window\n"); 43 | printf(" -x 6.0 indicates 6s line segment approximation\n"); 44 | #if (defined (_WIN32) || defined (WIN32)) 45 | printf(" This is a Unix style command line application which\n" 46 | " should be run in a MSDOS box or Command Shell window.\n\n"); 47 | printf(" Type RETURN to exit.\n") ; 48 | getchar(); 49 | #endif 50 | } /* print_usage */ 51 | 52 | 53 | /* SAVE_SMOOTH_FILE 54 | saves the smooth time map in SMOOTH_FILENAME 55 | 56 | */ 57 | void save_smooth_file(char *smooth_filename, Scorealign &sa) { 58 | FILE *smoothf = fopen(smooth_filename, "w"); 59 | assert(smoothf); 60 | for (int i = 0; i < sa.file1_frames; i++) { 61 | fprintf(smoothf, "%g \t%g\n", i * sa.actual_frame_period_1, 62 | sa.smooth_time_map[i] * sa.actual_frame_period_2); 63 | } 64 | fclose(smoothf); 65 | } 66 | 67 | 68 | /* PRINT_BEAT_MAP 69 | prints the allegro beat_map (for debugging) which contain 70 | the time, beat pair for a song 71 | */ 72 | void print_beat_map(Alg_seq &seq, char *filename) { 73 | 74 | FILE *beatmap_print = fopen(filename, "w"); 75 | 76 | Alg_beats &b = seq.get_time_map()->beats; 77 | long num_beats = seq.get_time_map()->length(); 78 | 79 | for(int i = 0; i < num_beats; i++) { 80 | fprintf(beatmap_print," %f %f \n", 
b[i].beat, b[i].time); 81 | } 82 | fclose(beatmap_print); 83 | 84 | } 85 | 86 | 87 | /* EDIT_TRANSCRIPTION 88 | edit the allegro time map structure according 89 | to the warping and output a midi file and transcription 90 | file 91 | 92 | */ 93 | void edit_transcription(Alg_seq &seq , bool warp, FILE *outf, 94 | char *midi_filename, char *beat_filename) { 95 | int note_x = 1; 96 | seq.convert_to_seconds(); 97 | Alg_iterator iterator(&seq, true); 98 | iterator.begin(); 99 | 100 | Alg_event_ptr e = iterator.next(); 101 | 102 | while (e) { 103 | if (e->is_note()) { 104 | Alg_note_ptr n = (Alg_note_ptr) e; 105 | fprintf(outf, "%d %d %d %d ", 106 | note_x++, n->chan, ROUND(n->pitch), ROUND(n->loud)); 107 | // now compute onset time mapped to audio time 108 | double start = n->time; 109 | double finish = n->time + n->dur; 110 | if (warp) { 111 | start = sa.map_time(start); 112 | finish = sa.map_time(finish); 113 | } 114 | fprintf(outf, "%.3f %.3f\n", start, finish-start); 115 | } 116 | e = iterator.next(); 117 | } 118 | iterator.end(); 119 | fclose(outf); 120 | if (warp) { 121 | // align the midi file and write out 122 | sa.midi_tempo_align(seq, true); 123 | seq.smf_write(midi_filename); 124 | print_beat_map(seq, beat_filename); 125 | } 126 | } 127 | 128 | 129 | // save image of distance matrix 130 | void save_image(char *image_filename, Scorealign &sa) 131 | { 132 | FILE *outf = fopen(image_filename, "wb"); 133 | float max_d = 0.0; 134 | float min_d = 999999.0; 135 | fputs("P5\n", outf); 136 | fprintf(outf, "%d %d 255\n", sa.file1_frames, sa.file2_frames); 137 | for (int row = 0; row < sa.file2_frames; row++) { 138 | for (int col = 0; col < sa.file1_frames; col++) { 139 | float d = gen_dist(row, col, sa.chrom_energy2, sa.chrom_energy1); 140 | if (d > max_d) max_d = d; 141 | if (d < min_d) min_d = d; 142 | int pixel = (int) (255 * (d / 6.0) + 0.5); 143 | if (pixel > 255) pixel = 255; 144 | putc(pixel, outf); 145 | } 146 | } 147 | fclose(outf); 148 | printf("max distance 
%g, min distance %g\n", max_d, min_d); 149 | } 150 | 151 | 152 | /* SAVE_TRANSCRIPTION 153 | write note data corresponding to audio file 154 | 155 | assume audio file is file 1 and midi file is file 2 156 | so pathx is index into audio, pathy is index into MIDI 157 | 158 | If warp is false, simply write a transcription of the midi file. 159 | 160 | Every note has 6 fields separated by a space character. The fields are: 161 | 162 | Where 163 | is just an integer note number, e.g. 1, 2, 3, ... 164 | is MIDI channel from 0 to 15 165 | is MIDI key number (60 = middle C) 166 | is MIDI key velocity (1 to 127) 167 | is time in seconds, rounded to 3 decimal places (milliseconds) 168 | is time in seconds, rounded to 3 decimal places 169 | */ 170 | void save_transcription(char *file1, char *file2, 171 | bool warp, char *filename, char *smooth_filename, 172 | char *midi_filename, char *beat_filename) 173 | { 174 | 175 | char *midiname; //midi file to be read 176 | char *audioname; //audio file to be read 177 | 178 | if (warp) save_smooth_file(smooth_filename, sa); 179 | 180 | //If either is a midifile 181 | if (is_midi_file(file1) || is_midi_file(file2)) { 182 | 183 | if (is_midi_file(file1)) { 184 | midiname=file1; 185 | audioname=file2; 186 | } else { 187 | midiname=file2; 188 | audioname=file1; 189 | } 190 | 191 | Alg_seq seq(midiname, true); 192 | 193 | FILE *outf = fopen(filename, "w"); 194 | if (!outf) { 195 | printf("Error: could not open %s\n", filename); 196 | return; 197 | } 198 | fprintf(outf, "# transcription of %s\n", midiname); 199 | if (warp) { 200 | fprintf(outf, "# note times are aligned to %s\n", audioname); 201 | } else { 202 | fprintf(outf, "# times are unmodified from those in MIDI file\n"); 203 | } 204 | fprintf(outf, "# transcription format : " 205 | " \n"); 206 | 207 | edit_transcription(seq, warp, outf, midi_filename, beat_filename); 208 | } 209 | } 210 | 211 | 212 | /* SAVE_PATH 213 | write the alignment path to FILENAME 214 | */ 215 | void 
save_path(char *filename, int pathlen, short* pathx, short *pathy, 216 | float actual_frame_period_1, float actual_frame_period_2) 217 | { 218 | // print the path to a (plot) file 219 | FILE *pathf = fopen(filename, "w"); 220 | assert(pathf); 221 | int p; 222 | for (p = 0; p < pathlen; p++) { 223 | fprintf(pathf, "%g %g\n", pathx[p] * actual_frame_period_1, 224 | pathy[p] * actual_frame_period_2); 225 | } 226 | fclose(pathf); 227 | } 228 | 229 | 230 | /* 231 | Prints the chroma table (for debugging) 232 | */ 233 | 234 | void print_chroma_table(float *chrom_energy, int frames) 235 | { 236 | int i, j; 237 | for (j = 0; j < frames; j++) { 238 | for (i = 0; i <= CHROMA_BIN_COUNT; i++) { 239 | printf("%5.2f | ", AREF2(chrom_energy, j, i)); 240 | } 241 | printf("\n"); 242 | } 243 | } 244 | 245 | 246 | int main(int argc, char *argv []) 247 | { 248 | char *progname, *infilename1, *infilename2; 249 | char *smooth_filename, *path_filename, *trans_filename; 250 | char *midi_filename, *beat_filename, *image_filename; 251 | 252 | //just transcribe if trasncribe == 1 253 | int transcribe = 0; 254 | 255 | // Default for the user definable parameters 256 | 257 | path_filename = "path.data"; 258 | smooth_filename = "smooth.data"; 259 | trans_filename = "transcription.txt"; 260 | midi_filename = "midi.mid"; 261 | beat_filename = "beatmap.txt"; 262 | image_filename = "distance.pnm"; 263 | 264 | progname = strrchr(argv [0], '/'); 265 | progname = progname ? 
progname + 1 : argv[0] ; 266 | 267 | // If no arguments, return usage 268 | if (argc < 2) { 269 | print_usage(progname); 270 | return 1; 271 | } 272 | 273 | 274 | 275 | /*******PARSING CODE BEGINS*********/ 276 | int i = 1; 277 | while (i < argc) { 278 | //expected flagged argument 279 | if (argv[i][0] == '-') { 280 | char flag = argv[i][1]; 281 | if (flag == 'h') { 282 | sa.frame_period = atof(argv[i+1]); 283 | } else if (flag == 'w') { 284 | sa.window_size = atof(argv[i+1]); 285 | } else if (flag == 'r') { 286 | path_filename = argv[i+1]; 287 | } else if (flag == 's') { 288 | smooth_filename = argv[i+1]; 289 | } else if (flag == 't') { 290 | trans_filename = argv[i+1]; 291 | } else if (flag == 'm') { 292 | midi_filename = argv[i+1]; 293 | } else if (flag == 'i') { 294 | image_filename = argv[i+1]; 295 | } else if (flag == 'b') { 296 | beat_filename = argv[i+1]; 297 | } else if (flag == 'o') { 298 | sa.smooth_time = atof(argv[i+1]); 299 | } else if (flag == 'p') { 300 | sa.presmooth_time = atof(argv[i+1]); 301 | } else if (flag == 'x') { 302 | sa.line_time = atof(argv[i+1]); 303 | } 304 | i++; 305 | } 306 | // When aligning audio to midi we must force file1 to be midi 307 | else { 308 | // file 1 is midi 309 | if (transcribe == 0) { 310 | infilename1 = argv[i]; 311 | transcribe++; 312 | } 313 | // file 2 is audio or a second midi 314 | else { 315 | infilename2 = argv[i]; 316 | transcribe++; 317 | } 318 | } 319 | i++; 320 | } 321 | /**********END PARSING ***********/ 322 | if (sa.presmooth_time > 0 && sa.line_time > 0) { 323 | printf("WARNING: both -p and -x options selected.\n"); 324 | } 325 | #if DEBUG_LOG 326 | dbf = fopen("debug-log.txt", "w"); 327 | assert(dbf); 328 | #endif 329 | 330 | if (transcribe == 1) { 331 | // if only one midi file, just write transcription and exit, 332 | // no alignment 333 | save_transcription(infilename1, "", false, trans_filename,NULL, NULL, NULL); 334 | printf("Wrote %s\n", trans_filename); 335 | goto finish; 336 | } 337 | 338 | 
339 | // if midi only in infilename2, make it infilename1 340 | if (is_midi_file(infilename2) && !is_midi_file(infilename1)) { 341 | char *temp; 342 | temp = infilename1; 343 | infilename1 = infilename2; 344 | infilename2 = temp; 345 | } 346 | 347 | if (!align_files(infilename1, infilename2, sa, true /* verbose */)) { 348 | printf("An error occurred, not saving path and transcription data\n"); 349 | goto finish; 350 | } 351 | if (sa.file1_frames <= 2 || sa.file2_frames <= 2) { 352 | printf("Error: file frame counts are low: %d (for input 1) and %d " 353 | "for input 2)\n...not saving path and transcription data\n", 354 | sa.file1_frames, sa.file2_frames); 355 | goto finish; 356 | } 357 | // save path 358 | save_path(path_filename, sa.pathlen, sa.pathx, sa.pathy, 359 | sa.actual_frame_period_1, sa.actual_frame_period_2); 360 | // save image of distance matrix 361 | save_image(image_filename, sa); 362 | // save smooth, midi, transcription 363 | save_transcription(infilename1, infilename2, true, trans_filename, 364 | smooth_filename, midi_filename, beat_filename); 365 | 366 | // print what the chroma matrix looks like 367 | /* 368 | printf("file1 chroma table: \n"); 369 | print_chroma_table(chrom_energy1,file1_frames); 370 | printf("\nfile2 chroma table: \n"); 371 | print_chroma_table(chrom_energy2, file2_frames); 372 | */ 373 | 374 | // only path and smooth are written when aligning two audio files 375 | if (is_midi_file(infilename1) || is_midi_file(infilename2)) 376 | printf("Wrote %s, %s, %s, and %s.", path_filename, smooth_filename, 377 | trans_filename, beat_filename); 378 | else 379 | printf("Wrote %s and %s.", path_filename, smooth_filename); 380 | 381 | finish: 382 | #if DEBUG_LOG 383 | fclose(dbf); 384 | #endif 385 | 386 | return 0 ; 387 | } /* main */ 388 | 389 | 390 | /* print_path_range -- debugging output */ 391 | /**/ 392 | void print_path_range(short *pathx, short *pathy, int i, int j) 393 | { 394 | while (i <= j) { 395 | printf("%d %d\n", pathx[i], 
pathy[i]); 396 | i++; 397 | } 398 | } 399 | 400 | 401 | -------------------------------------------------------------------------------- /gen_chroma.cpp: -------------------------------------------------------------------------------- 1 | 2 | #ifdef _WIN32 3 | #include "malloc.h" 4 | #endif 5 | #include "stdlib.h" // for OSX compatibility, malloc.h -> stdlib.h 6 | #include "stdio.h" 7 | #include "assert.h" 8 | #include "string.h" 9 | #include "math.h" 10 | #include 11 | #include "allegro.h" 12 | #include "fft3/FFT3.h" 13 | #include "audioreader.h" 14 | #include "scorealign.h" 15 | #include "gen_chroma.h" 16 | #include "comp_chroma.h" 17 | #include "mfmidi.h" 18 | #include "sautils.h" 19 | #ifdef SA_VERBOSE 20 | #include // cout 21 | #endif 22 | using namespace std; 23 | 24 | //if 1, causes printing internally 25 | #define PRINT_BIN_ENERGY 1 26 | 27 | #define p1 0.0577622650466621 28 | #define p2 2.1011784386926213 29 | 30 | // each row is one chroma vector, 31 | // data is stored as an array of chroma vectors: 32 | // vector 1, vector 2, ... 33 | #define CHROM(row, column) AREF2((*chrom_energy), row, column) 34 | 35 | float hz_to_step(float hz) 36 | { 37 | return float((log(hz) - p2) / p1); 38 | } 39 | 40 | /* GEN_MAGNITUDE 41 | given the real and imaginary portions of a complex FFT function, compute 42 | the magnitude of the fft bin. 43 | given input of 2 arrays (inR and inI) of length n, takes the ith element 44 | from each, squares them, sums them, takes the square root of the sum and 45 | puts the output into the ith position in the array out. 
46 | 47 | NOTE: out should be length n 48 | */ 49 | void gen_Magnitude(float* inR,float* inI, int low, int hi, float* out) 50 | { 51 | int i; 52 | for (i = low; i < hi; i++) { 53 | float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]); 54 | //printf(" %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal); 55 | out[i]= magVal; 56 | #ifdef SA_VERBOSE 57 | if (i == 1000) printf("gen_Magnitude: %d %g\n", i, magVal); 58 | #endif 59 | } 60 | } 61 | 62 | 63 | /* PRINT_BINS 64 | This function is intended for debugging purposes. 65 | pass in an array representing the "mid point" 66 | of each bin, and the number of bins. The 67 | function will print out: 68 | i value 69 | index falue 70 | low range of the bin 71 | middle of the bin 72 | high range of the bin 73 | */ 74 | void print_Bins(float* bins, int numBins){ 75 | printf("BINS: \n"); 76 | int i; 77 | for (i=0; i bins[i]) { 106 | minValue = bins[i]; 107 | minIndex = i; 108 | } 109 | } 110 | return minIndex; 111 | } 112 | 113 | 114 | /* GEN_HAMMING 115 | given data from reading in a section of a sound file 116 | applies the hamming function to each sample. 117 | n specifies the length of in and out. 118 | */ 119 | void gen_Hamming(float* in, int n, float* out) 120 | { 121 | int k = 0; 122 | for(k = 0; k < n; k++) { 123 | float internalValue = 2.0 * M_PI * k * (1.0 / (n - 1)); 124 | float cosValue = cos(internalValue); 125 | float hammingValue = 0.54F + (-0.46F * cosValue); 126 | #ifdef SA_VERBOSE 127 | if (k == 1000) printf("Hamming %g\n", hammingValue); 128 | #endif 129 | out[k] = hammingValue * in[k]; 130 | } 131 | } 132 | 133 | /* NEXTPOWEROF2 134 | given an int n, finds the next power of 2 larger than 135 | or equal to n. 
136 | */ 137 | int nextPowerOf2(int n) 138 | { 139 | int result = 1; 140 | while (result < n) result = (result << 1); 141 | return result; 142 | } 143 | 144 | 145 | /* GEN_CHROMA_AUDIO -- compute chroma for an audio file 146 | */ 147 | /* 148 | generates the chroma energy for a given sequence 149 | with a low cutoff and high cutoff. 150 | The chroma energy is placed in the float *chrom_energy. 151 | this 2D is an array of pointers. 152 | The function returns the number of frames 153 | (aka the length of the 1st dimention of chrom_energy) 154 | */ 155 | int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff, 156 | int lcutoff, float **chrom_energy, float *actual_frame_period, 157 | int id, bool verbose) 158 | { 159 | int i; 160 | double sample_rate = reader.get_sample_rate(); 161 | float reg11[CHROMA_BIN_COUNT]; // temp storage1; 162 | float reg12[CHROMA_BIN_COUNT]; // temp storage2; 163 | 164 | if (verbose) { 165 | printf ("==============FILE %d====================\n", id); 166 | reader.print_info(); 167 | } 168 | // this seems like a poor way to set actual_frame_period_1 or _2 in 169 | // the Scorealign object, but I'm not sure what would be better: 170 | *actual_frame_period = reader.actual_frame_period; 171 | 172 | for (i = 0; i < CHROMA_BIN_COUNT; i++) { 173 | reg11[i] = -999; 174 | } 175 | for (i = 0; i < CHROMA_BIN_COUNT; i++){ 176 | reg12[i] = 0; 177 | } 178 | 179 | /*=============================================================*/ 180 | 181 | // allocate some buffers for use in the loop 182 | int full_data_size = nextPowerOf2(reader.samples_per_frame); 183 | if (verbose) { 184 | printf(" samples per frame is %d \n", reader.samples_per_frame); 185 | printf(" total chroma frames %d\n", reader.frame_count); 186 | // printf(" Window size %g second \n", reader.window_size); 187 | printf(" hopsize in samples %d \n", reader.hop_samples); 188 | printf(" fft size %d\n", full_data_size); 189 | } 190 | 191 | float *full_data = ALLOC(float, full_data_size); 
192 | float *fft_dataR = ALLOC(float, full_data_size); 193 | float *fft_dataI = ALLOC(float, full_data_size); 194 | //set to zero 195 | memset(full_data, 0, full_data_size * sizeof(float)); 196 | memset(fft_dataR, 0, full_data_size * sizeof(float)); 197 | memset(fft_dataI, 0, full_data_size * sizeof(float)); 198 | //check to see if memory has been allocated 199 | assert(full_data != NULL); 200 | assert(fft_dataR != NULL); 201 | assert(fft_dataI != NULL); 202 | 203 | int *bin_map = ALLOC(int, full_data_size); 204 | 205 | //set up the chrom_energy array; 206 | *chrom_energy = ALLOC(float, reader.frame_count * (CHROMA_BIN_COUNT + 1)); 207 | int cv_index = 0; 208 | 209 | // set up mapping from spectral bins to chroma bins 210 | // ordinarily, we would add 0.5 to round to nearest bin, but we also 211 | // want to subtract 0.5 because the bin has a width of +/- 0.5. These 212 | // two cancel out, so we can just round down and get the right answer. 213 | int num_bins_to_use = (int) (hcutoff * full_data_size / sample_rate); 214 | // But then we want to add 1 because the loops will only go to 215 | // high_bin - 1: 216 | int high_bin = min(num_bins_to_use + 1, full_data_size); 217 | //printf("center freq of high bin is %g\n", (high_bin - 1) * sample_rate / 218 | // full_data_size); 219 | //printf("high freq of high bin is %g\n", 220 | // (high_bin - 1 + 0.5) * sample_rate / full_data_size); 221 | // If we add 0.5, we'll round to nearest bin center frequency, but 222 | // bin covers a frequency range that goes 0.5 bin width lower, so we 223 | // add 1 before rounding. 
224 | int low_bin = (int) (lcutoff * full_data_size / sample_rate); 225 | //printf("center freq of low bin is %g\n", low_bin * sample_rate / 226 | // full_data_size); 227 | //printf("low freq of low bin is %g\n", (low_bin - 0.5) * sample_rate / 228 | // full_data_size); 229 | //printf("frequency spacing of bins is %g\n", 230 | // sample_rate / full_data_size); 231 | double freq = low_bin * sample_rate / full_data_size; 232 | for (i = low_bin; i < high_bin; i++) { 233 | float raw_bin = hz_to_step(freq); 234 | int round_bin = (int) (raw_bin + 0.5F); 235 | int mod_bin = round_bin % 12; 236 | bin_map[i] = mod_bin; 237 | freq += sample_rate / full_data_size; 238 | } 239 | // printf("BIN_COUNT is !!!!!!!!!!!!! %d\n",CHROMA_BIN_COUNT); 240 | 241 | while (reader.read_window(full_data)) { 242 | //fill out array with 0's till next power of 2 243 | #ifdef SA_VERBOSE 244 | printf("samples_per_frame %d sample %g\n", reader.samples_per_frame, 245 | full_data[0]); 246 | #endif 247 | for (i = reader.samples_per_frame; i < full_data_size; i++) 248 | full_data[i] = 0; 249 | 250 | #ifdef AS_VERBOSE 251 | printf("preFFT: full_data[1000] %g\n", full_data[1000]); 252 | #endif 253 | 254 | //the data from the wave file, each point mult by a hamming value 255 | gen_Hamming(full_data, full_data_size, full_data); 256 | 257 | #ifdef SA_VERBOSE 258 | printf("preFFT: hammingData[1000] %g\n", full_data[1000]); 259 | #endif 260 | FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3 261 | 262 | //given the fft, compute the energy of each point 263 | gen_Magnitude(fft_dataR, fft_dataI, low_bin, high_bin, full_data); 264 | 265 | /*------------------------------------- 266 | GENERATE BINS AND PUT 267 | THE CORRECT ENERGY IN 268 | EACH BIN, CORRESPONDING 269 | TO THE CORRECT PITCH 270 | -------------------------------------*/ 271 | 272 | float binEnergy[CHROMA_BIN_COUNT]; 273 | int binCount[CHROMA_BIN_COUNT]; 274 | 275 | for (i = 0; i < CHROMA_BIN_COUNT; i++) { 276 | binCount[i] = 0; 
277 | binEnergy[i] = 0.0; 278 | } 279 | 280 | for (i = low_bin; i < high_bin; i++) { 281 | int mod_bin = bin_map[i]; 282 | binEnergy[mod_bin] += full_data[i]; 283 | binCount[mod_bin]++; 284 | } 285 | 286 | /*------------------------------------- 287 | END OF BIN GENERATION 288 | -------------------------------------*/ 289 | /* THE FOLLOWING LOOKS LIKE SOME OLD CODE TO COMPUTE 290 | * CHROMA FLUX, BUT IT IS NOT IN USE NOW 291 | 292 | if (PRINT_BIN_ENERGY) { 293 | float mao1; 294 | float sum=0.; 295 | 296 | for (i = 0; i < CHROMA_BIN_COUNT; i++) { 297 | reg12[i]=binEnergy[i] / binCount[i]; 298 | } 299 | 300 | if (reg11[0]==-999){ 301 | printf("Chroma Flux \n\n"); 302 | } else { 303 | for (i = 0; i < CHROMA_BIN_COUNT; i++) { 304 | } 305 | for (int k = 0; k < CHROMA_BIN_COUNT; k++) { 306 | float x = reg11[k]; 307 | float y = reg12[k]; 308 | float diff = x - y; 309 | sum += diff * diff; 310 | } 311 | mao1 = sqrt(sum); 312 | sequence++; 313 | sum = 0.; 314 | mao1 = 0.; 315 | } 316 | for (i = 0; i < CHROMA_BIN_COUNT; i++) { 317 | reg11[i]=reg12[i]; 318 | } 319 | //fclose(Pointer); 320 | } 321 | */ 322 | //put chrom energy into the returned array 323 | 324 | #ifdef SA_VERBOSE 325 | printf("cv_index %d\n", cv_index); 326 | #endif 327 | assert(cv_index < reader.frame_count); 328 | for (i = 0; i < CHROMA_BIN_COUNT; i++) 329 | CHROM(cv_index, i) = binEnergy[i] / binCount[i]; 330 | cv_index++; 331 | } // end of while ((readcount = read_mono_floats... 332 | 333 | free(fft_dataI); 334 | free(fft_dataR); 335 | free(full_data); 336 | if (verbose) 337 | printf("\nGenerated Chroma. 
file%d_frames is %i\n", id, file1_frames); 338 | return cv_index; 339 | } 340 | 341 | 342 | class Event_list { 343 | public: 344 | Alg_note_ptr note; 345 | Event_list *next; 346 | 347 | Event_list(Alg_event_ptr event_, Event_list *next_) { 348 | note = (Alg_note_ptr) event_; 349 | next = next_; 350 | } 351 | 352 | ~Event_list() { 353 | } 354 | }; 355 | typedef Event_list *Event_list_ptr; 356 | 357 | 358 | /* gen_chroma_midi -- generate chroma vectors for midi file */ 359 | /* 360 | generates the chroma energy for a given sequence 361 | with a low cutoff and high cutoff. 362 | The chroma energy is placed in the float *chrom_energy. 363 | this 2D is an array of pointers. 364 | The function returns the number of frames 365 | (aka the length of the 1st dimention of chrom_energy) 366 | * 367 | * 368 | Notes: keep a list of notes that are sounding. 369 | For each frame, 370 | zero the vector 371 | while next note starts before end of frame, insert note in list 372 | for each note in list, compute weight and add to vector. Remove 373 | if note ends before frame start time. 374 | How many frames? 
375 | */ 376 | 377 | int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff, 378 | float **chrom_energy, float *actual_frame_period, 379 | int id, bool verbose) 380 | { 381 | if (verbose) { 382 | printf ("==============FILE %d====================\n", id); 383 | SA_V(seq.write(cout, true)); 384 | } 385 | /*=============================================================*/ 386 | 387 | *actual_frame_period = (frame_period) ; // since we don't quantize to samples 388 | 389 | /*=============================================================*/ 390 | 391 | seq.convert_to_seconds(); 392 | /* find duration */ 393 | float dur = 0.0F; 394 | int nnotes = 0; 395 | nnotes= find_midi_duration(seq, &dur); 396 | 397 | /*================================================================*/ 398 | 399 | int frame_count= (int)ceil(((float)dur/ frame_period + 1)); 400 | 401 | /*================================================================*/ 402 | 403 | if (verbose) { 404 | printf(" note count = %d\n", nnotes); 405 | printf(" duration in sec = %f\n", dur); 406 | printf(" chroma frames %d\n", frame_count); 407 | } 408 | 409 | //set up the chrom_energy array; 410 | (*chrom_energy) = ALLOC(float, frame_count * (CHROMA_BIN_COUNT + 1)); 411 | Event_list_ptr list = NULL; 412 | Alg_iterator iterator(&seq, true); 413 | iterator.begin(); 414 | Alg_event_ptr event = iterator.next(); 415 | int cv_index; 416 | for (cv_index = 0; cv_index < frame_count; cv_index++) { 417 | 418 | /*====================================================*/ 419 | 420 | float frame_begin = max((cv_index * (frame_period)) - 421 | window_size/2 , 0.0F); //chooses zero if negative 422 | 423 | float frame_end= frame_begin +(window_size/2); 424 | /*============================================================*/ 425 | /* zero the vector */ 426 | for (int i = 0; i < CHROMA_BIN_COUNT; i++) CHROM(cv_index, i) = 0; 427 | /* add new notes that are in the frame */ 428 | while (event && event->time < frame_end) { 429 | if 
(event->is_note()) { 430 | list = new Event_list(event, list); 431 | } 432 | event = iterator.next(); 433 | } 434 | /* remove notes that are no longer sounding */ 435 | Event_list_ptr *ptr = &list; 436 | while (*ptr) { 437 | while ((*ptr) && 438 | (*ptr)->note->time + (*ptr)->note->dur < frame_begin) { 439 | Event_list_ptr temp = *ptr; 440 | *ptr = (*ptr)->next; 441 | delete temp; 442 | } 443 | if (*ptr) ptr = &((*ptr)->next); 444 | } 445 | for (Event_list_ptr item = list; item; item = item->next) { 446 | /* compute duration of overlap */ 447 | float overlap = 448 | min(frame_end, (float) (item->note->time + item->note->dur)) - 449 | max(frame_begin, (float) item->note->time); 450 | float velocity = item->note->loud; 451 | float weight = overlap * velocity; 452 | #if DEBUG_LOG 453 | fprintf(dbf, "%3d pitch %g key %d overlap %g velocity %g\n", 454 | cv_index, item->note->pitch, item->note->get_identifier(), 455 | overlap, velocity); 456 | #endif 457 | CHROM(cv_index, (int)item->note->pitch % 12) += weight; 458 | } 459 | #if DEBUG_LOG 460 | for (int i = 0; i < CHROMA_BIN_COUNT; i++) { 461 | fprintf(dbf, "%d:%g ", i, CHROM(cv_index, i)); 462 | } 463 | fprintf(dbf, "\n\n"); 464 | #endif 465 | } 466 | while (list) { 467 | Event_list_ptr temp = list; 468 | list = list->next; 469 | delete temp; 470 | } 471 | iterator.end(); 472 | if (verbose) 473 | printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames); 474 | return frame_count; 475 | } 476 | -------------------------------------------------------------------------------- /scorealign.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 42; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 3D6E91710E351D8300FE12E2 /* FFT3.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D6E916F0E351D8300FE12E2 /* FFT3.cpp */; }; 11 | 3D6E91720E351D8300FE12E2 /* FFT3.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D6E91700E351D8300FE12E2 /* FFT3.h */; }; 12 | 3D848DC60E2C391300EDB5E3 /* audiofilereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */; }; 13 | 3D848DC90E2C392600EDB5E3 /* audioreader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848DC80E2C392600EDB5E3 /* audioreader.cpp */; }; 14 | 3D848E540E2CE2B300EDB5E3 /* alignfiles.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */; }; 15 | 3D848E7A0E2CEC4200EDB5E3 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D848E790E2CEC4200EDB5E3 /* main.cpp */; }; 16 | 3D9037AD0CC7CB3C00F46FD5 /* comp_chroma.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */; }; 17 | 3D9037AE0CC7CB3C00F46FD5 /* comp_chroma.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */; }; 18 | 3D9037B10CC7CB9C00F46FD5 /* gen_chroma.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */; }; 19 | 3D9037B20CC7CB9C00F46FD5 /* gen_chroma.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */; }; 20 | 3D9037B80CC7D16200F46FD5 /* regression.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037B70CC7D16200F46FD5 /* regression.cpp */; }; 21 | 3D9037BA0CC7D18400F46FD5 /* regression.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037B90CC7D18400F46FD5 /* regression.h */; }; 22 | 3D9037BD0CC7D18F00F46FD5 /* scorealign.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */; }; 23 | 
3D9037BE0CC7D18F00F46FD5 /* scorealign.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037BC0CC7D18F00F46FD5 /* scorealign.h */; }; 24 | 3D9037E20CC7E2E000F46FD5 /* allegro.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E10CC7E2E000F46FD5 /* allegro.cpp */; }; 25 | 3D9037E40CC7E2F700F46FD5 /* allegro.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037E30CC7E2F700F46FD5 /* allegro.h */; }; 26 | 3D9037E60CC7E30600F46FD5 /* allegrord.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E50CC7E30600F46FD5 /* allegrord.cpp */; }; 27 | 3D9037E80CC7E31300F46FD5 /* allegrosmfrd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */; }; 28 | 3D9037EA0CC7E31C00F46FD5 /* allegrowr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */; }; 29 | 3D9037EC0CC7E32400F46FD5 /* allegrosmfwr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */; }; 30 | 3D9037EF0CC7E33200F46FD5 /* mfmidi.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */; }; 31 | 3D9037F30CC7E33D00F46FD5 /* strparse.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9037F10CC7E33D00F46FD5 /* strparse.cpp */; }; 32 | 3D9037F40CC7E33D00F46FD5 /* strparse.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037F20CC7E33D00F46FD5 /* strparse.h */; }; 33 | 3D9037FF0CC7E46700F46FD5 /* mfmidi.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9037FE0CC7E46700F46FD5 /* mfmidi.h */; }; 34 | 3D9038800CC9B54000F46FD5 /* curvefit.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D90387E0CC9B54000F46FD5 /* curvefit.h */; }; 35 | 3D9038810CC9B54000F46FD5 /* curvefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D90387F0CC9B54000F46FD5 /* curvefit.cpp */; }; 36 | 3D9038880CCA25E100F46FD5 /* hillclimb.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9038860CCA25E100F46FD5 /* hillclimb.h */; }; 37 | 
3D9038890CCA25E100F46FD5 /* hillclimb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9038870CCA25E100F46FD5 /* hillclimb.cpp */; }; 38 | 3D9038DE0CCA815D00F46FD5 /* sautils.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 3D9038DC0CCA815D00F46FD5 /* sautils.h */; }; 39 | 3D9038DF0CCA815D00F46FD5 /* sautils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3D9038DD0CCA815D00F46FD5 /* sautils.cpp */; }; 40 | 8DD76F6A0486A84900D96B5E /* scorealign.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6859E8B029090EE04C91782 /* scorealign.1 */; }; 41 | /* End PBXBuildFile section */ 42 | 43 | /* Begin PBXCopyFilesBuildPhase section */ 44 | 8DD76F690486A84900D96B5E /* CopyFiles */ = { 45 | isa = PBXCopyFilesBuildPhase; 46 | buildActionMask = 8; 47 | dstPath = /usr/share/man/man1/; 48 | dstSubfolderSpec = 0; 49 | files = ( 50 | 8DD76F6A0486A84900D96B5E /* scorealign.1 in CopyFiles */, 51 | 3D9037AE0CC7CB3C00F46FD5 /* comp_chroma.h in CopyFiles */, 52 | 3D9037B20CC7CB9C00F46FD5 /* gen_chroma.h in CopyFiles */, 53 | 3D9037BA0CC7D18400F46FD5 /* regression.h in CopyFiles */, 54 | 3D9037BE0CC7D18F00F46FD5 /* scorealign.h in CopyFiles */, 55 | 3D9037E40CC7E2F700F46FD5 /* allegro.h in CopyFiles */, 56 | 3D9037F40CC7E33D00F46FD5 /* strparse.h in CopyFiles */, 57 | 3D9037FF0CC7E46700F46FD5 /* mfmidi.h in CopyFiles */, 58 | 3D9038800CC9B54000F46FD5 /* curvefit.h in CopyFiles */, 59 | 3D9038880CCA25E100F46FD5 /* hillclimb.h in CopyFiles */, 60 | 3D9038DE0CCA815D00F46FD5 /* sautils.h in CopyFiles */, 61 | 3D6E91720E351D8300FE12E2 /* FFT3.h in CopyFiles */, 62 | ); 63 | runOnlyForDeploymentPostprocessing = 1; 64 | }; 65 | /* End PBXCopyFilesBuildPhase section */ 66 | 67 | /* Begin PBXFileReference section */ 68 | 3D6E916F0E351D8300FE12E2 /* FFT3.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = FFT3.cpp; path = fft3/FFT3.cpp; sourceTree = ""; }; 69 | 3D6E91700E351D8300FE12E2 /* FFT3.h */ = {isa = PBXFileReference; fileEncoding 
= 30; lastKnownFileType = sourcecode.c.h; name = FFT3.h; path = fft3/FFT3.h; sourceTree = ""; }; 70 | 3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = audiofilereader.cpp; sourceTree = ""; }; 71 | 3D848DC80E2C392600EDB5E3 /* audioreader.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = audioreader.cpp; sourceTree = ""; }; 72 | 3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = alignfiles.cpp; sourceTree = ""; }; 73 | 3D848E790E2CEC4200EDB5E3 /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = ""; }; 74 | 3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = comp_chroma.cpp; sourceTree = ""; }; 75 | 3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = comp_chroma.h; sourceTree = ""; }; 76 | 3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = gen_chroma.cpp; sourceTree = ""; }; 77 | 3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = gen_chroma.h; sourceTree = ""; }; 78 | 3D9037B70CC7D16200F46FD5 /* regression.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = regression.cpp; sourceTree = ""; }; 79 | 3D9037B90CC7D18400F46FD5 /* regression.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = regression.h; sourceTree = ""; }; 80 | 3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; 
lastKnownFileType = sourcecode.cpp.cpp; path = scorealign.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 81 | 3D9037BC0CC7D18F00F46FD5 /* scorealign.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = scorealign.h; sourceTree = ""; }; 82 | 3D9037E10CC7E2E000F46FD5 /* allegro.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegro.cpp; path = ../portsmf/allegro.cpp; sourceTree = SOURCE_ROOT; }; 83 | 3D9037E30CC7E2F700F46FD5 /* allegro.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = allegro.h; path = ../portsmf/allegro.h; sourceTree = SOURCE_ROOT; }; 84 | 3D9037E50CC7E30600F46FD5 /* allegrord.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrord.cpp; path = ../portsmf/allegrord.cpp; sourceTree = SOURCE_ROOT; }; 85 | 3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrosmfrd.cpp; path = ../portsmf/allegrosmfrd.cpp; sourceTree = SOURCE_ROOT; }; 86 | 3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrowr.cpp; path = ../portsmf/allegrowr.cpp; sourceTree = SOURCE_ROOT; }; 87 | 3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = allegrosmfwr.cpp; path = ../portsmf/allegrosmfwr.cpp; sourceTree = SOURCE_ROOT; }; 88 | 3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = mfmidi.cpp; path = ../portsmf/mfmidi.cpp; sourceTree = SOURCE_ROOT; }; 89 | 3D9037F10CC7E33D00F46FD5 /* strparse.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = strparse.cpp; path = ../portsmf/strparse.cpp; 
sourceTree = SOURCE_ROOT; }; 90 | 3D9037F20CC7E33D00F46FD5 /* strparse.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = strparse.h; path = ../portsmf/strparse.h; sourceTree = SOURCE_ROOT; }; 91 | 3D9037FE0CC7E46700F46FD5 /* mfmidi.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = mfmidi.h; path = ../portsmf/mfmidi.h; sourceTree = SOURCE_ROOT; }; 92 | 3D90387E0CC9B54000F46FD5 /* curvefit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = curvefit.h; sourceTree = ""; }; 93 | 3D90387F0CC9B54000F46FD5 /* curvefit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = curvefit.cpp; sourceTree = ""; }; 94 | 3D9038860CCA25E100F46FD5 /* hillclimb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hillclimb.h; sourceTree = ""; }; 95 | 3D9038870CCA25E100F46FD5 /* hillclimb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hillclimb.cpp; sourceTree = ""; }; 96 | 3D9038DC0CCA815D00F46FD5 /* sautils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sautils.h; sourceTree = ""; }; 97 | 3D9038DD0CCA815D00F46FD5 /* sautils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sautils.cpp; sourceTree = ""; }; 98 | 8DD76F6C0486A84900D96B5E /* scorealign */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = scorealign; sourceTree = BUILT_PRODUCTS_DIR; }; 99 | C6859E8B029090EE04C91782 /* scorealign.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = scorealign.1; sourceTree = ""; }; 100 | /* End PBXFileReference section */ 101 | 102 | /* Begin PBXFrameworksBuildPhase section */ 103 | 8DD76F660486A84900D96B5E /* Frameworks */ = { 104 | isa = PBXFrameworksBuildPhase; 105 | 
buildActionMask = 2147483647; 106 | files = ( 107 | ); 108 | runOnlyForDeploymentPostprocessing = 0; 109 | }; 110 | /* End PBXFrameworksBuildPhase section */ 111 | 112 | /* Begin PBXGroup section */ 113 | 08FB7794FE84155DC02AAC07 /* scorealign */ = { 114 | isa = PBXGroup; 115 | children = ( 116 | 08FB7795FE84155DC02AAC07 /* Source */, 117 | C6859E8C029090F304C91782 /* Documentation */, 118 | 1AB674ADFE9D54B511CA2CBB /* Products */, 119 | ); 120 | name = scorealign; 121 | sourceTree = ""; 122 | }; 123 | 08FB7795FE84155DC02AAC07 /* Source */ = { 124 | isa = PBXGroup; 125 | children = ( 126 | 3D6E916F0E351D8300FE12E2 /* FFT3.cpp */, 127 | 3D6E91700E351D8300FE12E2 /* FFT3.h */, 128 | 3D9038DC0CCA815D00F46FD5 /* sautils.h */, 129 | 3D9038DD0CCA815D00F46FD5 /* sautils.cpp */, 130 | 3D9038860CCA25E100F46FD5 /* hillclimb.h */, 131 | 3D9038870CCA25E100F46FD5 /* hillclimb.cpp */, 132 | 3D90387E0CC9B54000F46FD5 /* curvefit.h */, 133 | 3D90387F0CC9B54000F46FD5 /* curvefit.cpp */, 134 | 3D9037F10CC7E33D00F46FD5 /* strparse.cpp */, 135 | 3D9037F20CC7E33D00F46FD5 /* strparse.h */, 136 | 3D9037ED0CC7E33200F46FD5 /* mfmidi.cpp */, 137 | 3D9037EB0CC7E32400F46FD5 /* allegrosmfwr.cpp */, 138 | 3D9037E90CC7E31C00F46FD5 /* allegrowr.cpp */, 139 | 3D848DC80E2C392600EDB5E3 /* audioreader.cpp */, 140 | 3D848E790E2CEC4200EDB5E3 /* main.cpp */, 141 | 3D848DC50E2C391300EDB5E3 /* audiofilereader.cpp */, 142 | 3D9037E70CC7E31300F46FD5 /* allegrosmfrd.cpp */, 143 | 3D9037E50CC7E30600F46FD5 /* allegrord.cpp */, 144 | 3D9037E30CC7E2F700F46FD5 /* allegro.h */, 145 | 3D848E530E2CE2B300EDB5E3 /* alignfiles.cpp */, 146 | 3D9037E10CC7E2E000F46FD5 /* allegro.cpp */, 147 | 3D9037BC0CC7D18F00F46FD5 /* scorealign.h */, 148 | 3D9037B90CC7D18400F46FD5 /* regression.h */, 149 | 3D9037B70CC7D16200F46FD5 /* regression.cpp */, 150 | 3D9037AF0CC7CB9C00F46FD5 /* gen_chroma.cpp */, 151 | 3D9037B00CC7CB9C00F46FD5 /* gen_chroma.h */, 152 | 3D9037AB0CC7CB3C00F46FD5 /* comp_chroma.cpp */, 153 | 3D9037FE0CC7E46700F46FD5 
/* mfmidi.h */, 154 | 3D9037AC0CC7CB3C00F46FD5 /* comp_chroma.h */, 155 | 3D9037BB0CC7D18F00F46FD5 /* scorealign.cpp */, 156 | ); 157 | name = Source; 158 | sourceTree = ""; 159 | }; 160 | 1AB674ADFE9D54B511CA2CBB /* Products */ = { 161 | isa = PBXGroup; 162 | children = ( 163 | 8DD76F6C0486A84900D96B5E /* scorealign */, 164 | ); 165 | name = Products; 166 | sourceTree = ""; 167 | }; 168 | C6859E8C029090F304C91782 /* Documentation */ = { 169 | isa = PBXGroup; 170 | children = ( 171 | C6859E8B029090EE04C91782 /* scorealign.1 */, 172 | ); 173 | name = Documentation; 174 | sourceTree = ""; 175 | }; 176 | /* End PBXGroup section */ 177 | 178 | /* Begin PBXNativeTarget section */ 179 | 8DD76F620486A84900D96B5E /* scorealign */ = { 180 | isa = PBXNativeTarget; 181 | buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "scorealign" */; 182 | buildPhases = ( 183 | 8DD76F640486A84900D96B5E /* Sources */, 184 | 8DD76F660486A84900D96B5E /* Frameworks */, 185 | 8DD76F690486A84900D96B5E /* CopyFiles */, 186 | ); 187 | buildRules = ( 188 | ); 189 | dependencies = ( 190 | ); 191 | name = scorealign; 192 | productInstallPath = "$(HOME)/bin"; 193 | productName = scorealign; 194 | productReference = 8DD76F6C0486A84900D96B5E /* scorealign */; 195 | productType = "com.apple.product-type.tool"; 196 | }; 197 | /* End PBXNativeTarget section */ 198 | 199 | /* Begin PBXProject section */ 200 | 08FB7793FE84155DC02AAC07 /* Project object */ = { 201 | isa = PBXProject; 202 | buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "scorealign" */; 203 | hasScannedForEncodings = 1; 204 | mainGroup = 08FB7794FE84155DC02AAC07 /* scorealign */; 205 | projectDirPath = ""; 206 | targets = ( 207 | 8DD76F620486A84900D96B5E /* scorealign */, 208 | ); 209 | }; 210 | /* End PBXProject section */ 211 | 212 | /* Begin PBXSourcesBuildPhase section */ 213 | 8DD76F640486A84900D96B5E /* Sources */ = { 214 | isa = 
PBXSourcesBuildPhase; 215 | buildActionMask = 2147483647; 216 | files = ( 217 | 3D9037AD0CC7CB3C00F46FD5 /* comp_chroma.cpp in Sources */, 218 | 3D9037B10CC7CB9C00F46FD5 /* gen_chroma.cpp in Sources */, 219 | 3D9037B80CC7D16200F46FD5 /* regression.cpp in Sources */, 220 | 3D9037BD0CC7D18F00F46FD5 /* scorealign.cpp in Sources */, 221 | 3D9037E20CC7E2E000F46FD5 /* allegro.cpp in Sources */, 222 | 3D9037E60CC7E30600F46FD5 /* allegrord.cpp in Sources */, 223 | 3D9037E80CC7E31300F46FD5 /* allegrosmfrd.cpp in Sources */, 224 | 3D9037EA0CC7E31C00F46FD5 /* allegrowr.cpp in Sources */, 225 | 3D9037EC0CC7E32400F46FD5 /* allegrosmfwr.cpp in Sources */, 226 | 3D9037EF0CC7E33200F46FD5 /* mfmidi.cpp in Sources */, 227 | 3D9037F30CC7E33D00F46FD5 /* strparse.cpp in Sources */, 228 | 3D9038890CCA25E100F46FD5 /* hillclimb.cpp in Sources */, 229 | 3D9038810CC9B54000F46FD5 /* curvefit.cpp in Sources */, 230 | 3D9038DF0CCA815D00F46FD5 /* sautils.cpp in Sources */, 231 | 3D848DC60E2C391300EDB5E3 /* audiofilereader.cpp in Sources */, 232 | 3D848DC90E2C392600EDB5E3 /* audioreader.cpp in Sources */, 233 | 3D848E540E2CE2B300EDB5E3 /* alignfiles.cpp in Sources */, 234 | 3D848E7A0E2CEC4200EDB5E3 /* main.cpp in Sources */, 235 | 3D6E91710E351D8300FE12E2 /* FFT3.cpp in Sources */, 236 | ); 237 | runOnlyForDeploymentPostprocessing = 0; 238 | }; 239 | /* End PBXSourcesBuildPhase section */ 240 | 241 | /* Begin XCBuildConfiguration section */ 242 | 1DEB923208733DC60010E9CD /* Debug */ = { 243 | isa = XCBuildConfiguration; 244 | buildSettings = { 245 | COPY_PHASE_STRIP = NO; 246 | GCC_DYNAMIC_NO_PIC = NO; 247 | GCC_ENABLE_FIX_AND_CONTINUE = YES; 248 | GCC_MODEL_TUNING = G5; 249 | GCC_OPTIMIZATION_LEVEL = 0; 250 | INSTALL_PATH = "$(HOME)/bin"; 251 | OTHER_LDFLAGS = ( 252 | "-L/usr/local/lib", 253 | "-lsndfile", 254 | ); 255 | PRODUCT_NAME = scorealign; 256 | USER_HEADER_SEARCH_PATHS = "../portsmf/ /usr/local/include"; 257 | ZERO_LINK = YES; 258 | }; 259 | name = Debug; 260 | }; 261 | 
1DEB923308733DC60010E9CD /* Release */ = { 262 | isa = XCBuildConfiguration; 263 | buildSettings = { 264 | ARCHS = ( 265 | ppc, 266 | i386, 267 | ); 268 | GCC_GENERATE_DEBUGGING_SYMBOLS = NO; 269 | GCC_MODEL_TUNING = G5; 270 | INSTALL_PATH = "$(HOME)/bin"; 271 | PRODUCT_NAME = scorealign; 272 | USER_HEADER_SEARCH_PATHS = ../portsmf/; 273 | }; 274 | name = Release; 275 | }; 276 | 1DEB923608733DC60010E9CD /* Debug */ = { 277 | isa = XCBuildConfiguration; 278 | buildSettings = { 279 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 280 | GCC_WARN_UNUSED_VARIABLE = YES; 281 | PREBINDING = NO; 282 | SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; 283 | USER_HEADER_SEARCH_PATHS = ../portsmf/; 284 | }; 285 | name = Debug; 286 | }; 287 | 1DEB923708733DC60010E9CD /* Release */ = { 288 | isa = XCBuildConfiguration; 289 | buildSettings = { 290 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 291 | GCC_WARN_UNUSED_VARIABLE = YES; 292 | PREBINDING = NO; 293 | SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; 294 | }; 295 | name = Release; 296 | }; 297 | /* End XCBuildConfiguration section */ 298 | 299 | /* Begin XCConfigurationList section */ 300 | 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "scorealign" */ = { 301 | isa = XCConfigurationList; 302 | buildConfigurations = ( 303 | 1DEB923208733DC60010E9CD /* Debug */, 304 | 1DEB923308733DC60010E9CD /* Release */, 305 | ); 306 | defaultConfigurationIsVisible = 0; 307 | defaultConfigurationName = Release; 308 | }; 309 | 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "scorealign" */ = { 310 | isa = XCConfigurationList; 311 | buildConfigurations = ( 312 | 1DEB923608733DC60010E9CD /* Debug */, 313 | 1DEB923708733DC60010E9CD /* Release */, 314 | ); 315 | defaultConfigurationIsVisible = 0; 316 | defaultConfigurationName = Release; 317 | }; 318 | /* End XCConfigurationList section */ 319 | }; 320 | rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; 321 | } 322 | 
-------------------------------------------------------------------------------- /scorealign.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #ifndef __MACH__ 7 | #include 8 | #endif 9 | #include 10 | #include "allegro.h" 11 | #include "audioreader.h" 12 | #include "scorealign.h" 13 | #include "gen_chroma.h" 14 | #include "comp_chroma.h" 15 | #include "curvefit.h" 16 | #include "mfmidi.h" 17 | #include "regression.h" 18 | #include "sautils.h" 19 | 20 | #if (defined (WIN32) || defined (_WIN32)) 21 | #define snprintf _snprintf 22 | #endif 23 | 24 | #define LOW_CUTOFF 40 25 | #define HIGH_CUTOFF 2000 26 | 27 | // Note: There are "verbose" flags passed as parameters that 28 | // enable some printing. The SA_VERBOSE compiler flag causes a 29 | // lot more debugging output, so it could be called VERY_VERBOSE 30 | // as opposed to the quieter verbose flags. 31 | 32 | #ifdef SA_VERBOSE 33 | #include "main.h" 34 | #endif 35 | 36 | // for presmoothing, how near does a point have to be to be "on the line" 37 | #define NEAR 1.5 38 | 39 | // path is file1_frames by file2_frames array, so first index 40 | // (rows) is in [0 .. file1_frames]. Array is sequence of rows. 41 | // columns (j) ranges from [0 .. 
file2_frames] 42 | #define PATH(i,j) (path[(i) * file2_frames + (j)]) 43 | 44 | /*===========================================================================*/ 45 | 46 | #if DEBUG_LOG 47 | FILE *dbf = NULL; 48 | #endif 49 | 50 | 51 | /* MAP_TIME 52 | lookup time of file1 in smooth_time_map and interpolate 53 | to get time in file2 54 | */ 55 | 56 | float Scorealign::map_time(float t1) 57 | { 58 | t1 /= actual_frame_period_1; // convert from seconds to frames 59 | int i = (int) t1; // round down 60 | if (i < 0) i = 0; 61 | if (i >= file1_frames - 1) i = file1_frames - 2; 62 | // interpolate to get time 63 | return actual_frame_period_2 * 64 | interpolate(i, smooth_time_map[i], i+1, smooth_time_map[i+1], 65 | t1); 66 | } 67 | 68 | 69 | /* FIND_MIDI_DURATION 70 | Finds the duration of a midi song where the end 71 | is defined by where the last note off occurs. Duration 72 | in seconds is given in DUR, and returns in int the number 73 | of notes in the song 74 | */ 75 | 76 | int find_midi_duration(Alg_seq &seq, float *dur) 77 | { 78 | *dur = 0.0F; 79 | int nnotes = 0; 80 | int i, j; 81 | seq.convert_to_seconds(); 82 | for (j = 0; j < seq.track_list.length(); j++) { 83 | Alg_events ¬es = (seq.track_list[j]); 84 | 85 | for (i = 0; i < notes.length(); i++) { 86 | Alg_event_ptr e = notes[i]; 87 | if (e->is_note()) { 88 | Alg_note_ptr n = (Alg_note_ptr) e; 89 | float note_end = n->time + n->dur; 90 | if (note_end > *dur) *dur = note_end; 91 | nnotes++; 92 | } 93 | } 94 | } 95 | return nnotes; 96 | } 97 | 98 | 99 | 100 | /* Returns the minimum of three values */ 101 | double min3(double x, double y, double z) 102 | { 103 | return (x < y ? 104 | (x < z ? x : z) : 105 | (y < z ? 
y : z)); 106 | } 107 | 108 | 109 | void save_frames(char *name, int frames, float **chrom_energy) 110 | { 111 | FILE *outf = fopen(name, "w"); 112 | int i,j; 113 | for (j=0; j < frames; j++) { 114 | float *chrom_energy_frame = chrom_energy[j]; 115 | for (i = 0; i <= CHROMA_BIN_COUNT; i++) { 116 | fprintf(outf, "%g ", chrom_energy_frame[i]); 117 | } 118 | fprintf(outf, "\n"); 119 | } 120 | fclose(outf); 121 | } 122 | 123 | 124 | /* steps through the dynamic programming path 125 | */ 126 | void Scorealign::path_step(int i, int j) 127 | { 128 | #if DEBUG_LOG 129 | fprintf(dbf, "(%i,%i) ", i, j); 130 | if (++path_count % 5 == 0 || 131 | (i == 0 && j == 0)) 132 | fprintf(dbf, "\n"); 133 | #endif 134 | pathx[pathlen] = i; 135 | pathy[pathlen] = j; 136 | pathlen++; 137 | } 138 | 139 | 140 | /* path_reverse -- path is computed from last to first, flip it */ 141 | /**/ 142 | void Scorealign::path_reverse() 143 | { 144 | int i = 0; 145 | int j = pathlen - 1; 146 | while (i < j) { 147 | short tempx = pathx[i]; short tempy = pathy[i]; 148 | pathx[i] = pathx[j]; pathy[i] = pathy[j]; 149 | pathx[j] = tempx; pathy[j] = tempy; 150 | i++; j--; 151 | } 152 | } 153 | 154 | /* 155 | Sees if the chroma energy vector is silent (indicated by the 12th element being one) 156 | Returns true if it is silent. 
False if it is not silent 157 | */ 158 | bool silent( int i, float *chrom_energy) 159 | { 160 | if (AREF2(chrom_energy, i,CHROMA_BIN_COUNT) == 1.0F) 161 | return true; 162 | else 163 | return false; 164 | 165 | } 166 | 167 | /* 168 | returns the first index in pathy where the element is bigger than sec 169 | */ 170 | int Scorealign::sec_to_pathy_index(float sec) 171 | { 172 | for (int i = 0 ; i < (file1_frames + file2_frames); i++) { 173 | if (smooth_time_map[i] * actual_frame_period_2 >= sec) { 174 | return i; 175 | } 176 | //printf("%i\n" ,pathy[i]); 177 | } 178 | return -1; 179 | } 180 | 181 | 182 | /* 183 | given a chrom_energy vector, sees how many 184 | of the inital frames are designated as silent 185 | */ 186 | 187 | int frames_of_init_silence( float *chrom_energy, int frame_count) 188 | { 189 | bool silence = true; 190 | int frames=0; 191 | while (silence) { 192 | if (silent(frames, chrom_energy)) 193 | frames++; 194 | else 195 | silence=false; 196 | } 197 | 198 | return frames; 199 | } 200 | 201 | 202 | /* COMPARE_CHROMA 203 | Perform Dynamic Programming to find optimal alignment 204 | */ 205 | void Scorealign::compare_chroma(bool verbose) 206 | { 207 | float *path; 208 | int x = 0; 209 | int y = 0; 210 | 211 | /* Allocate the distance matrix */ 212 | path = (float *) calloc(file1_frames * file2_frames, sizeof(float)); 213 | 214 | /* Initialize first row and column */ 215 | 216 | /* allow free skip over initial silence in either signal, but not both */ 217 | /* silence is indicated by a run of zeros along the first row and or 218 | * column, starting at the origin (0,0). After computing these runs, we 219 | * put the proper value at (0,0) 220 | */ 221 | if (verbose) printf("Performing silent skip DP \n"); 222 | PATH(0, 0) = (silent(0, chrom_energy1) ? 0 : 223 | gen_dist(0, 0, chrom_energy1, chrom_energy2)); 224 | for (int i = 1; i < file1_frames; i++) 225 | PATH(i, 0) = (PATH(i-1, 0) == 0 && silent(i, chrom_energy1) ? 
0 : 226 | gen_dist(i, 0, chrom_energy1, chrom_energy2) + 227 | PATH(i-1, 0)); 228 | PATH(0, 0) = (silent(0, chrom_energy2) ? 0 : 229 | gen_dist(0, 0, chrom_energy1, chrom_energy2)); 230 | for (int j = 1; j < file2_frames; j++) 231 | PATH(0, j) = (PATH(0, j-1) == 0 && silent(j, chrom_energy2) ? 0 : 232 | gen_dist(0, j, chrom_energy1, chrom_energy2) + 233 | PATH(0, j-1)); 234 | /* first row and first column are done, put proper value at (0,0) */ 235 | PATH(0, 0) = (!silent(0, chrom_energy1) || !silent(0, chrom_energy2) ? 236 | gen_dist(0, 0, chrom_energy1, chrom_energy2) : 0); 237 | 238 | /* Perform DP for the rest of the matrix */ 239 | for (int i = 1; i < file1_frames; i++) 240 | for (int j = 1; j < file2_frames; j++) 241 | PATH(i, j) = gen_dist(i, j, chrom_energy1, chrom_energy2) + 242 | min3(PATH(i-1, j-1), PATH(i-1, j), PATH(i, j-1)); 243 | 244 | if (verbose) printf("Completed Dynamic Programming.\n"); 245 | 246 | 247 | x = file1_frames - 1; 248 | y = file2_frames - 1; 249 | 250 | //x and y are the ending points, it can end at either the end of midi, 251 | // or end of audio but not both 252 | pathx = ALLOC(short, (x + y + 2)); 253 | pathy = ALLOC(short, (x + y + 2)); 254 | 255 | assert(pathx != NULL); 256 | assert(pathy != NULL); 257 | 258 | // map from file1 time to file2 time 259 | time_map = ALLOC(float, file1_frames); 260 | smooth_time_map = ALLOC(float, file1_frames); 261 | 262 | #if DEBUG_LOG 263 | fprintf(dbf, "\nOptimal Path: "); 264 | #endif 265 | while (1) { 266 | /* Check for stopping */ 267 | if (x == 0 & y == 0) { 268 | path_step(0, 0); 269 | path_reverse(); 270 | break; 271 | } 272 | 273 | /* Print the current coordinate in the path*/ 274 | path_step(x, y); 275 | 276 | /* Check for the optimal path backwards*/ 277 | if (x > 0 && y > 0 && PATH(x-1, y-1) <= PATH(x-1, y) && 278 | PATH(x-1, y-1) <= PATH(x, y-1)) { 279 | x--; 280 | y--; 281 | } else if (x > 0 && y > 0 && PATH(x-1, y) <= PATH(x, y-1)) { 282 | x--; 283 | } else if (y > 0) { 284 | y--; 
285 | } else if (x > 0) { 286 | x--; 287 | } 288 | } 289 | free(path); 290 | } 291 | 292 | 293 | 294 | void Scorealign::linear_regression(int n, int width, float &a, float &b) 295 | { 296 | int hw = (width - 1) / 2; // a more convenient form: 1/2 width 297 | // compute average of x = avg of time_map[i] 298 | float xsum = 0; 299 | float ysum = 0; 300 | float xavg, yavg; 301 | int i; 302 | for (i = n - hw; i <= n + hw; i++) { 303 | xsum += i; 304 | ysum += time_map[i]; 305 | } 306 | xavg = xsum / width; 307 | yavg = ysum / width; 308 | float num = 0; 309 | float den = 0; 310 | for (i = n - hw; i <= n + hw; i++) { 311 | num += (i - xavg) * (time_map[i] - yavg); 312 | den += (i - xavg) * (i - xavg); 313 | } 314 | b = num / den; 315 | a = yavg - b * xavg; 316 | } 317 | 318 | 319 | 320 | 321 | 322 | /* COMPUTE_SMOOTH_TIME_MAP 323 | compute regression line and estimate point at i 324 | 325 | Number of points in regression is smooth (an odd number). First 326 | index to compute is (smooth-1)/2. Use that line for the first 327 | (smooth+1)/2 points. The last index to compute is 328 | (file1_frames - (smooth+1)/2). Use that line for the last 329 | (smooth+1)/2 points. 
330 | */ 331 | void Scorealign::compute_smooth_time_map() 332 | { 333 | // do the first points: 334 | float a, b; 335 | linear_regression((smooth - 1) / 2, smooth, a, b); 336 | int i; 337 | for (i = 0; i < (smooth + 1) / 2; i++) { 338 | smooth_time_map[i] = a + b*i; 339 | } 340 | 341 | // do the middle points: 342 | for (i = (smooth + 1) / 2; i < file1_frames - (smooth + 1) / 2; i++) { 343 | linear_regression(i, smooth, a, b); 344 | smooth_time_map[i] = a + b*i; 345 | 346 | #if DEBUG_LOG 347 | fprintf(dbf, "time_map[%d] = %g, smooth_time_map[%d] = %g\n", 348 | i, time_map[i], i, a + b*i); 349 | #endif 350 | 351 | } 352 | 353 | // do the last points 354 | linear_regression(file1_frames - (smooth + 1) / 2, smooth, a, b); 355 | for (i = file1_frames - (smooth + 1) / 2; i < file1_frames; i++) { 356 | smooth_time_map[i] = a + b*i; 357 | } 358 | 359 | 360 | } 361 | 362 | 363 | /* near_line -- see if point is near line */ 364 | /**/ 365 | bool near_line(float x1, float y1, float x2, float y2, float x, float y) 366 | { 367 | float exact_y; 368 | if (x1 == x) { 369 | exact_y = y1; 370 | } else { 371 | assert(x1 != x2); 372 | exact_y = y1 + (y2 - y1) * ((x - x1) / (x2 - x1)); 373 | } 374 | y = y - exact_y; 375 | return y < NEAR && y > -NEAR; 376 | } 377 | 378 | 379 | // path_copy -- copy a path for debugging 380 | short *path_copy(short *path, int len) 381 | { 382 | short *new_path = ALLOC(short, len); 383 | memcpy(new_path, path, len * sizeof(path[0])); 384 | return new_path; 385 | } 386 | 387 | 388 | /* presmooth -- try to remove typical dynamic programming errors 389 | * 390 | * A common problem is that the best path wanders off track a ways 391 | * and then comes back. The idea of presmoothing is to see if the 392 | * path is mostly a straight line. If so, adjust the points off of 393 | * the line to fall along the line. The variable presmooth_time is 394 | * the duration of the line. It is drawn between every pair of 395 | * points presmooth_time apart. 
If 25% of the first half of the line 396 | * falls within one frame of the path, and 25% of the second half of 397 | * the line falls within one frame of the path, then find the best 398 | * fit of the line to the points within 1 frame. Then adjust the middle 399 | * part of the line (from 25% to 75%) to fall along the line. 400 | * Note that all this curve fitting is done on integer coordinates. 401 | */ 402 | void Scorealign::presmooth() 403 | { 404 | int n = ROUND(presmooth_time / actual_frame_period_2); 405 | n = (n + 3) & ~3; // round up to multiple of 4 406 | int i = 0; 407 | while (pathx[i] + n < file2_frames) { 408 | /* line goes from i to i+n-1 */ 409 | int x1 = pathx[i]; 410 | int xmid = x1 + n/2; 411 | int x2 = x1 + n; 412 | int y1 = pathy[i]; 413 | int y2; 414 | int j; 415 | /* search for y2 = pathy[j] s.t. pathx[j] == x2 */ 416 | for (j = i + n; j < pathlen; j++) { 417 | if (pathx[j] == x2) { 418 | y2 = pathy[j]; 419 | break; 420 | } 421 | } 422 | Regression regr; 423 | /* see if line fits the data */ 424 | int k = i; 425 | int count = 0; 426 | while (pathx[k] < xmid) { // search first half 427 | if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) { 428 | count++; 429 | regr.point(pathx[k], pathy[k]); 430 | } 431 | k++; 432 | } 433 | /* see if points were close to line */ 434 | if (count < n/4) { 435 | i++; 436 | continue; 437 | } 438 | /* see if line fits top half of the data */ 439 | while (pathx[k] < x2) { 440 | if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) { 441 | count++; 442 | regr.point(pathx[k], pathy[k]); 443 | } 444 | k++; 445 | } 446 | /* see if points were close to line */ 447 | if (count < n/4) { 448 | i++; 449 | continue; 450 | } 451 | /* debug: */ 452 | SA_V(printf("presmoothing path from %d to %d:\n", i, j);) 453 | SA_V(print_path_range(pathx, pathy, i, j);) 454 | /* fit line to nearby points */ 455 | regr.regress(); 456 | /* adjust points to fall along line */ 457 | // basically reconstruct pathx and pathy from i to j 458 | short x 
= pathx[i]; 459 | short y = pathy[i]; 460 | k = i + 1; 461 | SA_V(printf("start loop: j %d, pathx %d, pathy %d\n", 462 | j, pathx[j], pathy[j]);) 463 | while (x < pathx[j] || y < pathy[j]) { 464 | SA_V(printf("top of loop: x %d, y %d\n", x, y);) 465 | // iteratively make an optional move in the +y direction 466 | // then make a move in the x direction 467 | // check y direction: want to move to y+1 if either we are below 468 | // the desired y coordinate or we are below the maximum slope 469 | // line (if y is too low, we'll have to go at sharper than 2:1 470 | // slope to get to pathx[j], pathy[j], which is bad 471 | int target_y = ROUND(regr.f(x)); 472 | SA_V(printf("target_y@%d %d, r %g, ", x, target_y, regr.f(x));) 473 | // but what if the line goes way below the last point? 474 | // we don't want to go below a diagonal through the last point 475 | int dist_to_last_point = pathx[j] - x; 476 | int minimum_y = pathy[j] - 2 * dist_to_last_point; 477 | if (target_y < minimum_y) { 478 | target_y = minimum_y; 479 | SA_V(printf("minimum_y %d, ", minimum_y);) 480 | } 481 | // alternatively, if line goes too high: 482 | int maximum_y = pathy[j] - dist_to_last_point / 2; 483 | if (target_y > maximum_y) { 484 | target_y = maximum_y; 485 | SA_V(printf("maximum y %d, ", maximum_y);) 486 | } 487 | // now advance to target_y 488 | if (target_y > y) { 489 | pathx[k] = x; 490 | pathy[k] = y + 1; 491 | SA_V(printf("up: pathx[%d] %d, pathy[%d] %d\n", 492 | k, pathx[k], k, pathy[k]);) 493 | k++; 494 | y++; 495 | } 496 | if (x < pathx[j]) { 497 | // now advance x 498 | x++; 499 | // y can either go horizontal or diagonal, i.e. 
y either 500 | // stays the same or increments by one 501 | target_y = ROUND(regr.f(x)); 502 | SA_V(printf("target_y@%d %d, r %g, ", x, target_y, regr.f(x));) 503 | if (target_y > y) y++; 504 | pathx[k] = x; 505 | pathy[k] = y; 506 | SA_V(printf("pathx[%d] %d, pathy[%d] %d\n", 507 | k, pathx[k], k, pathy[k]);) 508 | k++; 509 | } 510 | } 511 | // make sure new path is no longer than original path 512 | // the last point we wrote was k - 1 513 | k = k - 1; // the last point we wrote is now k 514 | // DEBUG 515 | if (k > j) { 516 | printf("oops: k %d, j %d\n", k, j); 517 | SA_V(print_path_range(pathx, pathy, i, k);) 518 | } 519 | assert(k <= j); 520 | // if new path is shorter than original, then fix up path 521 | if (k < j) { 522 | memmove(&pathx[k], &pathx[j], sizeof(pathx[0]) * (pathlen - j)); 523 | memmove(&pathy[k], &pathy[j], sizeof(pathy[0]) * (pathlen - j)); 524 | pathlen -= (j - k); 525 | } 526 | /* debug */ 527 | SA_V(printf("after presmoothing:\n");) 528 | SA_V(print_path_range(pathx, pathy, i, k);) 529 | /* since we adjusted the path, skip by 3/4 of n */ 530 | i = i + 3 * n/4; 531 | } 532 | } 533 | 534 | 535 | /* COMPUTE_REGRESSION_LINES 536 | computes the smooth time map from the path computed 537 | by dynamic programming 538 | 539 | */ 540 | void Scorealign::compute_regression_lines() 541 | { 542 | // first, compute the y value of the path at 543 | // each x value. If the path has multiple values 544 | // on x, take the average. 545 | int p = 0; 546 | int i; 547 | int upper, lower; 548 | for (i = 0; i < file1_frames; i++) { 549 | lower = pathy[p]; 550 | while (p < pathlen && pathx[p] == i) { 551 | upper = pathy[p]; 552 | p = p + 1; 553 | } 554 | time_map[i] = (lower + upper) * 0.5; 555 | } 556 | // now fit a line to the nearest WINDOW points and record the 557 | // line's y value for each x. 
558 | compute_smooth_time_map(); 559 | } 560 | 561 | 562 | void Scorealign::midi_tempo_align(Alg_seq &seq, bool verbose) 563 | { 564 | // We create a new time map out of the alignment, and replace 565 | // the original time map in the Alg_seq sequence 566 | Alg_seq new_time_map_seq; 567 | 568 | /** align at all integer beats **/ 569 | int totalbeats; 570 | float dur_in_sec; 571 | find_midi_duration(seq, &dur_in_sec); 572 | // totalbeat = lastbeat + 1 and round up the beat 573 | totalbeats = (int) (seq.get_time_map()->time_to_beat(dur_in_sec) + 2); 574 | if (verbose) 575 | printf("midi duration = %f, totalbeats=%i \n", dur_in_sec, totalbeats); 576 | 577 | for (int i = 0; i < totalbeats; i++) { 578 | double newtime = map_time(seq.get_time_map()->beat_to_time(i)); 579 | if (newtime > 0) 580 | new_time_map_seq.insert_beat(newtime, (double) i); 581 | } 582 | seq.convert_to_beats(); 583 | seq.set_time_map(new_time_map_seq.get_time_map()); 584 | } 585 | 586 | 587 | // this routine performs an alignment by adjusting midi to match audio 588 | // 589 | void Scorealign::align_midi_to_audio(Alg_seq &seq, Audio_reader &reader, 590 | bool verbose) 591 | { 592 | /* Generate the chroma for file 1 593 | * This will always be the MIDI File when aligning midi with audio. 
594 | */ 595 | file1_frames = gen_chroma_midi(seq, HIGH_CUTOFF, LOW_CUTOFF, 596 | &chrom_energy1, &actual_frame_period_1, 1, verbose); 597 | 598 | /* Generate the chroma for file 2 */ 599 | file2_frames = gen_chroma_audio(reader, HIGH_CUTOFF, LOW_CUTOFF, 600 | &chrom_energy2, &actual_frame_period_2, 2, verbose); 601 | 602 | align_chromagrams(verbose); 603 | } 604 | 605 | void Scorealign::align_audio_to_audio(Audio_reader &reader1, 606 | Audio_reader &reader2, bool verbose) 607 | { 608 | file1_frames = gen_chroma_audio(reader1, HIGH_CUTOFF, LOW_CUTOFF, 609 | &chrom_energy1, &actual_frame_period_1, 1, verbose); 610 | file2_frames = gen_chroma_audio(reader2, HIGH_CUTOFF, LOW_CUTOFF, 611 | &chrom_energy2, &actual_frame_period_2, 2, verbose); 612 | align_chromagrams(verbose); 613 | } 614 | 615 | 616 | void Scorealign::align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2, 617 | bool verbose) 618 | { 619 | file1_frames = gen_chroma_midi(seq1, HIGH_CUTOFF, LOW_CUTOFF, 620 | &chrom_energy1, &actual_frame_period_1, 1, verbose); 621 | 622 | file2_frames = gen_chroma_midi(seq2, HIGH_CUTOFF, LOW_CUTOFF, 623 | &chrom_energy2, &actual_frame_period_2, 2, verbose); 624 | 625 | align_chromagrams(verbose); 626 | } 627 | 628 | void Scorealign::align_chromagrams(bool verbose) 629 | { 630 | if (verbose) 631 | printf("\nGenerated Chroma.\n"); 632 | /* now that we have actual_frame_period_2, we can compute smooth */ 633 | // smooth is an odd number of frames that spans about smooth_time 634 | smooth = ROUND(smooth_time / actual_frame_period_2); 635 | if (smooth < 3) smooth = 3; 636 | if (!(smooth & 1)) smooth++; // must be odd 637 | if (verbose) { 638 | printf("smoothing time is %g\n", smooth_time); 639 | printf("smooth count is %d\n", smooth); 640 | } 641 | /* Normalize the chroma frames */ 642 | norm_chroma(file1_frames, chrom_energy1); 643 | SA_V(printf("Chromagram data for file 1:\n");) 644 | SA_V(print_chroma_table(chrom_energy1, file1_frames);) 645 | norm_chroma(file2_frames, 
chrom_energy2); 646 | SA_V(printf("Chromagram data for file 2:\n");) 647 | SA_V(print_chroma_table(chrom_energy2, file2_frames);) 648 | if (verbose) 649 | printf("Normalized Chroma.\n"); 650 | 651 | /* Compare the chroma frames */ 652 | compare_chroma(verbose); 653 | /* Compute the smooth time map now for use by curve-fitting */ 654 | compute_regression_lines(); 655 | /* if line_time is set, do curve-fitting */ 656 | if (line_time > 0.0) { 657 | curve_fitting(this, verbose); 658 | /* Redo the smooth time map after curve fitting or smoothing */ 659 | compute_regression_lines(); 660 | } 661 | /* if presmooth_time is set, do presmoothing */ 662 | if (presmooth_time > 0.0) { 663 | presmooth(); 664 | /* Redo the smooth time map after curve fitting or smoothing */ 665 | compute_regression_lines(); 666 | } 667 | } 668 | --------------------------------------------------------------------------------