├── .gitignore ├── LICENSE ├── README.md ├── data ├── fft.json ├── mfcc.json └── ulaw2pcm.json ├── index.js ├── mfcc.js ├── package.json ├── src └── mfcc.js └── test ├── 1khz.wav ├── speech.wav └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage directory used by tools like istanbul 14 | coverage 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # node-waf configuration 20 | .lock-wscript 21 | 22 | # Compiled binary addons (http://nodejs.org/api/addons.html) 23 | build/Release 24 | 25 | # Dependency directory 26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 27 | node_modules 28 | 29 | .DS_Store 30 | 31 | #VIM 32 | *.swp 33 | *.swo 34 | 35 | #Idea 36 | .idea 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Vail Systems (Chicago, IL) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mfcc 2 | Node.JS implementation of the MFCC (Mel Frequency Cepstrum Coefficients) algorithm. 3 | 4 | Uses the pure Javascript implementations: 5 | 6 | - Fast Fourier Transform, FFT-JS (https://www.npmjs.com/package/fft-js) 7 | - Discrete Cosine Transform, DCT (https://www.npmjs.com/package/dct) 8 | 9 | Utilizes the standard Mel Scale: 10 | 11 | m = 2595 log (1 + f/700) 12 | 13 | Provides options for customizing the low and high cutoff frequency as well as specifying a custom number of Mel banks. 14 | 15 | Note this is primarily written to be an instructional codebase, and although the mathematics is proven correct by our internal tests the code base is not optimized for production or real-time analysis. 16 | 17 | # Introduction 18 | 19 | Code in this project was made by following the tutorial here: 20 | 21 | [http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/](http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/) 22 | 23 | To compute the MFCC: 24 | 25 | 1. Frame samples into `N=2^X` sized buffers where `X` is an integer. 26 | 2. Pass `N` frames into the Cooley Tukey Fast Fourier Transform to produce `F=N/2` frequency bins. 27 | 3. Optionally perform a power pass `P=G(F)`. 28 | 4. Build a triangular mel-scale filter bank with `M` filters where `M` is the number of mel bands we desire. 29 | 5. For each filter `M`, apply to `P` and then add up the results, resulting in `M` mel-scale scalars (`Ms`). 30 | 6. Perform a discrete cosine transform on `Ms` and keep only the first 12 coefficients. 31 | 32 | The 12 coefficients are the MFCC (Mel-Frequency Cepstral Coefficients). 33 | 34 | # Concepts 35 | 36 | The reason the term 'Cepstrum' is used is that it is a play on spectrum. In ordinary practice, we perform a spectral analysis on 37 | time-domain data. However, in step (6) above we are performing a discrete cosine transform on information that is already in the 38 | frequency domain. As a result, the pseudo-spectral term cepstrum was invented. 39 | 40 | The reason for the discrete cosine transformation step is to both compress the mel-bands and to autocorrelate them. 41 | 42 | # Example 43 | 44 | var fft = require('fft-js'), 45 | MFCC = require('mfcc'); 46 | 47 | // 64 Sample Signal 48 | var signal = [1,0,-1,0,1,0,-1,0,1,0,-1,0,1,0,-1,0, 49 | 1,0,-1,0,1,0,-1,0,1,0,-1,0,1,0,-1,0, 50 | 1,0,-1,0,1,0,-1,0,1,0,-1,0,1,0,-1,0, 51 | 1,0,-1,0,1,0,-1,0,1,0,-1,0,1,0,-1,0]; 52 | 53 | // Get our 32 complex FFT Phasors 54 | var phasors = fft.fft(signal); 55 | 56 | // Get our 32 frequency magnitudes 57 | var mags = fft.util.fftMag(phasors); 58 | 59 | // Construct an MFCC with the characteristics we desire 60 | var mfcc = MFCC.construct(32, // Number of expected FFT magnitudes 61 | 20, // Number of Mel filter banks 62 | 300, // Low frequency cutoff 63 | 3500, // High frequency cutoff 64 | 8000); // Sample Rate (8khz) 65 | 66 | // Run our MFCC on the FFT magnitudes 67 | var coef = mfcc(mags); 68 | 69 | console.log(coef); 70 | 71 | # Command Line Example 72 | 73 | Processing the MFCC for a `.wav` file: 74 | 75 | node mfcc.js -w test/1khz.wav 76 | 77 | To see all available options: 78 | 79 | node mfcc.js 80 | 81 | # License 82 | 83 | The MIT License (MIT) 84 | 85 | Copyright (c) 2015 Vail Systems (Chicago, IL) 86 | 87 | Permission is hereby granted, free of charge, to any person obtaining a copy 88 | of this software and associated documentation files (the "Software"), to deal 89 | in the Software without restriction, including without limitation the rights 90 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 91 | copies of the Software, and to permit persons to whom the Software is 92 | furnished to do so, subject to the following conditions: 93 | 94 | The above copyright notice and this permission notice shall be included in all 95 | copies or substantial portions of the Software. 96 | 97 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 98 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 99 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 100 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 101 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 102 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 103 | SOFTWARE. 104 | -------------------------------------------------------------------------------- /data/fft.json: -------------------------------------------------------------------------------- 1 | [ 2 | "2.501355e-03","2.593649e-03","4.414261e-03","6.796888e-03","1.186498e-02","2.304970e-02","6.347384e-02","5.950284e+00","1.368039e+01","5.950122e+00","6.370711e-02","2.355956e-02","1.235338e-02","7.709333e-03","5.132264e-03","3.820439e-03","2.756116e-03","2.312987e-03","1.705814e-03","1.610769e-03","1.208178e-03","1.342253e-03","1.447967e-03","7.011019e-02","1.628496e-01","7.022385e-02","1.085920e-03","8.700015e-04","3.752826e-04","6.996035e-04","2.146980e-04","6.562428e-04","1.641641e-04" 3 | ] 4 | -------------------------------------------------------------------------------- /data/mfcc.json: -------------------------------------------------------------------------------- 1 | [ 5.904757629127576e-7, 2 | 0.0000013999298934710302, 3 | 0.0000013999298934710302, 4 | 0.000004265992436375758, 5 | 0.000004265992436375758, 6 | 0.000016099656669393937, 7 | 0.000016099656669393937, 8 | 0.00012208873831350304, 9 | 1.0729054448683637, 10 | 1.0729054448683637, 11 | 5.671305168245454, 12 | 1.0728470246934547, 13 | 0.00012298775347127576, 14 | 0.000016819783860412122, 15 | 0.000016819783860412122, 16 | 0.0000046244241643757575, 17 | 0.000004512329724240515, 18 | 4.4229558038548484e-7, 19 | 2.3018713349866664e-7, 20 | 1.6211845036875755e-7, 21 | 2.085467945260909e-7, 22 | 4.423315392981819e-8, 23 | 5.459524593966667e-8, 24 | 0.00007456717578512557, 25 | 0.0009528304892572517, 26 | 0.00007476521902917127 ] 27 | -------------------------------------------------------------------------------- /data/ulaw2pcm.json: -------------------------------------------------------------------------------- 1 | {"0": -32124, 2 | "1": -31100, 3 | "2": -30076, 4 | "3": -29052, 5 | "4": -28028, 6 | "5": -27004, 7 | "6": -25980, 8 | "7": -24956, 9 | "8": -23932, 10 | "9": -22908, 11 | "10": -21884, 12 | "11": -20860, 13 | "12": -19836, 14 | "13": -18812, 15 | "14": -17788, 16 | "15": -16764, 17 | "16": -15996, 18 | "17": -15484, 19 | "18": -14972, 20 | "19": -14460, 21 | "20": -13948, 22 | "21": -13436, 23 | "22": -12924, 24 | "23": -12412, 25 | "24": -11900, 26 | "25": -11388, 27 | "26": -10876, 28 | "27": -10364, 29 | "28": -9852, 30 | "29": -9340, 31 | "30": -8828, 32 | "31": -8316, 33 | "32": -7932, 34 | "33": -7676, 35 | "34": -7420, 36 | "35": -7164, 37 | "36": -6908, 38 | "37": -6652, 39 | "38": -6396, 40 | "39": -6140, 41 | "40": -5884, 42 | "41": -5628, 43 | "42": -5372, 44 | "43": -5116, 45 | "44": -4860, 46 | "45": -4604, 47 | "46": -4348, 48 | "47": -4092, 49 | "48": -3900, 50 | "49": -3772, 51 | "50": -3644, 52 | "51": -3516, 53 | "52": -3388, 54 | "53": -3260, 55 | "54": -3132, 56 | "55": -3004, 57 | "56": -2876, 58 | "57": -2748, 59 | "58": -2620, 60 | "59": -2492, 61 | "60": -2364, 62 | "61": -2236, 63 | "62": -2108, 64 | "63": -1980, 65 | "64": -1884, 66 | "65": -1820, 67 | "66": -1756, 68 | "67": -1692, 69 | "68": -1628, 70 | "69": -1564, 71 | "70": -1500, 72 | "71": -1436, 73 | "72": -1372, 74 | "73": -1308, 75 | "74": -1244, 76 | "75": -1180, 77 | "76": -1116, 78 | "77": -1052, 79 | "78": -988, 80 | "79": -924, 81 | "80": -876, 82 | "81": -844, 83 | "82": -812, 84 | "83": -780, 85 | "84": -748, 86 | "85": -716, 87 | "86": -684, 88 | "87": -652, 89 | "88": -620, 90 | "89": -588, 91 | "90": -556, 92 | "91": -524, 93 | "92": -492, 94 | "93": -460, 95 | "94": -428, 96 | "95": -396, 97 | "96": -372, 98 | "97": -356, 99 | "98": -340, 100 | "99": -324, 101 | "100": -308, 102 | "101": -292, 103 | "102": -276, 104 | "103": -260, 105 | "104": -244, 106 | "105": -228, 107 | "106": -212, 108 | "107": -196, 109 | "108": -180, 110 | "109": -164, 111 | "110": -148, 112 | "111": -132, 113 | "112": -120, 114 | "113": -112, 115 | "114": -104, 116 | "115": -96, 117 | "116": -88, 118 | "117": -80, 119 | "118": -72, 120 | "119": -64, 121 | "120": -56, 122 | "121": -48, 123 | "122": -40, 124 | "123": -32, 125 | "124": -24, 126 | "125": -16, 127 | "126": -8, 128 | "127": 0, 129 | "128": 32124, 130 | "129": 31100, 131 | "130": 30076, 132 | "131": 29052, 133 | "132": 28028, 134 | "133": 27004, 135 | "134": 25980, 136 | "135": 24956, 137 | "136": 23932, 138 | "137": 22908, 139 | "138": 21884, 140 | "139": 20860, 141 | "140": 19836, 142 | "141": 18812, 143 | "142": 17788, 144 | "143": 16764, 145 | "144": 15996, 146 | "145": 15484, 147 | "146": 14972, 148 | "147": 14460, 149 | "148": 13948, 150 | "149": 13436, 151 | "150": 12924, 152 | "151": 12412, 153 | "152": 11900, 154 | "153": 11388, 155 | "154": 10876, 156 | "155": 10364, 157 | "156": 9852, 158 | "157": 9340, 159 | "158": 8828, 160 | "159": 8316, 161 | "160": 7932, 162 | "161": 7676, 163 | "162": 7420, 164 | "163": 7164, 165 | "164": 6908, 166 | "165": 6652, 167 | "166": 6396, 168 | "167": 6140, 169 | "168": 5884, 170 | "169": 5628, 171 | "170": 5372, 172 | "171": 5116, 173 | "172": 4860, 174 | "173": 4604, 175 | "174": 4348, 176 | "175": 4092, 177 | "176": 3900, 178 | "177": 3772, 179 | "178": 3644, 180 | "179": 3516, 181 | "180": 3388, 182 | "181": 3260, 183 | "182": 3132, 184 | "183": 3004, 185 | "184": 2876, 186 | "185": 2748, 187 | "186": 2620, 188 | "187": 2492, 189 | "188": 2364, 190 | "189": 2236, 191 | "190": 2108, 192 | "191": 1980, 193 | "192": 1884, 194 | "193": 1820, 195 | "194": 1756, 196 | "195": 1692, 197 | "196": 1628, 198 | "197": 1564, 199 | "198": 1500, 200 | "199": 1436, 201 | "200": 1372, 202 | "201": 1308, 203 | "202": 1244, 204 | "203": 1180, 205 | "204": 1116, 206 | "205": 1052, 207 | "206": 988, 208 | "207": 924, 209 | "208": 876, 210 | "209": 844, 211 | "210": 812, 212 | "211": 780, 213 | "212": 748, 214 | "213": 716, 215 | "214": 684, 216 | "215": 652, 217 | "216": 620, 218 | "217": 588, 219 | "218": 556, 220 | "219": 524, 221 | "220": 492, 222 | "221": 460, 223 | "222": 428, 224 | "223": 396, 225 | "224": 372, 226 | "225": 356, 227 | "226": 340, 228 | "227": 324, 229 | "228": 308, 230 | "229": 292, 231 | "230": 276, 232 | "231": 260, 233 | "232": 244, 234 | "233": 228, 235 | "234": 212, 236 | "235": 196, 237 | "236": 180, 238 | "237": 164, 239 | "238": 148, 240 | "239": 132, 241 | "240": 120, 242 | "241": 112, 243 | "242": 104, 244 | "243": 96, 245 | "244": 88, 246 | "245": 80, 247 | "246": 72, 248 | "247": 64, 249 | "248": 56, 250 | "249": 48, 251 | "250": 40, 252 | "251": 32, 253 | "252": 24, 254 | "253": 16, 255 | "254": 8, 256 | "255": 0 257 | } 258 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require('./src/mfcc'); 2 | -------------------------------------------------------------------------------- /mfcc.js: -------------------------------------------------------------------------------- 1 | /*===========================================================================*\ 2 | * Experimental implementation of MFCC. 3 | * (c) Vail Systems. Joshua Jung and Ben Bryan. 2015 4 | * 5 | * This code is not designed to be highly optimized but as an educational 6 | * tool to understand the Mel-scale and its related coefficients used in 7 | * human speech analysis. 8 | \*===========================================================================*/ 9 | var program = require('commander'), 10 | fs = require('fs'), 11 | path = require('path'), 12 | wav = require('wav'), 13 | fft = require('fft-js'), 14 | Framer = require('signal-windows').framer, 15 | ham = undefined, 16 | mfcc = require('./'); 17 | 18 | program.version('0.1') 19 | .usage('[options]') 20 | .option('-v, --verbose', 'True for verbose output.', 0) 21 | .option('-d, --debug [type]', '0: none, 1: Output power spectrum, post-filterbank values, and Mel coefficients. 2: output filter banks used. 3: output frequency magnitudes. Default 0.', 0) 22 | .option('-w, --wav [wav]', 'Wave file path to process.', undefined) 23 | .option('-m, --minFrequency [number]', 'Low frequency cutoff for MFCC. Default 300', 300) 24 | .option('-x, --maxFrequency [number]', 'High frequency cutoff for MFCC. Default 3500', 3500) 25 | .option('-f, --numMelSpecFilters [number]', 'Number of mel spec filter banks to use. Default is 26.', 26) 26 | .option('-n, --samplesPerFrame [number]', 'Number of samples per frame to pass into the FFT. Default 128.', 128) 27 | .option('-s, --samplesPerStep [number]', 'Number of samples to step between each frame. Default is samplesPerFrame.', 128); 28 | 29 | program.parse(process.argv); 30 | 31 | if (program.wav === undefined) 32 | { 33 | console.log('Please provide a wave file to process.'); 34 | program.outputHelp(); 35 | process.exit(1); 36 | } 37 | 38 | program.minFrequency = parseInt(program.minFrequency); 39 | program.maxFrequency = parseInt(program.maxFrequency); 40 | program.numMelSpecFilters = parseInt(program.numMelSpecFilters); 41 | program.samplesPerFrame = parseInt(program.samplesPerFrame); 42 | program.samplesPerStep = parseInt(program.samplesPerStep); 43 | 44 | if (program.samplesPerFrame & (program.samplesPerFrame-1) !== 0) 45 | throw Error('Please provide a samplesPerFrame that is a power of 2 (e.g. 32, 64, 128, 256, etc.). Was: ' + program.samplesPerFrame); 46 | 47 | var mfcc, // We construct after loading the wav file and reading the header. 48 | framer, // Framer is also constructed after loading the wav file 49 | sampleRate; 50 | 51 | /*-----------------------------------------------------------------------------------*\ 52 | * .wav file 53 | \*-----------------------------------------------------------------------------------*/ 54 | var wr = new wav.Reader(); 55 | 56 | wr.on('data', function (buffer, offset, length) { 57 | framer.frame(buffer, function (frame, fIx) { 58 | if (frame.length != program.samplesPerFrame) return; 59 | 60 | var phasors = fft.fft(frame), 61 | phasorMagnitudes = fft.util.fftMag(phasors), 62 | result = mfcc(phasorMagnitudes, program.debug && true); 63 | 64 | if (program.debug == 1) 65 | { 66 | console.log('Frame ' + fIx); 67 | console.log('Frame ' + frame.join(',')); 68 | console.log('FFT ' + phasorMagnitudes.join(',')); 69 | console.log('Post-filters(' + result.melSpec.length + '): ' + result.melSpec.join(',')); 70 | console.log('Post-filters Log(' + result.melSpecLog.length + '): ' + result.melSpecLog.join(',')); 71 | console.log('Post-DCT: ' + result.melCoef.join(',')); 72 | } 73 | else if (program.debug == 2) 74 | { 75 | console.log('Filters: ', result.filters); 76 | } 77 | else if (program.debug == 3) 78 | { 79 | console.log(phasorMagnitudes.join(',')); 80 | } 81 | else if (!program.debug) 82 | { 83 | result = result.map(function (f) {return f.toFixed(4);}); 84 | 85 | console.log(fIx + ',' + result.join(',')); 86 | } 87 | }); 88 | }); 89 | 90 | wr.on('format', function (format) { 91 | 92 | var sampleRate = format.sampleRate; 93 | 94 | ham = require('signal-windows').windows.construct('ham', program.samplesPerFrame); 95 | 96 | var ulawMap = format.ulaw ? JSON.parse(fs.readFileSync('data/ulaw2pcm.json').toString()) : undefined; 97 | 98 | if (ulawMap) for (var k in ulawMap) ulawMap[k] = ulawMap[k]/32767; 99 | 100 | if (format.channels != 1) 101 | throw Error('Right now this MFCC code only works on single channel 8-bit wave files.'); 102 | if (format.bitDepth != 8) 103 | throw Error('Right now this MFCC code only works on single channel 8-bit wave files.'); 104 | 105 | // Breaks samples up into frames and runs them through a transform (map) if 106 | // provided. In our case we want to transform from u-law if the wave file is 107 | // formatted as such. 108 | // By default we force a 'hamming' window. 109 | framer = new Framer({ 110 | map: ulawMap, 111 | frameSize: program.samplesPerFrame, 112 | frameStep: program.samplesPerStep, 113 | scale: ham, 114 | sampleType: 'UInt8' 115 | }); 116 | 117 | mfcc = mfcc.construct(program.samplesPerFrame / 2, 118 | program.numMelSpecFilters, 119 | program.minFrequency, 120 | program.maxFrequency, 121 | format.sampleRate); 122 | }); 123 | 124 | wr.on('end', function () { 125 | process.exit(1); 126 | }); 127 | 128 | fs.createReadStream(program.wav).pipe(wr); 129 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mfcc", 3 | "version": "0.0.3", 4 | "description": "Node.js implementation of the MFCC audio speech analysis algorithm.", 5 | "license": "MIT", 6 | "homepage": "https://github.com/vail-systems/node-mfcc", 7 | "repository": "vail-systems/node-mfcc", 8 | "contributors": [ 9 | "Joshua Jung (http://www.joshjung.com)", 10 | "Ben Bryan " 11 | ], 12 | "engines": { 13 | "node": ">=0.12.0" 14 | }, 15 | "main": "index.js", 16 | "files": [ 17 | "index.js", 18 | "mfcc.js", 19 | "src", 20 | "test", 21 | "data" 22 | ], 23 | "keywords": [ 24 | "mfcc", 25 | "mel", 26 | "scale", 27 | "cepstrum", 28 | "coefficients", 29 | "frequency", 30 | "fft", 31 | "audio", 32 | "speech" 33 | ], 34 | "dependencies": { 35 | "commander": "^2.8.1", 36 | "dct": "^0.0.3", 37 | "fft-js": "0.0.9", 38 | "signal-windows": "^0.0.1", 39 | "wav": "^1.0.0" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/mfcc.js: -------------------------------------------------------------------------------- 1 | /*===========================================================================*\ 2 | * Experimental implementation of MFCC. 3 | * (c) Vail Systems. Joshua Jung and Ben Bryan. 2015 4 | * 5 | * This code is not designed to be highly optimized but as an educational 6 | * tool to understand the Mel-scale and its related coefficients used in 7 | * human speech analysis. 8 | \*===========================================================================*/ 9 | var dct = require('dct'); 10 | 11 | module.exports = { 12 | /* 13 | * Given a set of amplitudes, estimates the power for those amplitudes. 14 | */ 15 | powerSpectrum: powerSpectrum, 16 | /* 17 | * Converts from hertz to the Mel-scale. Used by constructFilterBank. 18 | * 19 | * Based on the concept that human perception of an equidistant pitch decreases as 20 | * pitch increases. 21 | */ 22 | hzToMels: hzToMels, 23 | /* 24 | * Inverse of hzToMels. 25 | */ 26 | melsToHz: melsToHz, 27 | /* 28 | * Returns a filter bank with bankCount triangular filters distributed according to the mel scale. 29 | * 30 | * Focused specifically on human speech (300 hz - 8000 hz) 31 | * 32 | * Recommended values for u-law 8000 hz: 33 | * 34 | * - fftSize == 64 (128 bin FFT) 35 | * - bankCount == 31 36 | * - Low Frequency == 200 37 | * - High Frequency == 3500 38 | */ 39 | constructMelFilterBank: constructMelFilterBank, 40 | construct: construct 41 | }; 42 | 43 | function construct(fftSize, bankCount, lowFrequency, highFrequency, sampleRate) { 44 | if (!fftSize) throw Error('Please provide an fftSize'); 45 | if (!bankCount) throw Error('Please provide a bankCount'); 46 | if (!lowFrequency) throw Error('Please provide a low frequency cutoff.'); 47 | if (!highFrequency) throw Error('Please provide a high frequency cutoff.'); 48 | if (!sampleRate) throw Error('Please provide a valid sampleRate.'); 49 | 50 | var filterBank = constructMelFilterBank(fftSize, bankCount, lowFrequency, highFrequency, sampleRate); 51 | 52 | /** 53 | * Perform a full MFCC on a FFT spectrum. 54 | * 55 | * FFT Array passed in should contain frequency amplitudes only. 56 | * 57 | * Pass in truthy for debug if you wish to return outputs of each step (freq. powers, melSpec, and MelCoef) 58 | */ 59 | return function (fft, debug) { 60 | if (fft.length != fftSize) 61 | throw Error('Passed in FFT bins were incorrect size. Expected ' + fftSize + ' but was ' + fft.length); 62 | 63 | var //powers = powerSpectrum(fft), 64 | melSpec = filterBank.filter(fft), 65 | melSpecLog = melSpec.map(log), 66 | melCoef = dct(melSpecLog).slice(0,13), 67 | power = melCoef.splice(0,1); 68 | 69 | return debug ? { 70 | melSpec: melSpec, 71 | melSpecLog: melSpecLog, 72 | melCoef: melCoef, 73 | filters: filterBank, 74 | power: power 75 | } : melCoef; 76 | 77 | function log(m){return Math.log(1+m);}; 78 | } 79 | } 80 | 81 | function constructMelFilterBank(fftSize, nFilters, lowF, highF, sampleRate) { 82 | var bins = [], 83 | fq = [], 84 | filters = []; 85 | 86 | var lowM = hzToMels(lowF), 87 | highM = hzToMels(highF), 88 | deltaM = (highM - lowM) / (nFilters+1); 89 | 90 | // Construct equidistant Mel values between lowM and highM. 91 | for (var i = 0; i < nFilters; i++) { 92 | // Get the Mel value and convert back to frequency. 93 | // e.g. 200 hz <=> 401.25 Mel 94 | fq[i] = melsToHz(lowM + (i * deltaM)); 95 | 96 | // Round the frequency we derived from the Mel-scale to the nearest actual FFT bin that we have. 97 | // For example, in a 64 sample FFT for 8khz audio we have 32 bins from 0-8khz evenly spaced. 98 | bins[i] = Math.floor((fftSize+1) * fq[i] / (sampleRate/2)); 99 | } 100 | 101 | // Construct one cone filter per bin. 102 | // Filters end up looking similar to [... 0, 0, 0.33, 0.66, 1.0, 0.66, 0.33, 0, 0...] 103 | for (var i = 0; i < bins.length; i++) 104 | { 105 | filters[i] = []; 106 | var filterRange = (i != bins.length-1) ? bins[i+1] - bins[i] : bins[i] - bins[i-1]; 107 | filters[i].filterRange = filterRange; 108 | for (var f = 0; f < fftSize; f++) { 109 | // Right, outside of cone 110 | if (f > bins[i] + filterRange) filters[i][f] = 0.0; 111 | // Right edge of cone 112 | else if (f > bins[i]) filters[i][f] = 1.0 - ((f - bins[i]) / filterRange); 113 | // Peak of cone 114 | else if (f == bins[i]) filters[i][f] = 1.0; 115 | // Left edge of cone 116 | else if (f >= bins[i] - filterRange) filters[i][f] = 1.0 - (bins[i] - f) / filterRange; 117 | // Left, outside of cone 118 | else filters[i][f] = 0.0; 119 | } 120 | } 121 | 122 | // Store for debugging. 123 | filters.bins = bins; 124 | 125 | // Here we actually apply the filters one by one. Then we add up the results of each applied filter 126 | // to get the estimated power contained within that Mel-scale bin. 127 | // 128 | // First argument is expected to be the result of the frequencies passed to the powerSpectrum 129 | // method. 130 | return { 131 | filters: filters, 132 | lowMel: lowM, 133 | highMel: highM, 134 | deltaMel: deltaM, 135 | lowFreq: lowF, 136 | highFreq: highF, 137 | filter: function (freqPowers) { 138 | var ret = []; 139 | 140 | filters.forEach(function (filter, fIx) { 141 | var tot = 0; 142 | freqPowers.forEach(function (fp, pIx) { 143 | tot += fp * filter[pIx]; 144 | }); 145 | ret[fIx] = tot; 146 | }); 147 | return ret; 148 | } 149 | }; 150 | } 151 | 152 | function melsToHz(mels) { 153 | return 700 * (Math.exp(mels / 1127) - 1); 154 | } 155 | 156 | function hzToMels(hertz) { 157 | return 1127 * Math.log(1 + hertz/700); 158 | } 159 | 160 | /** 161 | * Estimate the power spectrum density from FFT amplitudes. 162 | */ 163 | function powerSpectrum(amplitudes) { 164 | var N = amplitudes.length; 165 | 166 | return amplitudes.map(function (a) { 167 | return (a * a) / N; 168 | }); 169 | } 170 | -------------------------------------------------------------------------------- /test/1khz.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vail-systems/node-mfcc/f67ca5e3cc7c7fc80cd4bcbed07962dbedc126e1/test/1khz.wav -------------------------------------------------------------------------------- /test/speech.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vail-systems/node-mfcc/f67ca5e3cc7c7fc80cd4bcbed07962dbedc126e1/test/speech.wav -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert'), 2 | mfcc = require('../'); 3 | 4 | mfcc = new mfcc.construct(64, 32, 300, 3500, 8000); 5 | 6 | var flat = [1,1,1,1,1,1,1,1, 7 | 1,1,1,1,1,1,1,1, 8 | 1,1,1,1,1,1,1,1, 9 | 1,1,1,1,1,1,1,1, 10 | 1,1,1,1,1,1,1,1, 11 | 1,1,1,1,1,1,1,1, 12 | 1,1,1,1,1,1,1,1, 13 | 1,1,1,1,1,1,1,1]; 14 | 15 | describe('MFCC', function () { 16 | describe('Output should be valid', function () { 17 | it('Length Test', function () { 18 | var coef = mfcc(flat); 19 | 20 | assert.equal(coef.length, 12); 21 | }); 22 | 23 | it('Flat Test', function () { 24 | var coef = mfcc(flat); 25 | 26 | // First coefficient should be large and all the others should be low since 27 | // the input magnitudes are all flat. 28 | for (var i = 1; i < coef.length; i++) 29 | assert(Math.abs(coef[0]) > Math.abs(coef[i])); 30 | }); 31 | }); 32 | }); 33 | 34 | function equalWithThresh(val1, val2, threshold) { 35 | return (val1 > val2 - threshold) && 36 | (val1 < val2 + threshold); 37 | }; 38 | --------------------------------------------------------------------------------