├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── codec.js ├── compress.js ├── index.html ├── main.js ├── signal.js └── ss.png /.gitattributes: -------------------------------------------------------------------------------- 1 | LICENSE text eol=lf encoding=utf-8 2 | *.md text eol=lf encoding=utf-8 3 | *.html text eol=lf encoding=utf-8 4 | *.js text eol=lf encoding=utf-8 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 Shuhei Kuno 2 | https://github.com/redlily 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Webで動作する音声圧縮の実証実験 2 | 3 | ## 概要 4 | 5 | 修正離散コサイン変換 (Modified Discrete Cosine Transform : MDCT) を使用したWebブラウザ上でJavaScriptを使用して 6 | 現実的な時間のエンコード、デコード可能な軽量な音声圧縮フォーマットの開発を目的とした実証実験用のプログラムです。 7 | 8 | ## Webアプリケーション 9 | 10 | https://redlily.github.io/training-webaudio-compression 11 | 12 | 13 | 14 | ### 使い方 15 | 16 | 1. 音声ファイル(おすすめは無圧縮)を選択してデータを読み込ませます。 17 | 1. 圧縮オプションを選び圧縮を実行、数秒から数十秒で圧縮が完了します。 18 | 1. 圧縮が完了すると再生ボタンとダウンロードリンクが有効になります。 19 | 1. 再生を押すと圧縮データの再生を開始、ダウンロードを押すと圧縮データのダウンロードを行います。 20 | 1. 圧縮したデータはこのプログラムでデータ読み込み、再生が出来ます。 21 | 22 | ## データフォーマット 23 | 24 | ### ヘッダ 25 | 26 | |変数名|型|説明| 27 | |:---|:---|:---| 28 | |MAGIC_NUMBER|UINT32|マジックナンバー、"ORPH"が固定値| 29 | |DATA_SIZE|UINT32|データのバイトサイズ| 30 | |DATA_TYPE|UINT32|拡張用のデータタイプ、"SMD0"が固定値| 31 | |VERSION|UINT32|データのバージョン| 32 | |SAMPLE_RATE|UINT32|サンプリングレート| 33 | |CHANNEL_SIZE|UINT32|チャネル数、1がモノラル、2がステレオ| 34 | |SAMPLE_COUNT|UINT32|データに含まれるサンプル数| 35 | |FREQUENCY_RANGE|UINT16|周波数ブロックのサイズ| 36 | |FREQUENCY_UPPER_LIMIT|UINT16|周波数ブロックの上限値| 37 | |FREQUENCY_TABLE_SIZE|UINT16|周波数テーブルのサイズ| 38 | |-|UINT16|バイトアライメント調整用の領域、今後の拡張次第では何か数値が入るかも| 39 | |FRAME_COUNT|UINT32|データに含まれるフレーム数| 40 | |FRAME_DATA|FRAME[CHANNEL_SIZE * FRAME_COUNT]|フレーム配列、チャネル数が2の場合、左、右とフレームが並ぶ| 41 | 42 | #### フレーム 43 | 44 | |変数名|型|説明| 45 | |:---|:---|:---| 46 | |MASTER_SCALE|UINT32|このフレームの主音量| 47 | |SUB_SCALES|UINT4[8]|8つの周波数帯用の音量を調整するためのスケール値| 48 | |ENABLE_FREQUENCIES|1bit[FREQUENCY_UPPER_LIMIT]
or
ceil(log_2(FREQUENCY_UPPER_LIMIT))bit[FREQUENCY_TABLE_SIZE]|周波数の有効無効を収納した1bitのフラグ配列、もしくは有効な周波数のインデックスを収納した配列
/*
 * Dump residue preserved verbatim from this shared source line (tail of the README.md frame table):
 * バイト数の小さい方を使用し4バイトアライメントに適合するサイズにする|
 * |FREQUENCY_VALUES|4bit[FREQUENCY_TABLE_SIZE]|有効な周波数の対数で符号化された数値|
 */

// codec.js - sound encoder and decoder implementation.
//
// Depends on a global `FastMDCT` object exposing `mdct(n, samples, frequencies)` and
// `imdct(n, samples, frequencies)`; it is not defined in this file (compress.js loads
// "signal.js" before this script).

var wamCodec = wamCodec || {};

(function () {

    // Magic number of the ORPH sound data format.
    const MAGIC_NUMBER =
        ("O".charCodeAt(0)) | ("R".charCodeAt(0) << 8) | ("P".charCodeAt(0) << 16) | ("H".charCodeAt(0) << 24);
    // File type: Simple Modified discrete cosine transform Data.
    const FILE_TYPE_SMD0 =
        ("S".charCodeAt(0)) | ("M".charCodeAt(0) << 8) | ("D".charCodeAt(0) << 16) | ("0".charCodeAt(0) << 24);
    // Version of the SMD0 format.
    const SMD0_VERSION = 0;

    // Header offset: magic number.
    const HEADER_OFFSET_MAGIC_NUMBER = 0;
    // Header offset: data size in bytes.
    const HEADER_OFFSET_DATA_SIZE = 4;
    // Header offset: data type (reserved for extensions).
    const HEADER_OFFSET_DATA_TYPE = 8;
    // Header offset: format version.
    const HEADER_OFFSET_VERSION = 12;
    // Header offset: sampling rate.
    const HEADER_OFFSET_SAMPLE_RATE = 16;
    // Header offset: number of samples.
    const HEADER_OFFSET_SAMPLE_COUNT = 20;
    // Header offset: number of frames.
    const HEADER_OFFSET_FRAME_COUNT = 24;
    // Header offset: number of channels, 1 is mono, 2 is stereo.
    const HEADER_OFFSET_CHANNEL_SIZE = 28;
    // Header offset: frequency range, must be a power of two.
    const HEADER_OFFSET_FREQUENCY_RANGE = 30;
    // Header offset: upper limit of the encoded frequencies.
    const HEADER_OFFSET_FREQUENCY_UPPER_LIMIT = 32;
    // Header offset: frequency table size, most efficient when divisible by 32.
    const HEADER_OFFSET_FREQUENCY_TABLE_SIZE = 34;
    // Header offset: start of the frame data.
    const HEADER_OFFSET_DATA = 36;

    // Frame offset: master amplitude scale.
    const FRAME_OFFSET_MASTER_SCALE = 0;
    // Frame offset: amplitude sub scales, up to eight 4-bit logarithmic scales relative to the master scale.
    const FRAME_OFFSET_SUB_SCALE = 4;
    // Frame offset: start of the per-frame payload (frequency flags/indices, then the value table).
    const FRAME_OFFSET_DATA = 8;

    // Base of the logarithm used by the logarithmic quantisation.
    const BASE_OF_LOGARITHM = 2;

    // Throws an Error carrying `message` when `test` is falsy.
    function assert(test, message) {
        if (!test) throw new Error(message || "Failed to test.");
    }

    // State and bit-level helpers shared by the encoder and the decoder.
    class WamCoder {

        constructor() {
            this.data = null;
            this.frameCount = 0;
            this.numChannels = 0;
            this.frequencyRange = 0;
            this.frequencyUpperLimit = 0;
            this.frequencyTableSize = 0;
            this.subScales = null;
            this.subScaleStart = 0;
            this.windowFunction = null;
            this.samples = null;
            this.frequencies = null;
            this.frequencyFlags = null;
            this.indexBitSize = 0;
            this.indicesSize = 0;
            this.isIndexMode = false;
        }

        // Reads the 4-bit value number `index` (0 = low nibble, 1 = high nibble) of the byte at `offset`.
        readHalfUbyte(offset, index) {
            return 0xf & (this.data.getUint8(offset) >>> (index << 2));
        }

        // Overwrites the 4-bit value number `index` of the byte at `offset`, leaving the other nibble intact.
        writeHalfUbyte(offset, index, value) {
            this.data.setUint8(
                offset,
                (0xff & (this.data.getUint8(offset) & ~(0xf << (index << 2)))) | ((0xf & value) << (index << 2)));
        }

        // Builds the symmetric window table of length 2 * frequencyRange
        // (described as a Vorbis window by the original author).
        setupWindowFunction() {
            const size = this.frequencyRange << 1;
            this.windowFunction = new Float32Array(size);
            for (let i = 0; i < this.frequencyRange; ++i) {
                const value = Math.sin(Math.PI / 2 * Math.pow(Math.sin(Math.PI * (i / (size - 1))), 2));
                this.windowFunction[i] = value;
                this.windowFunction[size - 1 - i] = value;
            }
        }

        // Multiplies the sample work buffer by the window table in place.
        applyWindowFunction() {
            const size = this.frequencyRange << 1;
            for (let i = 0; i < size; ++i) {
                this.samples[i] *= this.windowFunction[i];
            }
        }

        // Derives the frame layout from frequencyRange / frequencyUpperLimit / frequencyTableSize and
        // allocates the work buffers that both coders need.
        setupWorkArea() {
            // Bits needed to store one frequency index.
            this.indexBitSize = Math.ceil(Math.log2(this.frequencyUpperLimit));
            // Size in bits of the index list, rounded up to a whole number of 32-bit words.
            this.indicesSize = Math.ceil(this.indexBitSize * this.frequencyTableSize / 32) * 32;
            // Use the index list instead of the one-bit-per-frequency bitmap when it is the smaller one.
            this.isIndexMode = (1 << this.indexBitSize) > this.indicesSize;
            this.subScales = new Uint8Array(Math.min(this.indexBitSize, 8));
            this.subScaleStart = this.frequencyUpperLimit / (1 << Math.min(this.indexBitSize, 7));
            this.frequencyFlags = new Uint32Array(this.frequencyUpperLimit / 32);
            this.frequencies = new Float32Array(this.frequencyRange);
            this.samples = new Float32Array(this.frequencyRange << 1);
        }

        // First frequency index covered by sub scale `band`.
        getSubBandStart(band) {
            return band == 0 ? 0 : this.subScaleStart << (band - 1);
        }

        // One past the last frequency index covered by sub scale `band`, clamped to the upper limit.
        getSubBandEnd(band) {
            return Math.min(this.subScaleStart << band, this.frequencyUpperLimit);
        }

        // Returns true when the flag bit of frequency `index` is set. Indices outside the flag array
        // read as "not set".
        isFrequencyEnabled(index) {
            return ((this.frequencyFlags[Math.floor(index / 32)] >>> (index % 32)) & 0x1) !== 0;
        }

        // Byte offset of the frame `frame` of channel `channel`. Frames are interleaved per channel.
        getDataOffset(frame, channel) {
            return HEADER_OFFSET_DATA +
                (FRAME_OFFSET_DATA +
                    (this.isIndexMode ? (this.indicesSize / 8) : (this.frequencyUpperLimit / 8)) +
                    (this.frequencyTableSize >>> 1)) *
                (this.numChannels * frame + channel);
        }
    }

    // Web Audio Media encoder.
    class WamEncoder extends WamCoder {

        // sampleRate          sampling rate written to the header, must be positive
        // numChannels         number of channels, must be positive
        // frequencyRange      MDCT block size, multiple of 32 (default 1024)
        // frequencyUpperLimit number of low frequencies that may be stored (default frequencyRange)
        // frequencyTableSize  number of frequencies stored per frame, multiple of 8 (default frequencyRange / 4)
        // initSampleCount     expected number of samples, only used to size the initial buffer
        // Throws an Error when a parameter violates the constraints above.
        constructor(sampleRate, numChannels, frequencyRange, frequencyUpperLimit, frequencyTableSize, initSampleCount = 4096) {
            super();

            this.sampleRate = sampleRate;
            this.numChannels = numChannels;
            this.frequencyRange = frequencyRange != null ? frequencyRange : 1024;
            this.frequencyUpperLimit = frequencyUpperLimit != null ? frequencyUpperLimit : this.frequencyRange;
            this.frequencyTableSize = frequencyTableSize != null ? frequencyTableSize : this.frequencyRange >>> 2;
            // Number of input samples consumed so far; starts at zero so that `+=` never produces NaN.
            this.sampleCount = 0;

            assert(this.sampleRate > 0);
            assert(this.numChannels > 0);
            assert(this.frequencyRange > 0);
            assert(this.frequencyRange % 32 == 0); // must be a multiple of 32 for efficiency
            // Compare against the resolved value: the raw argument is undefined when the default is used.
            assert(this.frequencyUpperLimit <= this.frequencyRange);
            assert(this.frequencyTableSize > 0);
            assert(this.frequencyTableSize % 8 == 0); // must be a multiple of 8 to stay on byte boundaries

            const initBufferSize = HEADER_OFFSET_DATA +
                (FRAME_OFFSET_DATA + (this.frequencyRange / 32) * 4 + this.frequencyTableSize) *
                this.numChannels * Math.ceil(initSampleCount / this.frequencyRange);

            this.data = new DataView(new ArrayBuffer(initBufferSize));
            this.data.setUint32(HEADER_OFFSET_MAGIC_NUMBER, MAGIC_NUMBER);
            this.data.setUint32(HEADER_OFFSET_DATA_SIZE, 0);
            this.data.setUint32(HEADER_OFFSET_DATA_TYPE, FILE_TYPE_SMD0);
            this.data.setUint32(HEADER_OFFSET_VERSION, SMD0_VERSION);
            this.data.setUint32(HEADER_OFFSET_SAMPLE_RATE, this.sampleRate);
            this.data.setUint32(HEADER_OFFSET_SAMPLE_COUNT, 0);
            this.data.setUint32(HEADER_OFFSET_FRAME_COUNT, 0);
            this.data.setUint16(HEADER_OFFSET_CHANNEL_SIZE, this.numChannels);
            this.data.setUint16(HEADER_OFFSET_FREQUENCY_RANGE, this.frequencyRange);
            this.data.setUint16(HEADER_OFFSET_FREQUENCY_UPPER_LIMIT, this.frequencyUpperLimit);
            this.data.setUint16(HEADER_OFFSET_FREQUENCY_TABLE_SIZE, this.frequencyTableSize);

            this.setupWindowFunction();
            this.setupWorkArea();

            this.frequencyPowers = new Float32Array(this.frequencyUpperLimit);
            this.prevInputs = new Array(this.numChannels);
            this.workBuffers = new Array(this.numChannels);
            for (let i = 0; i < this.numChannels; ++i) {
                this.prevInputs[i] = new Float32Array(this.frequencyRange);
                this.workBuffers[i] = new Float32Array(this.frequencyRange);
            }
            this.workBufferOffset = 0;
        }

        // Encodes exactly one frame from `length` samples per channel starting at `start`; a short
        // frame is padded with zeros.
        // inputData is an array with one sample array per channel, values in [-1, 1].
        writeFrame(inputData, start = 0, length = this.frequencyRange) {
            assert(inputData.length >= this.numChannels);
            assert(length <= this.frequencyRange && length >= 0);

            this.nextFrame();
            for (let i = 0; i < this.numChannels; ++i) {
                const input = inputData[i];
                let dataOffset = this.getDataOffset(this.frameCount - 1, i);

                // Fill the first half of the work buffer with the previous input.
                const prevInput = this.prevInputs[i];
                for (let j = 0; j < this.frequencyRange; ++j) {
                    this.samples[j] = prevInput[j];
                }

                // Fill the second half with the current input and remember it for the next frame.
                for (let j = 0; j < length; ++j) {
                    const value = input[start + j] * ((1 << 16) - 1); // scale [-1, 1] to a 16-bit range
                    this.samples[this.frequencyRange + j] = value;
                    prevInput[j] = value;
                }
                for (let j = length; j < this.frequencyRange; ++j) {
                    this.samples[this.frequencyRange + j] = 0;
                    prevInput[j] = 0;
                }

                // Window, then transform into the frequency domain.
                this.applyWindowFunction();
                FastMDCT.mdct(this.frequencyRange, this.samples, this.frequencies);

                // Master scale: the largest magnitude below the upper limit (at least 1).
                let masterScale = 1;
                for (let j = 0; j < this.frequencyUpperLimit; ++j) {
                    const power = Math.abs(this.frequencies[j]);
                    if (power > masterScale) {
                        masterScale = power;
                    }
                }
                this.data.setUint32(dataOffset + FRAME_OFFSET_MASTER_SCALE, masterScale);

                // Sub scales: per band, the peak relative to the master scale in half-octave steps (0..15).
                for (let j = 0; j < this.subScales.length; ++j) {
                    let subScale = 1;
                    for (let k = this.getSubBandStart(j), end = this.getSubBandEnd(j); k < end; ++k) {
                        const power = Math.abs(this.frequencies[k]);
                        if (power > subScale) {
                            subScale = power;
                        }
                    }
                    const power = Math.floor(Math.min(-Math.log(subScale / masterScale) / Math.log(BASE_OF_LOGARITHM) * 2, 15));
                    this.subScales[j] = power;
                    this.writeHalfUbyte(dataOffset + FRAME_OFFSET_SUB_SCALE + (j >>> 1), 0x1 & j, power);
                }

                // Normalised power of every frequency; anything below the smallest representable
                // value of its band is treated as silence.
                for (let j = 0; j < this.subScales.length; ++j) {
                    const subScale = this.subScales[j];
                    for (let k = this.getSubBandStart(j), end = this.getSubBandEnd(j); k < end; ++k) {
                        const power = Math.abs(this.frequencies[k]) / masterScale;
                        this.frequencyPowers[k] = power > Math.pow(BASE_OF_LOGARITHM, -7 - subScale * 0.5) ? power : 0;
                    }
                }

                // Choose the frequencies to store: scan from high to low and, every time the
                // accumulated power reaches an equal share of the total, keep the strongest
                // frequency of that stretch. Repeat until the table is full or no power is left.
                this.frequencyFlags.fill(0);
                let writeCount = 0;
                while (writeCount < this.frequencyTableSize) {
                    let sumPower = 0;
                    for (let j = 0; j < this.frequencyUpperLimit; ++j) {
                        sumPower += this.frequencyPowers[j];
                    }
                    if (sumPower <= 0) {
                        break;
                    }

                    let sum = 0;
                    let maxIndex = this.frequencyUpperLimit - 1;
                    let maxPower = this.frequencyPowers[maxIndex];
                    for (let j = this.frequencyUpperLimit - 1; j >= 0 && writeCount < this.frequencyTableSize; --j) {
                        const power = this.frequencyPowers[j];
                        sum += power;

                        if (power > maxPower) {
                            maxPower = power;
                            maxIndex = j;
                        }

                        if (sum >= sumPower / this.frequencyTableSize) {
                            this.frequencyFlags[Math.floor(maxIndex / 32)] |= 1 << (maxIndex % 32);
                            this.frequencyPowers[maxIndex] = 0;
                            writeCount++;

                            sum = 0;
                            maxIndex = j - 1;
                            maxPower = this.frequencyPowers[maxIndex];
                        }
                    }
                }

                // Write which frequencies are present.
                dataOffset += FRAME_OFFSET_DATA;
                if (this.isIndexMode) {
                    // Index mode: pack the indices of the enabled frequencies into 32-bit words.
                    // NOTE(review): when fewer than frequencyTableSize frequencies were selected,
                    // fewer words than indicesSize / 32 are written here - verify against the decoder.
                    let value = 0;
                    let index = 0;
                    for (let j = 0; j < this.frequencyUpperLimit; ++j) {
                        if (this.isFrequencyEnabled(j)) {
                            value |= j << index;
                            index += this.indexBitSize;
                            if (index >= 32) {
                                this.data.setUint32(dataOffset, value);
                                dataOffset += 4;
                                index %= 32;
                                value = j >> (this.indexBitSize - index);
                            }
                        }
                    }
                    if (index != 0) {
                        this.data.setUint32(dataOffset, value);
                        dataOffset += 4;
                    }
                } else {
                    // Flag mode: one bit per frequency.
                    for (let j = 0; j < this.frequencyFlags.length; ++j) {
                        this.data.setUint32(dataOffset, this.frequencyFlags[j]);
                        dataOffset += 4;
                    }
                }

                // Write the enabled frequencies as sign bit + 3-bit logarithmic magnitude.
                let frequencyOffset = 0;
                for (let j = 0; j < this.subScales.length; ++j) {
                    const subScale = this.subScales[j];
                    for (let k = this.getSubBandStart(j), end = this.getSubBandEnd(j); k < end; ++k) {
                        if (this.isFrequencyEnabled(k)) {
                            const value = this.frequencies[k] / masterScale;
                            const signed = value >= 0 ? 0x0 : 0x8;
                            const power = Math.ceil(Math.min(-Math.log(Math.abs(value)) / Math.log(BASE_OF_LOGARITHM) - subScale * 0.5, 7));
                            this.writeHalfUbyte(
                                dataOffset + (frequencyOffset >>> 1),
                                0x1 & frequencyOffset,
                                signed | power);
                            frequencyOffset += 1;
                        }
                    }
                }
            }
            this.sampleCount += length;
        }

        // Advances to the next frame, growing the output buffer until the new frame fits.
        nextFrame() {
            this.frameCount++;
            const requiredSize = this.getDataSize();
            if (requiredSize > this.data.buffer.byteLength) {
                // A single doubling is not enough when the initial buffer is smaller than one frame.
                let newSize = this.data.buffer.byteLength;
                while (newSize < requiredSize) {
                    newSize *= 2;
                }
                const buffer = new ArrayBuffer(newSize);
                new Uint8Array(buffer).set(new Uint8Array(this.data.buffer));
                this.data = new DataView(buffer);
            }
        }

        // Feeds `length` samples per channel starting at `start`. Whole frames are encoded
        // immediately, a trailing partial frame is buffered until more data arrives or flush() is called.
        write(inputData, start = 0, length = this.frequencyRange) {
            assert(inputData.length >= this.numChannels);

            // Complete the partially filled frame first.
            if (this.workBufferOffset > 0) {
                const writeSize = Math.min(this.frequencyRange - this.workBufferOffset, length);
                for (let i = 0; i < this.numChannels; ++i) {
                    const input = inputData[i];
                    const workBuffer = this.workBuffers[i];
                    for (let j = 0; j < writeSize; ++j) {
                        workBuffer[this.workBufferOffset + j] = input[start + j];
                    }
                }
                start += writeSize;
                length -= writeSize;
                this.workBufferOffset += writeSize;
                if (this.workBufferOffset >= this.frequencyRange) {
                    this.writeFrame(this.workBuffers);
                    this.workBufferOffset = 0;
                }
            }

            // Encode whole frames straight from the input.
            while (length >= this.frequencyRange) {
                this.writeFrame(inputData, start);
                start += this.frequencyRange;
                length -= this.frequencyRange;
            }

            // Keep the remainder for later.
            if (length > 0) {
                for (let i = 0; i < this.numChannels; ++i) {
                    const input = inputData[i];
                    const workBuffer = this.workBuffers[i];
                    for (let j = 0; j < length; ++j) {
                        workBuffer[j] = input[start + j];
                    }
                }
                this.workBufferOffset = length;
            }
        }

        // Encodes the buffered partial frame, padded with zeros.
        flush() {
            if (this.workBufferOffset > 0) {
                for (let i = 0; i < this.numChannels; ++i) {
                    this.workBuffers[i].fill(0, this.workBufferOffset, this.frequencyRange);
                }
                this.writeFrame(this.workBuffers);
                this.workBufferOffset = 0;
            }
        }

        // Size in bytes of the header plus all frames written so far.
        getDataSize() {
            return this.getDataOffset(this.frameCount, 0);
        }

        // Finalises the header and returns a copy of the encoded data as an ArrayBuffer.
        getDataBuffer() {
            const dataSize = this.getDataSize();
            this.data.setUint32(HEADER_OFFSET_DATA_SIZE, dataSize);
            this.data.setUint32(HEADER_OFFSET_SAMPLE_COUNT, this.frequencyRange * this.frameCount);
            this.data.setUint32(HEADER_OFFSET_FRAME_COUNT, this.frameCount);
            return this.data.buffer.slice(0, dataSize);
        }
    }

    wamCodec.WamEncoder = WamEncoder;

    // Web Audio Media decoder.
    class WamDecoder extends WamCoder {

        // Returns true when `data` (an ArrayBuffer) starts with the format's magic number.
        // Buffers too short to hold a magic number are reported as "not WAM data" instead of throwing.
        static isWamData(data) {
            if (data == null || data.byteLength < HEADER_OFFSET_MAGIC_NUMBER + 4) {
                return false;
            }
            return new DataView(data).getUint32(HEADER_OFFSET_MAGIC_NUMBER) == MAGIC_NUMBER;
        }

        // Parses and validates the header of `data` (an ArrayBuffer) and prepares for decoding.
        // Throws an Error when the header is inconsistent.
        constructor(data) {
            super();

            this.data = new DataView(data);
            this.magicNumber = this.data.getUint32(HEADER_OFFSET_MAGIC_NUMBER);
            this.fileSize = this.data.getUint32(HEADER_OFFSET_DATA_SIZE);
            this.fileType = this.data.getUint32(HEADER_OFFSET_DATA_TYPE);
            this.version = this.data.getUint32(HEADER_OFFSET_VERSION);
            this.sampleRate = this.data.getUint32(HEADER_OFFSET_SAMPLE_RATE);
            this.sampleCount = this.data.getUint32(HEADER_OFFSET_SAMPLE_COUNT);
            this.frameCount = this.data.getUint32(HEADER_OFFSET_FRAME_COUNT);
            this.numChannels = this.data.getUint16(HEADER_OFFSET_CHANNEL_SIZE);
            this.frequencyRange = this.data.getUint16(HEADER_OFFSET_FREQUENCY_RANGE);
            this.frequencyUpperLimit = this.data.getUint16(HEADER_OFFSET_FREQUENCY_UPPER_LIMIT);
            this.frequencyTableSize = this.data.getUint16(HEADER_OFFSET_FREQUENCY_TABLE_SIZE);

            assert(this.magicNumber == MAGIC_NUMBER);
            assert(this.fileSize <= data.byteLength);
            assert(this.fileType == FILE_TYPE_SMD0);
            assert(this.version == SMD0_VERSION);
            assert(this.sampleRate > 0);
            assert(this.sampleCount <= this.frequencyRange * this.frameCount);
            assert(this.numChannels > 0);
            assert(this.frequencyRange > 0);
            assert(this.frequencyUpperLimit <= this.frequencyRange);
            assert(this.frequencyTableSize > 0);

            this.setupWindowFunction();
            this.setupWorkArea();

            this.prevOutputs = new Array(this.numChannels);
            this.workBuffers = new Array(this.numChannels);
            for (let i = 0; i < this.numChannels; ++i) {
                this.prevOutputs[i] = new Float32Array(this.frequencyRange);
                this.workBuffers[i] = new Float32Array(this.frequencyRange);
            }
            this.currentFrame = 0;
            // A full offset means "no decoded samples are waiting in the work buffers".
            this.workBufferOffset = this.frequencyRange;
        }

        // Decodes `length` samples per channel into outputData (one sample array per channel)
        // starting at `start`. Samples of a partially consumed frame are kept for the next call.
        read(outputData, start = 0, length = this.frequencyRange) {
            assert(outputData.length >= this.numChannels);

            // Hand out the samples left over from the previous call.
            if (this.workBufferOffset < this.frequencyRange) {
                const writeSize = Math.min(length, this.frequencyRange - this.workBufferOffset);
                for (let i = 0; i < this.numChannels; ++i) {
                    const output = outputData[i];
                    const workBuffer = this.workBuffers[i];
                    for (let j = 0; j < writeSize; ++j) {
                        output[start + j] = workBuffer[this.workBufferOffset + j];
                    }
                }
                start += writeSize;
                length -= writeSize;
                this.workBufferOffset += writeSize;
            }

            // Decode whole frames straight into the output.
            while (length >= this.frequencyRange) {
                this.readFrame(outputData, start);
                start += this.frequencyRange;
                length -= this.frequencyRange;
            }

            // Decode one more frame into the work buffers and copy out the part that is still needed.
            if (length > 0) {
                this.readFrame(this.workBuffers, 0);
                for (let i = 0; i < this.numChannels; ++i) {
                    const output = outputData[i];
                    const workBuffer = this.workBuffers[i];
                    for (let j = 0; j < length; ++j) {
                        output[start + j] = workBuffer[j];
                    }
                }
                this.workBufferOffset = length;
            }
        }

        // Decodes the current frame of every channel into outputData and advances to the next frame.
        readFrame(outputData, start = 0, length = this.frequencyRange) {
            assert(outputData.length >= this.numChannels);
            assert(length <= this.frequencyRange && length >= 0);

            for (let i = 0; i < this.numChannels; ++i) {
                const output = outputData[i];
                let dataOffset = this.getDataOffset(this.currentFrame, i);

                // Master amplitude scale.
                const masterVolume = this.data.getUint32(dataOffset + FRAME_OFFSET_MASTER_SCALE);

                // Amplitude sub scales.
                for (let j = 0; j < this.subScales.length; ++j) {
                    this.subScales[j] = this.readHalfUbyte(dataOffset + FRAME_OFFSET_SUB_SCALE + (j >>> 1), 0x1 & j);
                }

                // Which frequencies are present.
                dataOffset += FRAME_OFFSET_DATA;
                if (this.isIndexMode) {
                    // Index mode: unpack frequencyTableSize indices from the 32-bit words.
                    this.frequencyFlags.fill(0);
                    let index = 0;
                    const mask = (1 << this.indexBitSize) - 1;
                    let value = this.data.getUint32(dataOffset);
                    dataOffset += 4;
                    for (let j = 0; j < this.frequencyTableSize; ++j) {
                        let bitIndex = mask & value;
                        value >>>= this.indexBitSize;
                        index += this.indexBitSize;
                        if (index > 32) {
                            // The index straddles a word boundary: take its upper bits from the next word.
                            value = this.data.getUint32(dataOffset);
                            dataOffset += 4;
                            index %= 32;
                            bitIndex |= mask & (value << (this.indexBitSize - index));
                            value >>>= index;
                        }
                        this.frequencyFlags[Math.floor(bitIndex / 32)] |= 1 << (bitIndex % 32);
                    }
                } else {
                    // Flag mode: one bit per frequency.
                    for (let j = 0; j < this.frequencyFlags.length; ++j) {
                        this.frequencyFlags[j] = this.data.getUint32(dataOffset);
                        dataOffset += 4;
                    }
                }

                // Rebuild the spectrum from the value table.
                this.frequencies.fill(0);
                let frequencyOffset = 0;
                for (let j = 0; j < this.subScales.length; ++j) {
                    const subScale = this.subScales[j];
                    for (let k = this.getSubBandStart(j), end = this.getSubBandEnd(j); k < end; ++k) {
                        if (this.isFrequencyEnabled(k)) {
                            const value = this.readHalfUbyte(dataOffset + (frequencyOffset >>> 1), 0x1 & frequencyOffset);
                            const signed = 0x8 & value;
                            const power = Math.pow(BASE_OF_LOGARITHM, -(0x7 & value) - subScale * 0.5) * masterVolume;
                            this.frequencies[k] = signed == 0 ? power : -power;
                            frequencyOffset += 1;
                        }
                    }
                }

                // Back to the time domain, then window.
                FastMDCT.imdct(this.frequencyRange, this.samples, this.frequencies);
                this.applyWindowFunction();

                // Overlap-add: previous second half + current first half, scaled from 16 bit back to [-1, 1].
                const prevOutput = this.prevOutputs[i];
                for (let j = 0; j < length; ++j) {
                    output[start + j] = prevOutput[j] + this.samples[j] / ((1 << 16) - 1);
                    prevOutput[j] = this.samples[this.frequencyRange + j] / ((1 << 16) - 1);
                }
                for (let j = length; j < this.frequencyRange; ++j) {
                    prevOutput[j] = this.samples[this.frequencyRange + j] / ((1 << 16) - 1);
                }
            }
            this.nextFrame();
        }

        // Advances to the next frame, wrapping around to the first one after the last.
        nextFrame() {
            this.currentFrame = (this.currentFrame + 1) % this.frameCount;
        }
    }

    // `WamDcoder` is the (misspelt) name existing callers use; `WamDecoder` is the same class.
    wamCodec.WamDcoder = WamDecoder;
    wamCodec.WamDecoder = WamDecoder;

})();
// compress.js - Worker that performs the audio compression.
//
// Expects one message whose data carries: sampleRate, numChannels, frequencyRange,
// frequencyUpperLimit, frequencyTableSize, originalSampleRate, originalChannelSize and
// originalSampleData (one sample array per channel). Posts {"kind": "update", "progress"}
// while encoding and finally {"kind": "completed", "encodedBuffer"}.

importScripts("signal.js");
importScripts("codec.js");

self.addEventListener("message", (message) => {
    // Read the parameters.
    let sampleRate = message.data["sampleRate"];
    let channelSize = message.data["numChannels"];
    const frequencyRange = message.data["frequencyRange"];
    const frequencyUpperLimit = message.data["frequencyUpperLimit"];
    const frequencyTableSize = message.data["frequencyTableSize"];
    const originalSampleRate = message.data["originalSampleRate"];
    const originalChannelSize = message.data["originalChannelSize"];
    const originalSampleData = message.data["originalSampleData"];
    const originalSampleCount = originalSampleData[0].length;
    let sampleCount = originalSampleCount;

    // The requested sampling rate differs from the source.
    if (sampleRate < originalSampleRate) {
        // Lower rate: decimate in place by picking the nearest earlier sample.
        const times = originalSampleRate / sampleRate;
        sampleCount = Math.floor(sampleCount / times);
        for (let i = 0; i < originalChannelSize; ++i) {
            const samples = originalSampleData[i];
            // Only the first sampleCount entries are used afterwards; going further would read
            // past the end of the array.
            for (let j = 0; j < sampleCount; ++j) {
                samples[j] = samples[Math.floor(j * times)];
            }
        }
    } else if (sampleRate > originalSampleRate) {
        // Higher rate: never upsample, keep the source rate.
        sampleRate = originalSampleRate;
    }

    // The requested channel count differs from the source.
    if (channelSize == 1 && originalChannelSize == 2) {
        // Fewer channels: mix stereo down into the left channel, averaging so that the
        // result stays within the range of the inputs.
        const left = originalSampleData[0];
        const right = originalSampleData[1];
        for (let i = 0; i < sampleCount; ++i) {
            left[i] = (left[i] + right[i]) * 0.5;
        }
    } else if (channelSize > originalChannelSize) {
        // More channels: never add channels, keep the source channel count.
        channelSize = originalChannelSize;
    }

    console.log(`encoding`);
    console.log(`sample rate ${sampleRate}`);
    console.log(`channel size ${channelSize}`);
    console.log(`frequency range ${frequencyRange}`);
    console.log(`frequency upper limit ${frequencyUpperLimit}`);
    console.log(`frequency table size ${frequencyTableSize}`);
    console.log(`sample count ${sampleCount}`);

    // Encode frame by frame so that progress can be reported. The last chunk may be shorter
    // than a frame; the encoder buffers it and flush() writes it out.
    const encoder = new wamCodec.WamEncoder(
        sampleRate, channelSize,
        frequencyRange, frequencyUpperLimit, frequencyTableSize,
        sampleCount);
    for (let offset = 0; offset < sampleCount; offset += frequencyRange) {
        encoder.write(originalSampleData, offset, Math.min(frequencyRange, sampleCount - offset));
        self.postMessage({
            "kind": "update",
            "progress": offset / sampleCount
        });
    }
    encoder.flush();
    self.postMessage({
        "kind": "update",
        "progress": 1.0
    });

    // Return the result, transferring the buffer instead of copying it.
    const encodedBuffer = encoder.getDataBuffer();
    self.postMessage({
        "kind": "completed",
        "encodedBuffer": encodedBuffer,
    }, [encodedBuffer]);
});
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 圧縮テスト 6 | 7 | 8 | 9 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 29 | 30 | 31 | 33 | 36 | 37 | 38 | 41 | 42 | 43 | 44 | 45 | 48 | 56 | 57 | 58 | 61 | 73 | 74 | 75 | 78 | 90 | 91 | 92 | 95 | 109 | 110 | 111 | 114 | 133 | 134 | 135 | 138 | 141 | 142 | 143 | 146 | 147 | 148 | 149 | 150 | 152 | 155 | 156 | 157 | 160 | 163 | 164 | 165 | 168 | 169 | 170 | 171 | 172 | 174 | 177 | 178 | 179 | 181 | 184 | 185 |
24 | 入力ファイル 25 | 27 | 28 |
32 | 34 | すでにORPH形式で圧縮済みのデータです。 35 |
39 |
40 |
46 | チャネル数 47 | 49 | 55 |
59 | サンプリングレート 60 | 62 | 72 |
76 | 処理ブロックサイズ 77 | 79 | 89 |
93 | 周波数の上限 94 | 96 | 108 |
112 | 周波数テーブルサイズ 113 | 115 | 132 |
136 | ビットレート 137 | 139 | N/S 140 |
144 |
145 |
151 | 153 | 154 |
158 | 圧縮処理の進捗 159 | 161 | 0% 162 |
166 |
167 |
173 | 175 | 176 |
180 | 182 | ダウンロード 183 |
// --------------------------------------------------------------------------------
// /main.js:
// --------------------------------------------------------------------------------
(function () {

    // Wire the page lifecycle only when a DOM is present; every function below is
    // browser-only, so there is nothing to set up when the script is loaded by a
    // non-browser tool.
    if (typeof window !== "undefined" && typeof document !== "undefined") {
        addEventListener("load", () => {
            initializeUI();
        });

        addEventListener("unload", () => {
            terminateAudio();
            terminateUI();
        });
    }

    // ---- UI ----

    // File open button
    let openFileButton;
    // Label shown when the selected file is already compressed
    let alreadyCompressedDataLabel;
    // Channel count selector
    let channelSizeSelector;
    // Sample rate selector
    let sampleRateSelector;
    // Frequency range selector
    let frequencyRangeSelector;
    // MDCT processing range (frequency upper limit) selector
    let frequencyUpperLimitSelector;
    // Frequency table size selector
    let frequencyTableSizeSelector;
    // Label showing the bit rate after compression
    let compressedBitRateLabel;
    // Label showing the encoding progress
    let encodingRateLabel;
    // Compress button
    let compressButton;
    // Play button
    let playButton;
    // Download link
    let downloadButton;

    // Looks up the UI elements, attaches their listeners and sets the initial state.
    function initializeUI() {
        openFileButton = document.getElementById("openFileButton");
        openFileButton.addEventListener("change", onChangedSourceFile);
        alreadyCompressedDataLabel = document.getElementById("alreadyCompressedDataLabel");
        alreadyCompressedDataLabel.style.display = "none";
        channelSizeSelector = document.getElementById("channelSizeSelector");
        channelSizeSelector.addEventListener("change", onChangedChannelSize);
        sampleRateSelector = document.getElementById("sampleRateSelector");
        sampleRateSelector.addEventListener("change", onChangedSampleRate);
        frequencyRangeSelector = document.getElementById("frequencyRangeSelector");
        frequencyRangeSelector.addEventListener("change", onChangedFrequencyRange);
        frequencyUpperLimitSelector = document.getElementById("frequencyUpperLimitSelector");
        frequencyUpperLimitSelector.addEventListener("change", onChangedFrequencyUpperLimit);
        frequencyTableSizeSelector = document.getElementById("frequencyTableSizeSelector");
        frequencyTableSizeSelector.addEventListener("change", onChangedFrequencyTableSize);
        compressedBitRateLabel = document.getElementById("compressedBitRateLabel");
        encodingRateLabel = document.getElementById("encodingRateLabel");
        compressButton = document.getElementById("compressButton");
        compressButton.addEventListener("click", onClickedCompressButton);
        compressButton.disabled = true;
        playButton = document.getElementById("playButton");
        playButton.addEventListener("click", onClickedPlayButton);
        playButton.disabled = true;
        downloadButton = document.getElementById("downloadButton");
        downloadButton.style.visibility = "hidden";
        updateBitRateOfCompressedAudio();
    }

    // Detaches every listener attached by initializeUI().
    function terminateUI() {
        openFileButton.removeEventListener("change", onChangedSourceFile);
        channelSizeSelector.removeEventListener("change", onChangedChannelSize);
        sampleRateSelector.removeEventListener("change", onChangedSampleRate);
        frequencyRangeSelector.removeEventListener("change", onChangedFrequencyRange);
        frequencyUpperLimitSelector.removeEventListener("change", onChangedFrequencyUpperLimit);
        frequencyTableSizeSelector.removeEventListener("change", onChangedFrequencyTableSize);
        compressButton.removeEventListener("click", onClickedCompressButton);
        playButton.removeEventListener("click", onClickedPlayButton);
    }

    // Returns the value of the currently selected option of a <select> element.
    function selectedValue(selector) {
        return selector.options[selector.selectedIndex].value;
    }

    // The input file was changed.
    function onChangedSourceFile(event) {
        console.log("Changed the source file");

        playButton.disabled = true;
        compressButton.disabled = true;
        encodingRateLabel.textContent = "0%";
        downloadButton.style.visibility = "hidden";

        // Stop whatever is playing; the audio context itself is rebuilt later by
        // makeCompressedAudioNode() / onClickedCompressButton().
        pauseAudio();

        const file = event.target.files[0];
        originalFile = file != null ? file : null;
        if (originalFile == null) {
            // The file dialog was cancelled: nothing to read, keep the buttons disabled.
            return;
        }

        // Check whether the file is already compressed data.
        const fileReader = new FileReader();
        fileReader.addEventListener("loadend", () => {
            // "loadend" also fires when reading failed, in which case there is no result.
            if (fileReader.result == null) {
                console.error(fileReader.error);
                return;
            }
            if (wamCodec.WamDcoder.isWamData(fileReader.result)) {
                makeCompressedAudioNode(fileReader.result);
                alreadyCompressedDataLabel.style.display = "inline";
                playButton.disabled = false;
            } else {
                alreadyCompressedDataLabel.style.display = "none";
                compressButton.disabled = false;
            }
        });
        fileReader.readAsArrayBuffer(originalFile);
    }

    // The channel count was changed.
    function onChangedChannelSize(event) {
        const value = selectedValue(this);
        console.log(`Changed the channel count from ${channelSize} to ${value}.`);
        channelSize = Number.parseFloat(value);
        updateBitRateOfCompressedAudio();
    }

    // The sample rate was changed.
    function onChangedSampleRate(event) {
        const value = selectedValue(this);
        console.log(`Changed the sample rate from ${sampleRate} to ${value}.`);
        sampleRate = Number.parseFloat(value);
        updateBitRateOfCompressedAudio();
    }

    // The frequency range was changed; the upper limit and the table size are
    // expressed relative to the range, so both are recomputed.
    function onChangedFrequencyRange(event) {
        const value = selectedValue(this);
        console.log(`Changed the frequency range from ${frequencyRange} to ${value}.`);
        frequencyRange = Number.parseFloat(value);
        updateFrequencyUpperLimit();
        updateFrequencyTableSize();
        updateBitRateOfCompressedAudio();
    }

    // The frequency upper limit was changed.
    function onChangedFrequencyUpperLimit(event) {
        const previous = frequencyUpperLimit;
        updateFrequencyUpperLimit();
        updateBitRateOfCompressedAudio();
        console.log(`Changed the frequency upper limit from ${previous} to ${frequencyUpperLimit}.`);
    }

    // The frequency table size was changed.
    function onChangedFrequencyTableSize(event) {
        const previous = frequencyTableSize;
        updateFrequencyTableSize();
        updateBitRateOfCompressedAudio();
        console.log(`Changed the frequency table size from ${previous} to ${frequencyTableSize}.`);
    }

    // Recomputes the frequency upper limit: the selector holds the limit in eighths
    // of the frequency range.
    function updateFrequencyUpperLimit() {
        frequencyUpperLimit =
            frequencyRange * Number.parseFloat(selectedValue(frequencyUpperLimitSelector)) / 8;
    }

    // Table size for the given FREQUENCY_TABLE_SIZES entry, scaled to the current
    // frequency range and rounded to a whole number of entries.
    function scaledFrequencyTableSize(index) {
        return Math.round(FREQUENCY_TABLE_SIZES[index] * frequencyRange / DEFAULT_FREQUENCY_RANGE);
    }

    // Recomputes the frequency table size and refreshes the option labels.
    // The stored value is rounded exactly like the label so that what is encoded
    // matches what is displayed (and is never fractional).
    function updateFrequencyTableSize() {
        frequencyTableSize = scaledFrequencyTableSize(frequencyTableSizeSelector.selectedIndex);
        const optionCount = Math.min(FREQUENCY_TABLE_SIZES.length, frequencyTableSizeSelector.options.length);
        for (let i = 0; i < optionCount; ++i) {
            frequencyTableSizeSelector.options[i].text = `${scaledFrequencyTableSize(i)}`;
        }
    }

    // Updates the label showing the bit rate after compression.
    function updateBitRateOfCompressedAudio() {
        // Bits used to mark the enabled frequencies: either one flag bit per frequency
        // or a list of indices padded to 32 bits, whichever is smaller.
        const frequencyFlagBits = Math.min(
            1 * frequencyUpperLimit,
            Math.ceil(Math.ceil(Math.log2(frequencyUpperLimit)) * frequencyTableSize / 32) * 32);
        // (master scale + sub scales (8 bands) + frequency flags + frequency table) * channel count.
        // Every channel has its own frame, so the two 32 bit scale fields are per channel too.
        const frameSize = (32 + 32 + frequencyFlagBits + 4 * frequencyTableSize) * channelSize;
        compressedBitRateLabel.textContent = `${Math.round(frameSize * sampleRate / frequencyRange / 1000)} kbps`;
    }

    // The compress button was clicked: decode the source file, encode it in a worker
    // and enable playback / download of the result.
    function onClickedCompressButton(event) {
        console.log("Clicked the compress button.");

        if (originalFile == null) {
            return;
        }

        compressButton.disabled = true;
        playButton.disabled = true;
        downloadButton.style.visibility = "hidden";

        // terminateAudio() also stops the playback.
        terminateAudio();
        initializeAudio();

        // Any failure is reported and gives the compress button back to the user.
        const onFailed = (error) => {
            console.error(error);
            compressButton.disabled = false;
        };

        // Read the file.
        const fileReader = new FileReader();
        fileReader.addEventListener("loadend", () => {
            if (fileReader.result == null) {
                onFailed(fileReader.error);
                return;
            }
            try {
                const pending = audioContext.decodeAudioData(fileReader.result, (audioBuffer) => {
                    encodeAudioData(audioBuffer).then((encodedBuffer) => {
                        makeDownloadLink(encodedBuffer);
                        makeCompressedAudioNode(encodedBuffer);
                        compressButton.disabled = false;
                        playButton.disabled = false;
                    }).catch(onFailed);
                }, onFailed);
                // Promise based implementations reject as well; the failure has already
                // been handled through the error callback.
                if (pending != null && typeof pending.catch === "function") {
                    pending.catch(() => {
                    });
                }
            } catch (error) {
                // Synchronous failure, e.g. the audio context could not be created.
                onFailed(error);
            }
        });
        fileReader.readAsArrayBuffer(originalFile);
    }

    // Encodes the decoded audio in a WebWorker.
    // Resolves with the encoded buffer, rejects when the worker reports a failure.
    function encodeAudioData(audioBuffer) {
        return new Promise((resolve, reject) => {
            // Collect the samples of every channel into an array.
            const sampleData = new Array(audioBuffer.numberOfChannels);
            for (let i = 0; i < sampleData.length; ++i) {
                sampleData[i] = audioBuffer.getChannelData(i);
            }

            // Run the encoder in a WebWorker.
            const worker = new Worker("compress.js");
            worker.addEventListener('message', (message) => {
                switch (message.data["kind"]) {
                    case "update":
                        encodingRateLabel.textContent = `${Math.ceil(message.data["progress"] * 100)} %`;
                        break;
                    case "completed": {
                        resolve(message.data["encodedBuffer"]);
                        worker.terminate();
                        break;
                    }
                    case "failed":
                    default:
                        reject(new Error("Failed to encoding"));
                        worker.terminate();
                        break;
                }
            });
            // A script error inside the worker never posts a message; without this
            // listener the promise would stay pending forever.
            worker.addEventListener('error', (error) => {
                reject(error);
                worker.terminate();
            });
            // The channel buffers are transferred (not copied) to the worker.
            worker.postMessage({
                "numChannels": channelSize,
                "sampleRate": sampleRate,
                "frequencyRange": frequencyRange,
                "frequencyUpperLimit": frequencyUpperLimit,
                "frequencyTableSize": frequencyTableSize,
                "originalSampleRate": audioBuffer.sampleRate,
                "originalChannelSize": audioBuffer.numberOfChannels,
                "originalSampleData": sampleData
            }, sampleData.map((value) => value.buffer));
        });
    }

    // Creates the download link for the encoded data.
    function makeDownloadLink(buffer) {
        // Release the object URL of the previous result, it is not reachable any more.
        if (downloadUrl != null) {
            window.URL.revokeObjectURL(downloadUrl);
        }
        const blob = new Blob([buffer], {type: "application/octet-binary"});
        downloadUrl = window.URL.createObjectURL(blob);
        downloadButton.href = downloadUrl;

        // Replace only the extension: cut at the last dot, and keep names that merely
        // start with a dot intact.
        const name = originalFile.name;
        const extensionIndex = name.lastIndexOf(".");
        const baseName = extensionIndex > 0 ? name.substring(0, extensionIndex) : name;
        downloadButton.download = `${baseName}.orp`;
        downloadButton.style.visibility = "visible";
    }

    // The play button was clicked: toggles between playing and paused.
    function onClickedPlayButton(event) {
        console.log("Clicked the play button.");
        if (!isPlayAudio()) {
            playAudio();
        } else {
            pauseAudio();
        }
    }

    // ---- Audio ----

    // Selectable frequency table sizes (for the default frequency range)
    const FREQUENCY_TABLE_SIZES = [
        8,
        12,
        16,
        24,
        32,
        48,
        64,
        96,
        128,
        192,
        256,
        384,
        512,
    ];

    // Default sampling rate
    const DEFAULT_SAMPLE_RATE = 48000;
    // Default channel count
    const DEFAULT_CHANNEL_SIZE = 2;
    // Default frequency range
    const DEFAULT_FREQUENCY_RANGE = 1024;
    // Default frequency upper limit
    const DEFAULT_FREQUENCY_UPPER_LIMIT = DEFAULT_FREQUENCY_RANGE * 6 / 8;
    // Default frequency table size
    const DEFAULT_FREQUENCY_TABLE_SIZE = 192;

    // Sampling rate
    let sampleRate = DEFAULT_SAMPLE_RATE;
    // Channel count
    let channelSize = DEFAULT_CHANNEL_SIZE;
    // Frequency range
    let frequencyRange = DEFAULT_FREQUENCY_RANGE;
    // Frequency upper limit
    let frequencyUpperLimit = DEFAULT_FREQUENCY_UPPER_LIMIT;
    // Frequency table size
    let frequencyTableSize = DEFAULT_FREQUENCY_TABLE_SIZE;

    // The source file
    let originalFile = null;
    // Object URL currently assigned to the download link
    let downloadUrl = null;
    // The AudioContext instance
    let audioContext = null;
    // AudioSource used for playback
    let audioSource = null;
    // AudioNode producing the decoded (compressed) audio
    let compressedAudioNode = null;

    // Whether the audio context has been created.
    function isInitializedAudio() {
        return audioContext != null;
    }

    // Creates the audio context (no-op when one already exists).
    // A failure is logged and leaves the audio uninitialized.
    function initializeAudio(sampleRate = 48000) {
        if (isInitializedAudio()) {
            return;
        }
        try {
            audioContext = window.AudioContext != null ?
                new window.AudioContext({"sampleRate": sampleRate}) :
                new window.webkitAudioContext();
        } catch (e) {
            console.error(e);
        }
    }

    // Stops the playback and releases the audio context.
    function terminateAudio() {
        if (!isInitializedAudio()) {
            return;
        }
        pauseAudio();

        const context = audioContext;
        audioContext = null;
        // The node belongs to the context that is being released.
        compressedAudioNode = null;

        // Browsers only allow a limited number of live contexts, so free this one.
        if (typeof context.close === "function") {
            const closing = context.close();
            if (closing != null && typeof closing.catch === "function") {
                closing.catch((error) => console.error(error));
            }
        }
    }

    // Creates the AudioNode that plays back the compressed data.
    // NOTE(review): createScriptProcessor() is deprecated in favour of AudioWorklet.
    function makeCompressedAudioNode(buffer) {
        const decoder = new wamCodec.WamDcoder(buffer);
        terminateAudio();
        initializeAudio(decoder.sampleRate);
        if (!isInitializedAudio()) {
            // initializeAudio() has already logged the reason.
            return;
        }

        // Keep the context this node belongs to; the shared variable may be replaced
        // while the node is still delivering events.
        const context = audioContext;
        compressedAudioNode = context.createScriptProcessor(4096, decoder.numChannels, decoder.numChannels);
        compressedAudioNode.addEventListener("audioprocess", (event) => {
            const sampleData = new Array(event.outputBuffer.numberOfChannels);
            for (let i = 0; i < sampleData.length; ++i) {
                sampleData[i] = event.outputBuffer.getChannelData(i);
            }

            // Decode
            const sampleTimes = context.sampleRate / decoder.sampleRate;
            const sampleCount = Math.floor(event.outputBuffer.length / sampleTimes);
            decoder.read(sampleData, 0, sampleCount);

            // Stretch the samples in place when the context runs at a higher rate than
            // the data. Walking backwards keeps the not yet expanded samples intact.
            if (sampleTimes > 1) {
                for (let i = 0; i < decoder.numChannels; ++i) {
                    const samples = sampleData[i];
                    for (let j = sampleCount - 1; j >= 0; --j) {
                        const sample = samples[j];
                        const start = Math.floor(j * sampleTimes);
                        for (let k = start; k < start + sampleTimes; ++k) {
                            samples[k] = sample;
                        }
                    }
                }
            }
        });
    }

    // Whether the audio is playing.
    function isPlayAudio() {
        return isInitializedAudio() && audioSource != null;
    }

    // Starts the playback. Does nothing when there is nothing to play or when the
    // audio is already playing.
    function playAudio() {
        if (!isInitializedAudio() || compressedAudioNode == null || isPlayAudio()) {
            return;
        }
        // The (empty) buffer source only drives the script processor node.
        audioSource = audioContext.createBufferSource();
        audioSource.connect(compressedAudioNode);
        compressedAudioNode.connect(audioContext.destination);
        audioSource.start();
    }

    // Pauses the playback.
    function pauseAudio() {
        if (!isPlayAudio()) {
            return;
        }
        compressedAudioNode.disconnect(audioContext.destination);
        audioSource.disconnect(compressedAudioNode);
        audioSource = null;
    }

})();

// --------------------------------------------------------------------------------
// /signal.js:
// --------------------------------------------------------------------------------
// Utilities for signal processing

/**
 * Fast discrete Fourier transform.
 *
 * The algorithm is Cooley and Tukey (radix-2, decimation in frequency, in place).
 *
 * Reference
 * https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#The_radix-2_DIT_case
 *
 * y[2k] = Σ[N-1,j=0] (x[j] + x[N/2 + j]) e^((-2πijk) / (N/2))
 * y[2k-1] = Σ[N-1,j=0] ((x[j] - x[N/2 + j]) e^(-2πik / N)) e^((-2πijk) / (N/2))
 *
 * N - number of samples
 * x - sample array
 * y - frequency array
 * j - index into the sample array (time)
 * k - index into the frequency array (frequency)
 * i - imaginary unit
 */
class FastDFT {

    // Swaps the complex values stored at the float offsets a and b.
    static swap(v, a, b) {
        const ar = v[a + 0];
        const ai = v[a + 1];
        v[a + 0] = v[b + 0];
        v[a + 1] = v[b + 1];
        v[b + 0] = ar;
        v[b + 1] = ai;
    }

    // Discrete Fourier transform, in place.
    // n - number of complex samples, must be a power of two
    // x - samples to transform, n complex numbers stored as interleaved
    //     (real, imaginary) pairs, i.e. 2n floats
    // inv - whether to compute the inverse transform; the inverse is scaled by 1/n
    //       so that dft(n, x, false) followed by dft(n, x, true) restores x
    static dft(n, x, inv) {
        const inverse = inv == null ? false : inv;

        // A single sample is its own transform (and the reordering below would
        // touch elements outside of the array).
        if (n < 2) {
            return;
        }

        // Number of floats in x.
        const len = n << 1;

        const rad = (inverse ? 2.0 : -2.0) * Math.PI / n;
        let cs = Math.cos(rad), sn = Math.sin(rad); // complex number rotating the twiddle factor

        for (let m = len, mh; 2 <= (mh = m >>> 1); m = mh) {
            // Butterflies whose twiddle factor is 0 degrees
            for (let i = 0; i < len; i += m) {
                const j = i + mh;
                const ar = x[i + 0], ai = x[i + 1];
                const br = x[j + 0], bi = x[j + 1];

                // first half (a + b)
                x[i + 0] = ar + br;
                x[i + 1] = ai + bi;

                // second half (a - b)
                x[j + 0] = ar - br;
                x[j + 1] = ai - bi;
            }

            // Butterflies whose twiddle factor is not 0 degrees
            let wcs = cs, wsn = sn; // twiddle factor

            for (let i = 2; i < mh; i += 2) {
                for (let j = i; j < len; j += m) {
                    const k = j + mh;
                    const ar = x[j + 0], ai = x[j + 1];
                    const br = x[k + 0], bi = x[k + 1];

                    // first half (a + b)
                    x[j + 0] = ar + br;
                    x[j + 1] = ai + bi;

                    // second half (a - b) * w
                    const xr = ar - br;
                    const xi = ai - bi;
                    x[k + 0] = xr * wcs - xi * wsn;
                    x[k + 1] = xr * wsn + xi * wcs;
                }

                // Rotate the twiddle factor
                const tcs = wcs * cs - wsn * sn;
                wsn = wcs * sn + wsn * cs;
                wcs = tcs;
            }

            // Square the rotating complex number: the next stage uses twice the angle
            const tcs = cs * cs - sn * sn;
            sn = 2.0 * (cs * sn);
            cs = tcs;
        }

        // Reorder the bit reversed output
        const m = len >>> 1;
        const m2 = m + 2;
        const mh = len >>> 2;
        for (let i = 0, j = 0; i < m; i += 4) {
            FastDFT.swap(x, i + m, j + 2);
            if (i < j) {
                FastDFT.swap(x, i + m2, j + m2);
                FastDFT.swap(x, i, j);
            }

            // Increment j as a bit reversed counter
            for (let k = mh; (j ^= k) < k; k >>= 1) {
            }
        }

        // Scaling of the inverse transform: 1 / (number of samples).
        // (Dividing by the float count, 2n, would halve the result of a round trip.)
        if (inverse) {
            for (let i = 0; i < len; ++i) {
                x[i] /= n;
            }
        }
    }
}

/**
 * Fast discrete cosine transform, provides type II and type III.
 *
 * The algorithm is Byeong Gi Lee.
 *
 * Reference
 * https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.118.3056&rep=rep1&type=pdf#page=34
 * https://pdfs.semanticscholar.org/ed00/5160f5befd45073fd01b697227d009cad919.pdf
 *
 * Formula of DCT-II, DCT-III is implemented as the reverse of it
 * y[2k] = Σ[n/2,m=0] (x[m]+x[n/2-1-m]) cos(π(2m+1)k/n)
 * y[2k-1] + y[2k+1] = Σ[n/2,m=0] (x[m]-x[n/2-1-m]) 2cos(π(2m+1)/2n) cos(π(m+1)k/n)
 *
 * n - number of samples
 * x - sample array
 * y - frequency array
 * m - index into the sample array (time)
 * k - index into the frequency array (frequency)
 */
class FastDCT {

    // Swaps two elements.
    static swap(v, a, b) {
        const t = v[a];
        v[a] = v[b];
        v[b] = t;
    }

    // Reorders the elements (bit reversal permutation).
    static swapElements(n, x) {
        const nh = n >> 1;
        const nh1 = nh + 1;
        const nq = n >> 2;
        for (let i = 0, j = 0; i < nh; i += 2) {
            FastDCT.swap(x, i + nh, j + 1);
            if (i < j) {
                FastDCT.swap(x, i + nh1, j + nh1);
                FastDCT.swap(x, i, j);
            }

            // Increment j as a bit reversed counter
            for (let k = nq; (j ^= k) < k; k >>= 1) {
            }
        }
    }

    // Discrete cosine transform type II, in place:
    // y[k] = Σ[n-1,m=0] x[m] cos(π(2m+1)k / 2n)
    // n - number of samples, must be a power of two
    // x - array of n samples
    static dctII(n, x) {
        // Butterfly operations
        let rad = Math.PI / (n << 1);
        for (let m = n, mh = m >> 1; 1 < m; m = mh, mh >>= 1) {
            for (let i = 0; i < mh; ++i) {
                const cs = 2.0 * Math.cos(rad * ((i << 1) + 1));
                for (let j = i, k = (m - 1) - i; j < n; j += m, k += m) {
                    const x0 = x[j];
                    const x1 = x[k];
                    x[j] = x0 + x1;
                    x[k] = (x0 - x1) * cs;
                }
            }
            rad *= 2.0;
        }

        // Reorder the elements
        FastDCT.swapElements(n, x);

        // Difference equations
        for (let m = n, mh = m >> 1, mq = mh >> 1; 2 < m; m = mh, mh = mq, mq >>= 1) {
            for (let i = mq + mh; i < m; ++i) {
                let xt = (x[i] = -x[i] - x[i - mh]);
                for (let j = i + mh; j < n; j += m) {
                    const k = j + mh;
                    xt = (x[j] -= xt);
                    xt = (x[k] = -x[k] - xt);
                }
            }
        }

        // Scaling
        for (let i = 1; i < n; ++i) {
            x[i] *= 0.5;
        }
    }

    // Discrete cosine transform type III, in place:
    // y[m] = x[0] / 2 + Σ[n-1,k=1] x[k] cos(π(2m+1)k / 2n)
    // n - number of samples, must be a power of two
    // x - array of n samples
    static dctIII(n, x) {
        // Scaling
        x[0] *= 0.5;

        // Difference equations
        for (let m = 4, mh = 2, mq = 1; m <= n; mq = mh, mh = m, m <<= 1) {
            for (let i = n - mq; i < n; ++i) {
                let j = i;
                while (m < j) {
                    const k = j - mh;
                    x[j] = -x[j] - x[k];
                    x[k] += x[j = k - mh];
                }
                x[j] = -x[j] - x[j - mh];
            }
        }

        // Reorder the elements
        FastDCT.swapElements(n, x);

        // Butterfly operations
        let rad = Math.PI / 2.0;
        for (let m = 2, mh = 1; m <= n; mh = m, m <<= 1) {
            rad *= 0.5;
            for (let i = 0; i < mh; ++i) {
                const cs = 2.0 * Math.cos(rad * ((i << 1) + 1));
                for (let j = i, k = (m - 1) - i; j < n; j += m, k += m) {
                    const x0 = x[j];
                    const x1 = x[k] / cs;
                    x[j] = x0 + x1;
                    x[k] = x0 - x1;
                }
            }
        }
    }
}

/**
 * Fast modified discrete cosine transform.
 *
 * The algorithm is Mu-Huo Cheng and Yu-Hsin Hsu.
 *
 * Reference
 * https://pdfs.semanticscholar.org/2f26/a658836927331d559e723ac36b8dab911b14.pdf
 */
class FastMDCT {

    // Modified discrete cosine transform:
    // frequencies[k] = Σ[2n-1,j=0] samples[j] cos(π/n (j + 1/2 + n/2)(k + 1/2))
    // n - number of frequencies, must be a power of two
    // samples - array of 2n samples, the input of the transform (left untouched)
    // frequencies - array of n frequencies, the output of the transform
    static mdct(n, samples, frequencies) {
        // Fold the 2n samples into n values
        const ns1 = n - 1; // n - 1
        const nd2 = n >> 1; // n / 2
        const nm3d2 = n + nd2; // n * 3 / 2
        const nm3d2s1 = nm3d2 - 1; // n * 3 / 2 - 1
        for (let i = 0; i < nd2; ++i) {
            frequencies[i] = samples[nm3d2 + i] + samples[nm3d2s1 - i];
            frequencies[nd2 + i] = samples[i] - samples[ns1 - i];
        }

        // Apply the coefficients converting the cosine values
        const rad = Math.PI / (n << 2);
        let i = 0;
        for (; i < nd2; ++i) {
            frequencies[i] /= -2.0 * Math.cos(rad * ((i << 1) + 1));
        }
        for (; i < n; ++i) {
            frequencies[i] /= 2.0 * Math.cos(rad * ((i << 1) + 1));
        }

        // DCT-II
        FastDCT.dctII(n, frequencies);

        // Difference equations
        for (let i = 0, j = 1; j < n; i = j++) {
            frequencies[i] += frequencies[j];
        }
    }

    // Inverse modified discrete cosine transform:
    // samples[j] = (1 / n) Σ[n-1,k=0] frequencies[k] cos(π/n (j + 1/2 + n/2)(k + 1/2))
    // n - number of frequencies, must be a power of two
    // samples - array of 2n samples, the output of the transform
    // frequencies - array of n frequencies, the input of the transform
    //               CAUTION: this array is used as the work buffer and is overwritten
    static imdct(n, samples, frequencies) {
        // TODO The input frequency array is destroyed; use a work buffer, or use the
        //      output array (which may be overwritten anyway) as the work buffer.

        // Apply the coefficients converting the cosine values
        const rad = Math.PI / (n << 2);
        for (let i = 0; i < n; ++i) {
            frequencies[i] *= 2.0 * Math.cos(rad * ((i << 1) + 1));
        }

        // DCT-II
        FastDCT.dctII(n, frequencies);

        // Difference equations
        frequencies[0] *= 0.5;
        let i = 0, j = 1;
        const nh = n >> 1;
        for (; i < nh; i = j++) {
            frequencies[j] += (frequencies[i] = -frequencies[i]);
        }
        for (; j < n; i = j++) {
            frequencies[j] -= frequencies[i];
        }

        // Scaling
        for (let j = 0; j < n; ++j) {
            frequencies[j] /= n;
        }

        // Unfold the n values into 2n samples: the first half of the output is odd
        // symmetric, the second half is even symmetric
        const ns1 = n - 1; // n - 1
        const nd2 = n >> 1; // n / 2
        const nm3d2 = n + nd2; // n * 3 / 2
        const nm3d2s1 = nm3d2 - 1; // n * 3 / 2 - 1
        for (let i = 0; i < nd2; ++i) {
            samples[ns1 - i] = -(samples[i] = frequencies[nd2 + i]);
            samples[nm3d2 + i] = (samples[nm3d2s1 - i] = frequencies[i]);
        }
    }
}

// --------------------------------------------------------------------------------
// /ss.png:
// --------------------------------------------------------------------------------
// https://raw.githubusercontent.com/redlily/training-webaudio-compression/2100a2508bddeb6ee55bc835d9472fca6e816a99/ss.png
// --------------------------------------------------------------------------------