├── .gitignore ├── library.properties ├── CMakeLists.txt ├── license.txt ├── adpcm-lib.h ├── README.md ├── adpcm-dns.c ├── adpcm-xq.c └── adpcm-lib.c /.gitignore: -------------------------------------------------------------------------------- 1 | build/ -------------------------------------------------------------------------------- /library.properties: -------------------------------------------------------------------------------- 1 | name=adpcm-xq 2 | version=0.3.0 3 | author=David Bryant 4 | maintainer=David Bryant 5 | sentence=ADPCM Codec 6 | paragraph=Encoding and decoding of IMA ADPCM 7 | category=Signal Input/Output 8 | url=https://github.com/dbry/adpcm-xq 9 | architectures=* 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.0...3.5)

# set the project name
project(adpcm-xq LANGUAGES C)

include(CheckLibraryExists)

# define as library; adpcm-lib.h sits next to the sources, and PUBLIC hands the
# include path to everything that links against adpcm-lib
add_library(adpcm-lib STATIC adpcm-lib.c adpcm-dns.c)
target_include_directories(adpcm-lib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

# build executable (the produced file is named "adpcm-xq")
add_executable(adpcm-xq-exe adpcm-xq.c)
set_property(TARGET adpcm-xq-exe PROPERTY OUTPUT_NAME adpcm-xq)

# the command-line tool always needs the codec library, whether or not a
# separate math library exists on this platform
target_link_libraries(adpcm-xq-exe PRIVATE adpcm-lib)

if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID MATCHES "Clang")
  # enable the common warning set (warnings are not promoted to errors)
  target_compile_options(adpcm-lib PRIVATE -Wall)
  target_compile_options(adpcm-xq-exe PRIVATE -Wall)
endif()

# link libm only where it exists as a separate library; PUBLIC propagates it
# to the executable and to any other consumer of the static library
check_library_exists(m pow "" HAVE_LIBM)
if(HAVE_LIBM)
  target_link_libraries(adpcm-lib PUBLIC m)
endif()
-------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | 
Copyright (c) David Bryant 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | * Neither the name of Conifer Software nor the names of its contributors 13 | may be used to endorse or promote products derived from this software 14 | without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /adpcm-lib.h: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////// 2 | // **** ADPCM-XQ **** // 3 | // Xtreme Quality ADPCM Encoder/Decoder // 4 | // Copyright (c) 2024 David Bryant. // 5 | // All Rights Reserved. 
// 6 | // Distributed under the BSD Software License (see license.txt) // 7 | //////////////////////////////////////////////////////////////////////////// 8 | 9 | #ifndef ADPCMLIB_H_ 10 | #define ADPCMLIB_H_ 11 | 12 | #define NOISE_SHAPING_OFF 0 // flat noise (no shaping) 13 | #define NOISE_SHAPING_STATIC 0x100 // static 1st-order shaping (configurable, highpass default) 14 | #define NOISE_SHAPING_DYNAMIC 0x200 // dynamically tilted noise based on signal 15 | 16 | #define LOOKAHEAD_DEPTH 0x0ff // depth of search 17 | #define LOOKAHEAD_EXHAUSTIVE 0x800 // full breadth of search (all branches taken) 18 | #define LOOKAHEAD_NO_BRANCHING 0x400 // no branches taken (internal use only!) 19 | 20 | #if defined(_MSC_VER) && _MSC_VER < 1600 21 | typedef unsigned __int64 uint64_t; 22 | typedef unsigned __int32 uint32_t; 23 | typedef unsigned __int16 uint16_t; 24 | typedef unsigned __int8 uint8_t; 25 | typedef __int64 int64_t; 26 | typedef __int32 int32_t; 27 | typedef __int16 int16_t; 28 | typedef __int8 int8_t; 29 | #else 30 | #include 31 | #endif 32 | 33 | #ifdef __cplusplus 34 | extern "C" { 35 | #endif 36 | 37 | /* adpcm-lib.c */ 38 | 39 | int adpcm_sample_count_to_block_size (int sample_count, int num_chans, int bps); 40 | int adpcm_block_size_to_sample_count (int block_size, int num_chans, int bps); 41 | int adpcm_align_block_size (int block_size, int num_chans, int bps, int round_up); 42 | void *adpcm_create_context (int num_channels, int sample_rate, int lookahead, int noise_shaping); 43 | void adpcm_set_shaping_weight (void *p, double shaping_weight); 44 | int adpcm_encode_block_ex (void *p, uint8_t *outbuf, size_t *outbufsize, const int16_t *inbuf, int inbufcount, int bps); 45 | int adpcm_encode_block (void *p, uint8_t *outbuf, size_t *outbufsize, const int16_t *inbuf, int inbufcount); 46 | int adpcm_decode_block_ex (int16_t *outbuf, const uint8_t *inbuf, size_t inbufsize, int channels, int bps); 47 | int adpcm_decode_block (int16_t *outbuf, const uint8_t *inbuf, 
size_t inbufsize, int channels); 48 | void adpcm_free_context (void *p); 49 | 50 | /* adpcm-dns.c */ 51 | 52 | void generate_dns_values (const int16_t *samples, int sample_count, int num_chans, int sample_rate, 53 | int16_t *values, int16_t min_value, int16_t last_value); 54 | 55 | #ifdef __cplusplus 56 | } 57 | #endif 58 | 59 | 60 | #endif /* ADPCMLIB_H_ */ 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## ADPCM-XQ 2 | 3 | Xtreme Quality ADPCM Encoder/Decoder 4 | 5 | Copyright (c) 2024 David Bryant. 6 | 7 | All Rights Reserved. 8 | 9 | Distributed under the [BSD Software License](https://github.com/dbry/adpcm-xq/blob/master/license.txt). 10 | 11 | ## What is this? 12 | 13 | While very popular at the end of the last century, ADPCM is no longer a 14 | common audio encoding format, and is certainly not recommended as a general 15 | purpose encoder. However, it requires minimal CPU resources for decoding, 16 | and so still is ideally suited for certain embedded games and applications 17 | that contain canned audio samples. 18 | 19 | This encoder combines two different techniques to achieve higher quality 20 | than existing ADPCM encoders while remaining fully compatible with standard 21 | decoders. The first is dynamic noise shaping, which shifts the quantization 22 | noise up or down in frequency based on the spectrum of the source signal. 23 | This technique is identical to the algorithm used in WavPack's lossy mode 24 | and can make any audible quantization noise much less annoying (or, in some 25 | cases, inaudible). 26 | 27 | The other technique is "lookahead" in which the encoder exhaustively 28 | searches ahead to find the optimum coding sequence based on future samples. 
29 | This process can reduce the quantization noise from about 1 to 10 dB (depending 30 | on the source) and also reduces or eliminates the harmonic content in the 31 | noise that sometimes plagues ADPCM. Unfortunately, at its maximum settings 32 | this can be very slow, but this should be relatively irrelevant if the 33 | encoder is being used to generate canned samples. 34 | 35 | **Adpcm-xq** consists of three standard C files and a header file ([adpcm-lib.h](adpcm-lib.h)). 36 | It can be used as a stand-alone command-line program implemented in [adpcm-xq.c](adpcm-xq.c), 37 | or the library, which consists of [adpcm-lib.c](adpcm-lib.c) and [adpcm-dns.c](adpcm-dns.c), 38 | can be built into and utilized by another application. The library portion has 39 | been designed with maximum portability in mind and should work correctly even 40 | on 16-bit and big-endian architectures. 41 | 42 | ## What's New? 43 | 44 | The latest version of **adpcm-xq** has many enhancements including greatly 45 | improved performance of the conversion and the ability to calculate and display 46 | the quantization noise introduced in the operation. 47 | [See all the details here.](https://github.com/dbry/adpcm-xq/releases/tag/v0.5) 48 | 49 | ## Variations 50 | 51 | There are several forms and variations of IMA ADPCM encoding. The one handled 52 | by **adpcm-xq** is the canonical one used in Microsoft WAV files. The audio is 53 | divided into fixed-sized blocks that include a 4-byte header (or 8-byte for 54 | stereo) that includes the first sample. The size of the blocks is stored in the 55 | WAV header and the audio nibbles are ordered least-significant temporally first. 56 | 57 | The latest version of adpcm-xq also includes 2-bit, 3-bit and 5-bit ADPCM. These 58 | are **not well-supported at all** (and some support is buggy) but they might be 59 | useful if this library is decoding them in situations where more compression 60 | or higher quality is desired. 
BTW, [Rockbox](https://www.rockbox.org/) provides 61 | excellent support for them! 62 | 63 | Some applications like games and consoles that decode IMA ADPCM in hardware or 64 | microcode use modified versions of IMA ADPCM that do not use headers and do not 65 | divide the audio into blocks or frames of any kind. The decoding parameters are 66 | simply initialized to zero and the audio nibbles continue uninterrupted to the 67 | end of the clip. I have created an experimental version that will generate two 68 | variations of this data. One is standard nibble order and the other is reversed 69 | nibble order (sometimes called **Intel/DVI4** or **ADP4** and is the format used in 70 | AIFF files). These formats are only writable as "raw" by the **adpcm-xq** command-line 71 | program because they are not representable in WAV files, and they cannot 72 | be decoded by the program either (for the same reason), however the library 73 | itself handles them. [The experimental branch is here.](https://github.com/dbry/adpcm-xq/commits/new-formats/) 74 | 75 | ## Building 76 | 77 | To build the command-line tool (**ADPCM-XQ**) on Linux: 78 | 79 | > $ gcc -O3 *.c -lm -o adpcm-xq 80 | 81 | on Darwin/Mac: 82 | 83 | > $ cmake -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" . ; make 84 | 85 | on MS Visual Studio: 86 | 87 | > C:\cl -O3 adpcm-xq.c adpcm-lib.c adpcm-dns.c 88 | 89 | ## Help 90 | 91 | ``` 92 | ADPCM-XQ Xtreme Quality IMA-ADPCM WAV Encoder / Decoder Version 0.5 93 | Copyright (c) 2024 David Bryant. All Rights Reserved. 
94 | 95 | Usage: ADPCM-XQ [-options] infile.wav outfile.wav 96 | 97 | Operation: conversion is performed based on the type of the infile 98 | (either encode 16-bit PCM to 4-bit IMA-ADPCM or decode back) 99 | 100 | Options: -[0-16]= encode lookahead samples (default = 3, max = 16) 101 | -b<n> = override auto block size, 2^n bytes (n = 8-15) 102 | -d = decode only (fail on WAV file already PCM) 103 | -e = encode only (fail on WAV file already ADPCM) 104 | -f = encode flat noise (no noise shaping, aka -s0.0) 105 | -h = display this help message 106 | -n = measure and report quantization noise 107 | -q = quiet mode (display errors only) 108 | -r = raw output (little-endian, no WAV header written) 109 | -s<n> = override default noise shaping, (-1.0 < n <= 1.0) 110 | -v = verbose (display lots of info) 111 | -w<n> = override default 4-bit ADPCM width (2 <= n <= 5) 112 | -x = exhaustive search (old behavior, very slow at depth) 113 | -y = overwrite outfile if it exists 114 | 115 | Web: Visit www.github.com/dbry/adpcm-xq for latest version and info 116 | 117 | ``` 118 | 119 | ## Caveats 120 | 121 | - Unknown RIFF chunk types are correctly parsed on input files, but are not 122 | passed to the output file. 123 | 124 | - In some situations, at high lookahead levels, the operation can get very slow 125 | or even seem to be stuck, however this will happen at much higher lookahead depths 126 | than before. The default level 3 should always be fine and then the user can 127 | simply try increasing levels until the time becomes untenable. The new quantization 128 | noise option (**-n**) can be used to determine if higher levels are providing 129 | improvement (lower numbers are better). Note that the flat noise option (**-f**) 130 | will provide the lowest *measured* noise, but the default dynamic noise shaping 131 | may provide *less audible* noise. 132 | 133 | - Pipes are not yet supported. 
134 | -------------------------------------------------------------------------------- /adpcm-dns.c: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////// 2 | // **** ADPCM-XQ **** // 3 | // Xtreme Quality ADPCM Encoder/Decoder // 4 | // Copyright (c) 2024 David Bryant. // 5 | // All Rights Reserved. // 6 | // Distributed under the BSD Software License (see license.txt) // 7 | //////////////////////////////////////////////////////////////////////////// 8 | 9 | // adpcm-dns.c 10 | 11 | // This module handles the implementation of "dynamic noise shaping" which is 12 | // designed to move the spectrum of the quantization noise introduced by lossy 13 | // compression up or down in frequency so that it is more likely to be masked 14 | // by the source material. 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include "adpcm-lib.h" 21 | 22 | #define FILTER_LENGTH 15 23 | #define WINDOW_LENGTH 101 24 | #define MIN_BLOCK_SAMPLES 16 25 | 26 | static void win_average_buffer (float *samples, int sample_count, int half_width); 27 | 28 | // Generate the shaping values for the specified buffer of stereo or mono samples, 29 | // one shaping value output for each sample (or stereo pair of samples). This is 30 | // calculated by filtering the audio at fs/6 (7350 Hz at 44.1 kHz) and comparing 31 | // the averaged levels above and below that frequency. The output shaping values 32 | // are nominally in the range of +/-1024, with 1024 indicating first-order HF boost 33 | // shaping and -1024 for similar LF boost. However, since -1024 would result in 34 | // infinite DC boost (not useful) a "min_value" is passed in. An output value of 35 | // zero represents no noise shaping. For stereo input data the channels are summed 36 | // for the calculation and the output is still just mono. 
Note that at the ends of 37 | // the buffer the values diverge from true because not all the required source 38 | // samples are visible. Use this formula to calculate the number of samples 39 | // required for this process to "settle": 40 | // 41 | // int settle_distance = (WINDOW_LENGTH >> 1) + (FILTER_LENGTH >> 1) + 1; 42 | // 43 | // We also pass in a "last_value" so that we can smoothly interpolate from that 44 | // to the first calculated value during the initial "unknown" samples. This 45 | // reduces discontinuities. 46 | 47 | void generate_dns_values (const int16_t *samples, int sample_count, int num_chans, int sample_rate, 48 | int16_t *values, int16_t min_value, int16_t last_value) 49 | { 50 | float dB_offset = 7.3, dB_scaler = 64.0, max_dB, min_dB, max_ratio, min_ratio; 51 | int filtered_count = sample_count - FILTER_LENGTH + 1, i; 52 | float *low_freq, *high_freq; 53 | 54 | (void) sample_rate; // unused. 55 | 56 | memset (values, 0, sample_count * sizeof (values [0])); 57 | 58 | if (filtered_count <= 0) 59 | return; 60 | 61 | low_freq = malloc (filtered_count * sizeof (float)); 62 | high_freq = malloc (filtered_count * sizeof (float)); 63 | 64 | // First, directly calculate the lowpassed audio using the 15-tap filter. This is 65 | // a basic sinc with Hann windowing (for a fast transition) and because the filter 66 | // is set to exactly fs/6, some terms are zero (which we can skip). Also, because 67 | // it's linear-phase and has an odd number of terms, we can just subtract the LF 68 | // result from the original to get the HF values. 
69 | 70 | if (num_chans == 1) 71 | for (i = 0; i < filtered_count; ++i, ++samples) { 72 | float filter_sum = 73 | ((int32_t) samples [0] + samples [14]) * 0.00150031 + 74 | ((int32_t) samples [2] + samples [12]) * -0.01703392 + 75 | ((int32_t) samples [3] + samples [11]) * -0.03449186 + 76 | ((int32_t) samples [5] + samples [ 9]) * 0.11776258 + 77 | ((int32_t) samples [6] + samples [ 8]) * 0.26543272 + 78 | (int32_t) samples [7] * 0.33366033; 79 | 80 | high_freq [i] = samples [FILTER_LENGTH >> 1] - filter_sum; 81 | low_freq [i] = filter_sum; 82 | } 83 | else 84 | for (i = 0; i < filtered_count; ++i, samples += 2) { 85 | float filter_sum = 86 | ((int32_t) samples [ 0] + samples [ 1] + samples [28] + samples [29]) * 0.00150031 + 87 | ((int32_t) samples [ 4] + samples [ 5] + samples [24] + samples [25]) * -0.01703392 + 88 | ((int32_t) samples [ 6] + samples [ 7] + samples [22] + samples [23]) * -0.03449186 + 89 | ((int32_t) samples [10] + samples [11] + samples [18] + samples [19]) * 0.11776258 + 90 | ((int32_t) samples [12] + samples [13] + samples [16] + samples [17]) * 0.26543272 + 91 | ((int32_t) samples [14] + samples [15]) * 0.33366033; 92 | 93 | high_freq [i] = samples [FILTER_LENGTH & ~1] + samples [FILTER_LENGTH] - filter_sum; 94 | low_freq [i] = filter_sum; 95 | } 96 | 97 | // Apply a simple first-order "delta" filter to the lowpass because frequencies below fs/6 98 | // become progressively less important for our purposes as the decorrelation filters make 99 | // those frequencies less and less relevant. Note that after all this filtering, the 100 | // magnitude level of the high frequency array will be 8.7 dB greater than the low frequency 101 | // array when the filters are presented with pure white noise (determined empirically). 
102 | 103 | for (i = filtered_count - 1; i; --i) 104 | low_freq [i] -= low_freq [i - 1]; 105 | 106 | low_freq [0] = low_freq [1]; // simply duplicate for the "unknown" sample 107 | 108 | // Next we determine the averaged (absolute) levels for each sample using a box filter. 109 | 110 | win_average_buffer (low_freq, filtered_count, WINDOW_LENGTH >> 1); 111 | win_average_buffer (high_freq, filtered_count, WINDOW_LENGTH >> 1); 112 | 113 | // calculate the minimum and maximum ratios that won't be clipped so that we only 114 | // have to compute the logarithm when needed 115 | 116 | max_dB = 1024 / dB_scaler - dB_offset; 117 | min_dB = min_value / dB_scaler - dB_offset; 118 | max_ratio = pow (10.0, max_dB / 20.0); 119 | min_ratio = pow (10.0, min_dB / 20.0); 120 | 121 | for (i = 0; i < filtered_count; ++i) 122 | if (high_freq [i] > 1.0 && low_freq [i] > 1.0) { 123 | float ratio = high_freq [i] / low_freq [i]; 124 | int shaping_value; 125 | 126 | if (ratio >= max_ratio) 127 | shaping_value = 1024; 128 | else if (ratio <= min_ratio) 129 | shaping_value = min_value; 130 | else 131 | shaping_value = (int) floor ((log10 (ratio) * 20.0 + dB_offset) * dB_scaler + 0.5); 132 | 133 | values [i + (FILTER_LENGTH >> 1)] = shaping_value; 134 | } 135 | 136 | // interpolate the first 7 values from the supplied "last_value" to the first new value 137 | 138 | for (i = 0; i < FILTER_LENGTH >> 1; ++i) 139 | values [i] = 140 | ( 141 | (int32_t) values [FILTER_LENGTH >> 1] * (i + 1) + 142 | (int32_t) last_value * ((FILTER_LENGTH >> 1) - i) + 143 | (FILTER_LENGTH >> 2) 144 | ) / ((FILTER_LENGTH >> 1) + 1); 145 | 146 | // finally, copy the value at the end into the 7 final positions because unfortunately 147 | // we have no "next_value" to interpolate with 148 | 149 | for (i = filtered_count + (FILTER_LENGTH >> 1); i < sample_count; ++i) 150 | values [i] = values [(FILTER_LENGTH >> 1) + filtered_count - 1]; 151 | 152 | free (low_freq); 153 | free (high_freq); 154 | } 155 | 156 | // Given a 
buffer of floating values, apply a simple box filter of specified half width 157 | // (total filter width is always odd) to determine the averaged magnitude at each point. 158 | // For the ends, we use only the visible samples. 159 | 160 | static void win_average_buffer (float *samples, int sample_count, int half_width) 161 | { 162 | float *output = malloc (sample_count * sizeof (float)); 163 | double sum = 0.0; 164 | int m = 0, n = 0; 165 | int i, j, k; 166 | 167 | for (i = 0; i < sample_count; ++i) { 168 | k = i + half_width + 1; 169 | j = i - half_width; 170 | 171 | if (k > sample_count) k = sample_count; 172 | if (j < 0) j = 0; 173 | 174 | while (m < j) { 175 | if ((sum -= samples [m] * samples [m]) < 0.0) sum = 0.0; 176 | m++; 177 | } 178 | 179 | while (n < k) { 180 | sum += samples [n] * samples [n]; 181 | n++; 182 | } 183 | 184 | output [i] = sqrt (sum / (n - m)); 185 | } 186 | 187 | memcpy (samples, output, sample_count * sizeof (float)); 188 | free (output); 189 | } 190 | -------------------------------------------------------------------------------- /adpcm-xq.c: -------------------------------------------------------------------------------- 1 | #ifndef ARDUINO 2 | 3 | //////////////////////////////////////////////////////////////////////////// 4 | // **** ADPCM-XQ **** // 5 | // Xtreme Quality ADPCM Encoder/Decoder // 6 | // Copyright (c) 2024 David Bryant. // 7 | // All Rights Reserved. // 8 | // Distributed under the BSD Software License (see license.txt) // 9 | //////////////////////////////////////////////////////////////////////////// 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "adpcm-lib.h" 18 | 19 | // This runtime macro is not strictly needed because the code is endian-safe, 20 | // but including it improves performance on little-endian systems because we 21 | // can avoid a couple loops through the audio. 
22 | #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x0100) 23 | 24 | static const char *sign_on = "\n" 25 | " ADPCM-XQ Xtreme Quality IMA-ADPCM WAV Encoder / Decoder Version 0.5\n" 26 | " Copyright (c) 2024 David Bryant. All Rights Reserved.\n\n"; 27 | 28 | static const char *usage = 29 | " Usage: ADPCM-XQ [-options] infile.wav outfile.wav\n\n" 30 | " Operation: conversion is performed based on the type of the infile\n" 31 | " (either encode 16-bit PCM to 4-bit IMA-ADPCM or decode back)\n\n" 32 | " Options: -[0-16]= encode lookahead samples (default = 3, max = 16)\n" 33 | " -b = override auto block size, 2^n bytes (n = 8-15)\n" 34 | " -d = decode only (fail on WAV file already PCM)\n" 35 | " -e = encode only (fail on WAV file already ADPCM)\n" 36 | " -f = encode flat noise (no noise shaping, aka -s0.0)\n" 37 | " -h = display this help message\n" 38 | " -n = measure and report quantization noise\n" 39 | " -q = quiet mode (display errors only)\n" 40 | " -r = raw output (little-endian, no WAV header written)\n" 41 | " -s = override default noise shaping, (-1.0 < n <= 1.0)\n" 42 | " -v = verbose (display lots of info)\n" 43 | " -w = override default 4-bit ADPCM width (2 <= n <= 5)\n" 44 | " -x = exhaustive search (old behavior, very slow at depth)\n" 45 | " -y = overwrite outfile if it exists\n\n" 46 | " Web: Visit www.github.com/dbry/adpcm-xq for latest version and info\n\n"; 47 | 48 | #define ADPCM_FLAG_NOISE_SHAPING 0x1 49 | #define ADPCM_FLAG_RAW_OUTPUT 0x2 50 | #define ADPCM_FLAG_MEASURE_NOISE 0x4 51 | 52 | static double strtod_hexfree (const char *nptr, char **endptr); 53 | static int adpcm_converter (char *infilename, char *outfilename); 54 | static int verbosity = 0, decode_only = 0, encode_only = 0, flags = ADPCM_FLAG_NOISE_SHAPING; 55 | static int lookahead = 3, blocksize_pow2 = 0, encode_width_bits = 4; 56 | static double static_shaping_weight = 0.0; 57 | 58 | int main (int argc, char **argv) 59 | { 60 | int overwrite = 0, asked_help = 0; 61 | char 
*infilename = NULL, *outfilename = NULL; 62 | FILE *outfile; 63 | 64 | // if the name of the executable ends in "encoder" or "decoder", just do that function 65 | encode_only = argc && strstr (argv [0], "encoder") && strlen (strstr (argv [0], "encoder")) == strlen ("encoder"); 66 | decode_only = argc && strstr (argv [0], "decoder") && strlen (strstr (argv [0], "decoder")) == strlen ("decoder"); 67 | 68 | // loop through command-line arguments 69 | 70 | while (--argc) { 71 | #if defined (_WIN32) 72 | if ((**++argv == '-' || **argv == '/') && (*argv)[1]) 73 | #else 74 | if ((**++argv == '-') && (*argv)[1]) 75 | #endif 76 | while (*++*argv) 77 | switch (**argv) { 78 | 79 | case '0': case '1': case '2': 80 | case '3': case '4': case '5': 81 | case '6': case '7': case '8': 82 | case '9': 83 | lookahead = (strtol (*argv, argv, 10) & LOOKAHEAD_DEPTH) | (lookahead & LOOKAHEAD_EXHAUSTIVE); 84 | 85 | if ((lookahead & LOOKAHEAD_DEPTH) > 16) { 86 | fprintf (stderr, "\nlookahead must be 0 to 16!\n"); 87 | return -1; 88 | } 89 | 90 | --*argv; 91 | break; 92 | 93 | case 'B': case 'b': 94 | blocksize_pow2 = strtol (++*argv, argv, 10); 95 | 96 | if (blocksize_pow2 < 8 || blocksize_pow2 > 15) { 97 | fprintf (stderr, "\nblock size power must be 8 to 15!\n"); 98 | return -1; 99 | } 100 | 101 | --*argv; 102 | break; 103 | 104 | case 'D': case 'd': 105 | decode_only = 1; 106 | break; 107 | 108 | case 'E': case 'e': 109 | encode_only = 1; 110 | break; 111 | 112 | case 'F': case 'f': 113 | flags &= ~ADPCM_FLAG_NOISE_SHAPING; 114 | static_shaping_weight = 0.0; 115 | break; 116 | 117 | case 'H': case 'h': 118 | asked_help = 0; 119 | break; 120 | 121 | case 'N': case 'n': 122 | flags |= ADPCM_FLAG_MEASURE_NOISE; 123 | break; 124 | 125 | case 'Q': case 'q': 126 | verbosity = -1; 127 | break; 128 | 129 | case 'R': case 'r': 130 | flags |= ADPCM_FLAG_RAW_OUTPUT; 131 | break; 132 | 133 | case 'S': case 's': 134 | static_shaping_weight = (float) strtod_hexfree (++*argv, argv); 135 | 136 | if 
(static_shaping_weight <= -1.0 || static_shaping_weight > 1.0) { 137 | fprintf (stderr, "\ninvalid noise shaping value!"); 138 | return -1; 139 | } 140 | 141 | if (static_shaping_weight == 0.0) 142 | flags &= ~ADPCM_FLAG_NOISE_SHAPING; 143 | 144 | --*argv; 145 | break; 146 | 147 | case 'V': case 'v': 148 | verbosity = 1; 149 | break; 150 | 151 | case 'W': case 'w': 152 | encode_width_bits = strtol (++*argv, argv, 10); 153 | 154 | if (encode_width_bits < 2 || encode_width_bits > 5) { 155 | fprintf (stderr, "\nencoding width in bits must be 2 to 5!\n"); 156 | return -1; 157 | } 158 | 159 | --*argv; 160 | break; 161 | 162 | case 'X': case 'x': 163 | lookahead |= LOOKAHEAD_EXHAUSTIVE; 164 | break; 165 | 166 | case 'Y': case 'y': 167 | overwrite = 1; 168 | break; 169 | 170 | default: 171 | fprintf (stderr, "\nillegal option: %c !\n", **argv); 172 | return 1; 173 | } 174 | else if (!infilename) { 175 | infilename = malloc (strlen (*argv) + 10); 176 | strcpy (infilename, *argv); 177 | } 178 | else if (!outfilename) { 179 | outfilename = malloc (strlen (*argv) + 10); 180 | strcpy (outfilename, *argv); 181 | } 182 | else { 183 | fprintf (stderr, "\nextra unknown argument: %s !\n", *argv); 184 | return 1; 185 | } 186 | } 187 | 188 | if (verbosity >= 0) 189 | fprintf (stderr, "%s", sign_on); 190 | 191 | if (!outfilename || asked_help) { 192 | printf ("%s", usage); 193 | return 0; 194 | } 195 | 196 | if (!strcmp (infilename, outfilename)) { 197 | fprintf (stderr, "can't overwrite input file (specify different/new output file name)\n"); 198 | return -1; 199 | } 200 | 201 | if (!overwrite && (outfile = fopen (outfilename, "r"))) { 202 | fclose (outfile); 203 | fprintf (stderr, "output file \"%s\" exists (use -y to overwrite)\n", outfilename); 204 | return -1; 205 | } 206 | 207 | return adpcm_converter (infilename, outfilename); 208 | } 209 | 210 | // The C-standard function strtod() also handles hex numbers prefixed 211 | // with [+-]0[xX]. 
Unfortunately this causes problems for us in rare 212 | // cases where a value of zero is specified for one option followed 213 | // by the 'x' option (e.g., -s0xe). This version of strtod() does not 214 | // allow hex specification, but otherwise should be identical. 215 | 216 | static double strtod_hexfree (const char *nptr, char **endptr) 217 | { 218 | const char *sptr = nptr; 219 | 220 | // skip past any leading whitespace and possibly a sign 221 | while (isspace (*sptr)) sptr++; 222 | if (*sptr == '+' || *sptr == '-') sptr++; 223 | 224 | // if hex detected ("0x" or "0X"), return 0.0 and end at the X 225 | if (*sptr == '0' && tolower (sptr [1]) == 'x') { 226 | if (endptr) *endptr = (char *) sptr + 1; 227 | return 0.0; 228 | } 229 | 230 | // otherwise unmodified strtod() result 231 | return strtod (nptr, endptr); 232 | } 233 | 234 | typedef struct { 235 | char ckID [4]; 236 | uint32_t ckSize; 237 | char formType [4]; 238 | } RiffChunkHeader; 239 | 240 | typedef struct { 241 | char ckID [4]; 242 | uint32_t ckSize; 243 | } ChunkHeader; 244 | 245 | #define ChunkHeaderFormat "4L" 246 | 247 | typedef struct { 248 | uint16_t FormatTag, NumChannels; 249 | uint32_t SampleRate, BytesPerSecond; 250 | uint16_t BlockAlign, BitsPerSample; 251 | uint16_t cbSize; 252 | union { 253 | uint16_t ValidBitsPerSample; 254 | uint16_t SamplesPerBlock; 255 | uint16_t Reserved; 256 | } Samples; 257 | int32_t ChannelMask; 258 | uint16_t SubFormat; 259 | char GUID [14]; 260 | } WaveHeader; 261 | 262 | #define WaveHeaderFormat "SSLLSSSSLS" 263 | 264 | typedef struct { 265 | char ckID [4]; 266 | uint32_t ckSize; 267 | uint32_t TotalSamples; 268 | } FactHeader; 269 | 270 | #define FactHeaderFormat "4LL" 271 | 272 | #define WAVE_FORMAT_PCM 0x1 273 | #define WAVE_FORMAT_IMA_ADPCM 0x11 274 | #define WAVE_FORMAT_EXTENSIBLE 0xfffe 275 | 276 | #define SNAP_NEAREST_POW2(v) { int d = 0; while ((v) & ((v) - 1)) (v) += (d = (~d >> 31 | 1) - d); } 277 | 278 | static int write_pcm_wav_header (FILE 
*outfile, int num_channels, uint32_t num_samples, uint32_t sample_rate); 279 | static int write_adpcm_wav_header (FILE *outfile, int num_channels, int bps, uint32_t num_samples, uint32_t sample_rate, int samples_per_block); 280 | static int adpcm_decode_data (FILE *infile, FILE *outfile, int num_channels, int bps, uint32_t num_samples, int block_size); 281 | static int adpcm_encode_data (FILE *infile, FILE *outfile, int num_channels, int bps, uint32_t num_samples, int samples_per_block, int sample_rate); 282 | static void little_endian_to_native (void *data, char *format); 283 | static void native_to_little_endian (void *data, char *format); 284 | 285 | static int adpcm_converter (char *infilename, char *outfilename) 286 | { 287 | int format = 0, res = 0, bits_per_sample = 0, num_channels = 0; 288 | uint32_t fact_samples = 0, num_samples = 0, sample_rate = 0; 289 | FILE *infile, *outfile; 290 | RiffChunkHeader riff_chunk_header; 291 | ChunkHeader chunk_header; 292 | WaveHeader WaveHeader; 293 | 294 | if (!(infile = fopen (infilename, "rb"))) { 295 | fprintf (stderr, "can't open file \"%s\" for reading!\n", infilename); 296 | return -1; 297 | } 298 | 299 | // read initial RIFF form header 300 | 301 | if (!fread (&riff_chunk_header, sizeof (RiffChunkHeader), 1, infile) || 302 | strncmp (riff_chunk_header.ckID, "RIFF", 4) || 303 | strncmp (riff_chunk_header.formType, "WAVE", 4)) { 304 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 305 | return -1; 306 | } 307 | 308 | // loop through all elements of the RIFF wav header (until the data chuck) 309 | 310 | while (1) { 311 | 312 | if (!fread (&chunk_header, sizeof (ChunkHeader), 1, infile)) { 313 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 314 | return -1; 315 | } 316 | 317 | little_endian_to_native (&chunk_header, ChunkHeaderFormat); 318 | 319 | // if it's the format chunk, we want to get some info out of there and 320 | // make sure it's a .wav file we can handle 321 | 322 
| if (!strncmp (chunk_header.ckID, "fmt ", 4)) { 323 | int supported = 1; 324 | 325 | if (chunk_header.ckSize < 16 || chunk_header.ckSize > sizeof (WaveHeader) || 326 | !fread (&WaveHeader, chunk_header.ckSize, 1, infile)) { 327 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 328 | return -1; 329 | } 330 | 331 | little_endian_to_native (&WaveHeader, WaveHeaderFormat); 332 | 333 | format = (WaveHeader.FormatTag == WAVE_FORMAT_EXTENSIBLE && chunk_header.ckSize == 40) ? 334 | WaveHeader.SubFormat : WaveHeader.FormatTag; 335 | 336 | bits_per_sample = (chunk_header.ckSize == 40 && WaveHeader.Samples.ValidBitsPerSample) ? 337 | WaveHeader.Samples.ValidBitsPerSample : WaveHeader.BitsPerSample; 338 | 339 | if (WaveHeader.NumChannels < 1 || WaveHeader.NumChannels > 2) 340 | supported = 0; 341 | else if (format == WAVE_FORMAT_PCM) { 342 | if (decode_only) { 343 | fprintf (stderr, "\"%s\" is PCM .WAV file, invalid in decode-only mode!\n", infilename); 344 | return -1; 345 | } 346 | 347 | if (bits_per_sample < 9 || bits_per_sample > 16) 348 | supported = 0; 349 | 350 | if (WaveHeader.BlockAlign != WaveHeader.NumChannels * 2) 351 | supported = 0; 352 | } 353 | else if (format == WAVE_FORMAT_IMA_ADPCM) { 354 | if (encode_only) { 355 | fprintf (stderr, "\"%s\" is ADPCM .WAV file, invalid in encode-only mode!\n", infilename); 356 | return -1; 357 | } 358 | 359 | if (verbosity > 0) 360 | fprintf (stderr, "%d-bit ADPCM, %d channels, %d samples/block, %d-byte block alignment\n", 361 | bits_per_sample, WaveHeader.NumChannels, WaveHeader.Samples.SamplesPerBlock, WaveHeader.BlockAlign); 362 | 363 | if (bits_per_sample < 2 || bits_per_sample > 5) 364 | supported = 0; 365 | 366 | if (WaveHeader.Samples.SamplesPerBlock > 367 | adpcm_block_size_to_sample_count (WaveHeader.BlockAlign, WaveHeader.NumChannels, bits_per_sample)) { 368 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 369 | return -1; 370 | } 371 | } 372 | else 373 | supported = 0; 374 | 
375 | if (!supported) { 376 | fprintf (stderr, "\"%s\" is an unsupported .WAV format!\n", infilename); 377 | return -1; 378 | } 379 | 380 | if (verbosity > 0) { 381 | fprintf (stderr, "format tag size = %d\n", chunk_header.ckSize); 382 | fprintf (stderr, "FormatTag = 0x%x, NumChannels = %u, BitsPerSample = %u\n", 383 | WaveHeader.FormatTag, WaveHeader.NumChannels, WaveHeader.BitsPerSample); 384 | fprintf (stderr, "BlockAlign = %u, SampleRate = %lu, BytesPerSecond = %lu\n", 385 | WaveHeader.BlockAlign, (unsigned long) WaveHeader.SampleRate, (unsigned long) WaveHeader.BytesPerSecond); 386 | 387 | if (chunk_header.ckSize > 16) { 388 | if (format == WAVE_FORMAT_PCM) 389 | fprintf (stderr, "cbSize = %d, ValidBitsPerSample = %d\n", WaveHeader.cbSize, 390 | WaveHeader.Samples.ValidBitsPerSample); 391 | else if (format == WAVE_FORMAT_IMA_ADPCM) 392 | fprintf (stderr, "cbSize = %d, SamplesPerBlock = %d\n", WaveHeader.cbSize, 393 | WaveHeader.Samples.SamplesPerBlock); 394 | } 395 | 396 | if (chunk_header.ckSize > 20) 397 | fprintf (stderr, "ChannelMask = %x, SubFormat = %d\n", 398 | WaveHeader.ChannelMask, WaveHeader.SubFormat); 399 | } 400 | } 401 | else if (!strncmp (chunk_header.ckID, "fact", 4)) { 402 | 403 | if (chunk_header.ckSize < 4 || !fread (&fact_samples, sizeof (fact_samples), 1, infile)) { 404 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 405 | return -1; 406 | } 407 | 408 | little_endian_to_native (&fact_samples, "L"); 409 | 410 | if (chunk_header.ckSize > 4) { 411 | int bytes_to_skip = chunk_header.ckSize - 4; 412 | char dummy; 413 | 414 | while (bytes_to_skip--) 415 | if (!fread (&dummy, 1, 1, infile)) { 416 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 417 | return -1; 418 | } 419 | } 420 | } 421 | else if (!strncmp (chunk_header.ckID, "data", 4)) { 422 | 423 | // on the data chunk, get size and exit parsing loop 424 | 425 | if (!WaveHeader.NumChannels) { // make sure we saw a "fmt" chunk... 
426 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 427 | return -1; 428 | } 429 | 430 | if (!chunk_header.ckSize) { 431 | fprintf (stderr, "this .WAV file has no audio samples, probably is corrupt!\n"); 432 | return -1; 433 | } 434 | 435 | if (format == WAVE_FORMAT_PCM) { 436 | if (chunk_header.ckSize % WaveHeader.BlockAlign) { 437 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 438 | return -1; 439 | } 440 | 441 | num_samples = chunk_header.ckSize / WaveHeader.BlockAlign; 442 | } 443 | else { 444 | uint32_t complete_blocks = chunk_header.ckSize / WaveHeader.BlockAlign; 445 | int leftover_bytes = chunk_header.ckSize % WaveHeader.BlockAlign; 446 | int samples_last_block; 447 | 448 | num_samples = complete_blocks * WaveHeader.Samples.SamplesPerBlock; 449 | 450 | if (leftover_bytes) { 451 | if (leftover_bytes % (WaveHeader.NumChannels * 4)) { 452 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 453 | return -1; 454 | } 455 | 456 | if (verbosity > 0) fprintf (stderr, "data chunk has %d bytes left over for final ADPCM block\n", leftover_bytes); 457 | samples_last_block = ((leftover_bytes - (WaveHeader.NumChannels * 4)) * 8) / (bits_per_sample * WaveHeader.NumChannels) + 1; 458 | num_samples += samples_last_block; 459 | } 460 | else 461 | samples_last_block = WaveHeader.Samples.SamplesPerBlock; 462 | 463 | if (fact_samples) { 464 | if (fact_samples < num_samples && fact_samples > num_samples - samples_last_block) { 465 | if (verbosity > 0) fprintf (stderr, "total samples reduced %lu by FACT chunk\n", (unsigned long) (num_samples - fact_samples)); 466 | num_samples = fact_samples; 467 | } 468 | else if (WaveHeader.NumChannels == 2 && (fact_samples >>= 1) < num_samples && fact_samples > num_samples - samples_last_block) { 469 | if (verbosity > 0) fprintf (stderr, "num samples reduced %lu by [incorrect] FACT chunk\n", (unsigned long) (num_samples - fact_samples)); 470 | num_samples = fact_samples; 471 | } 472 
| } 473 | } 474 | 475 | if (!num_samples) { 476 | fprintf (stderr, "this .WAV file has no audio samples, probably is corrupt!\n"); 477 | return -1; 478 | } 479 | 480 | if (verbosity > 0) 481 | fprintf (stderr, "num samples = %lu\n", (unsigned long) num_samples); 482 | 483 | num_channels = WaveHeader.NumChannels; 484 | sample_rate = WaveHeader.SampleRate; 485 | break; 486 | } 487 | else { // just ignore unknown chunks 488 | int bytes_to_eat = (chunk_header.ckSize + 1) & ~1L; 489 | char dummy; 490 | 491 | if (verbosity > 0) 492 | fprintf (stderr, "extra unknown chunk \"%c%c%c%c\" of %d bytes\n", 493 | chunk_header.ckID [0], chunk_header.ckID [1], chunk_header.ckID [2], 494 | chunk_header.ckID [3], chunk_header.ckSize); 495 | 496 | while (bytes_to_eat--) 497 | if (!fread (&dummy, 1, 1, infile)) { 498 | fprintf (stderr, "\"%s\" is not a valid .WAV file!\n", infilename); 499 | return -1; 500 | } 501 | } 502 | } 503 | 504 | if (!(outfile = fopen (outfilename, "wb"))) { 505 | fprintf (stderr, "can't open file \"%s\" for writing!\n", outfilename); 506 | return -1; 507 | } 508 | 509 | if (format == WAVE_FORMAT_PCM) { 510 | int block_size, samples_per_block; 511 | 512 | if (blocksize_pow2) 513 | block_size = 1 << blocksize_pow2; 514 | else 515 | block_size = 256 * num_channels * (sample_rate < 11000 ? 
1 : sample_rate / 11000); 516 | 517 | SNAP_NEAREST_POW2 (block_size); // for "middling" sample rates, snap to nearest power of two 518 | block_size = adpcm_align_block_size (block_size, num_channels, encode_width_bits, 0); 519 | samples_per_block = adpcm_block_size_to_sample_count (block_size, num_channels, encode_width_bits); 520 | 521 | if (verbosity > 0) 522 | fprintf (stderr, "each %d byte ADPCM block will contain %d samples * %d channels\n", 523 | block_size, samples_per_block, num_channels); 524 | 525 | if (!(flags & ADPCM_FLAG_RAW_OUTPUT) && !write_adpcm_wav_header (outfile, num_channels, encode_width_bits, num_samples, sample_rate, samples_per_block)) { 526 | fprintf (stderr, "can't write header to file \"%s\" !\n", outfilename); 527 | return -1; 528 | } 529 | 530 | if (verbosity >= 0) fprintf (stderr, "encoding PCM file \"%s\" to%sADPCM file \"%s\"...\n", 531 | infilename, (flags & ADPCM_FLAG_RAW_OUTPUT) ? " raw " : " ", outfilename); 532 | 533 | res = adpcm_encode_data (infile, outfile, num_channels, encode_width_bits, num_samples, samples_per_block, sample_rate); 534 | } 535 | else if (format == WAVE_FORMAT_IMA_ADPCM) { 536 | if (!(flags & ADPCM_FLAG_RAW_OUTPUT) && !write_pcm_wav_header (outfile, num_channels, num_samples, sample_rate)) { 537 | fprintf (stderr, "can't write header to file \"%s\" !\n", outfilename); 538 | return -1; 539 | } 540 | 541 | if (verbosity >= 0) fprintf (stderr, "decoding ADPCM file \"%s\" to%sPCM file \"%s\"...\n", 542 | infilename, (flags & ADPCM_FLAG_RAW_OUTPUT) ? 
" raw " : " ", outfilename); 543 | 544 | res = adpcm_decode_data (infile, outfile, num_channels, bits_per_sample, num_samples, WaveHeader.BlockAlign); 545 | } 546 | 547 | fclose (outfile); 548 | fclose (infile); 549 | return res; 550 | } 551 | 552 | static int write_pcm_wav_header (FILE *outfile, int num_channels, uint32_t num_samples, uint32_t sample_rate) 553 | { 554 | RiffChunkHeader riffhdr; 555 | ChunkHeader datahdr, fmthdr; 556 | WaveHeader wavhdr; 557 | 558 | int wavhdrsize = 16; 559 | int bytes_per_sample = 2; 560 | uint32_t total_data_bytes = num_samples * bytes_per_sample * num_channels; 561 | 562 | memset (&wavhdr, 0, sizeof (wavhdr)); 563 | 564 | wavhdr.FormatTag = WAVE_FORMAT_PCM; 565 | wavhdr.NumChannels = num_channels; 566 | wavhdr.SampleRate = sample_rate; 567 | wavhdr.BytesPerSecond = sample_rate * num_channels * bytes_per_sample; 568 | wavhdr.BlockAlign = bytes_per_sample * num_channels; 569 | wavhdr.BitsPerSample = 16; 570 | 571 | memcpy (riffhdr.ckID, "RIFF", sizeof (riffhdr.ckID)); 572 | memcpy (riffhdr.formType, "WAVE", sizeof (riffhdr.formType)); 573 | riffhdr.ckSize = sizeof (riffhdr) + wavhdrsize + sizeof (datahdr) + total_data_bytes; 574 | memcpy (fmthdr.ckID, "fmt ", sizeof (fmthdr.ckID)); 575 | fmthdr.ckSize = wavhdrsize; 576 | 577 | memcpy (datahdr.ckID, "data", sizeof (datahdr.ckID)); 578 | datahdr.ckSize = total_data_bytes; 579 | 580 | // write the RIFF chunks up to just before the data starts 581 | 582 | native_to_little_endian (&riffhdr, ChunkHeaderFormat); 583 | native_to_little_endian (&fmthdr, ChunkHeaderFormat); 584 | native_to_little_endian (&wavhdr, WaveHeaderFormat); 585 | native_to_little_endian (&datahdr, ChunkHeaderFormat); 586 | 587 | return fwrite (&riffhdr, sizeof (riffhdr), 1, outfile) && 588 | fwrite (&fmthdr, sizeof (fmthdr), 1, outfile) && 589 | fwrite (&wavhdr, wavhdrsize, 1, outfile) && 590 | fwrite (&datahdr, sizeof (datahdr), 1, outfile); 591 | } 592 | 593 | static int write_adpcm_wav_header (FILE *outfile, int 
num_channels, int bps, uint32_t num_samples, uint32_t sample_rate, int samples_per_block) 594 | { 595 | RiffChunkHeader riffhdr; 596 | ChunkHeader datahdr, fmthdr; 597 | WaveHeader wavhdr; 598 | FactHeader facthdr; 599 | 600 | int wavhdrsize = 20; 601 | int block_size = adpcm_sample_count_to_block_size (samples_per_block, num_channels, bps); 602 | uint32_t num_blocks = num_samples / samples_per_block; 603 | int leftover_samples = num_samples % samples_per_block; 604 | uint32_t total_data_bytes = num_blocks * block_size; 605 | 606 | if (leftover_samples) 607 | total_data_bytes += adpcm_align_block_size (adpcm_sample_count_to_block_size (leftover_samples, num_channels, bps), num_channels, bps, 1); 608 | 609 | memset (&wavhdr, 0, sizeof (wavhdr)); 610 | 611 | wavhdr.FormatTag = WAVE_FORMAT_IMA_ADPCM; 612 | wavhdr.NumChannels = num_channels; 613 | wavhdr.SampleRate = sample_rate; 614 | wavhdr.BytesPerSecond = sample_rate * block_size / samples_per_block; 615 | wavhdr.BlockAlign = block_size; 616 | wavhdr.BitsPerSample = bps; 617 | wavhdr.cbSize = 2; 618 | wavhdr.Samples.SamplesPerBlock = samples_per_block; 619 | 620 | memcpy (riffhdr.ckID, "RIFF", sizeof (riffhdr.ckID)); 621 | memcpy (riffhdr.formType, "WAVE", sizeof (riffhdr.formType)); 622 | riffhdr.ckSize = sizeof (riffhdr) + wavhdrsize + sizeof (facthdr) + sizeof (datahdr) + total_data_bytes; 623 | memcpy (fmthdr.ckID, "fmt ", sizeof (fmthdr.ckID)); 624 | fmthdr.ckSize = wavhdrsize; 625 | memcpy (facthdr.ckID, "fact", sizeof (facthdr.ckID)); 626 | facthdr.TotalSamples = num_samples; 627 | facthdr.ckSize = 4; 628 | 629 | memcpy (datahdr.ckID, "data", sizeof (datahdr.ckID)); 630 | datahdr.ckSize = total_data_bytes; 631 | 632 | // write the RIFF chunks up to just before the data starts 633 | 634 | native_to_little_endian (&riffhdr, ChunkHeaderFormat); 635 | native_to_little_endian (&fmthdr, ChunkHeaderFormat); 636 | native_to_little_endian (&wavhdr, WaveHeaderFormat); 637 | native_to_little_endian (&facthdr, 
FactHeaderFormat); 638 | native_to_little_endian (&datahdr, ChunkHeaderFormat); 639 | 640 | return fwrite (&riffhdr, sizeof (riffhdr), 1, outfile) && 641 | fwrite (&fmthdr, sizeof (fmthdr), 1, outfile) && 642 | fwrite (&wavhdr, wavhdrsize, 1, outfile) && 643 | fwrite (&facthdr, sizeof (facthdr), 1, outfile) && 644 | fwrite (&datahdr, sizeof (datahdr), 1, outfile); 645 | } 646 | 647 | static int adpcm_decode_data (FILE *infile, FILE *outfile, int num_channels, int bits_per_sample, uint32_t num_samples, int block_size) 648 | { 649 | int samples_per_block = adpcm_block_size_to_sample_count (block_size, num_channels, bits_per_sample), percent; 650 | void *pcm_block = malloc (samples_per_block * num_channels * 2); 651 | void *adpcm_block = malloc (block_size); 652 | uint32_t progress_divider = 0; 653 | 654 | if (!pcm_block || !adpcm_block) { 655 | fprintf (stderr, "could not allocate memory for buffers!\n"); 656 | return -1; 657 | } 658 | 659 | if (verbosity >= 0 && num_samples > 1000) { 660 | progress_divider = (num_samples + 50) / 100; 661 | fprintf (stderr, "\rprogress: %d%% ", percent = 0); 662 | fflush (stderr); 663 | } 664 | 665 | while (num_samples) { 666 | int this_block_adpcm_samples = samples_per_block; 667 | int this_block_pcm_samples = samples_per_block; 668 | 669 | if (this_block_adpcm_samples > (int) num_samples) { 670 | block_size = adpcm_sample_count_to_block_size (num_samples, num_channels, bits_per_sample); 671 | this_block_adpcm_samples = adpcm_block_size_to_sample_count (block_size, num_channels, bits_per_sample); 672 | this_block_pcm_samples = num_samples; 673 | 674 | if (verbosity > 0) 675 | fprintf (stderr, "\rfinal block decodes %d (of %d) samples in %d-byte block\n", 676 | num_samples, this_block_adpcm_samples, block_size); 677 | } 678 | 679 | if (!fread (adpcm_block, block_size, 1, infile)) { 680 | fprintf (stderr, "could not read all audio data from input file!\n"); 681 | return -1; 682 | } 683 | 684 | if (adpcm_decode_block_ex (pcm_block, 
adpcm_block, block_size, num_channels, bits_per_sample) != this_block_adpcm_samples) { 685 | fprintf (stderr, "adpcm_decode_block_ex() did not return expected value!\n"); 686 | return -1; 687 | } 688 | 689 | if (IS_BIG_ENDIAN) { 690 | int scount = this_block_pcm_samples * num_channels; 691 | unsigned char *cp = (unsigned char *) pcm_block; 692 | 693 | while (scount--) { 694 | int16_t temp = * (int16_t *) cp; 695 | *cp++ = (unsigned char) temp; 696 | *cp++ = (unsigned char) (temp >> 8); 697 | } 698 | } 699 | 700 | if (!fwrite (pcm_block, this_block_pcm_samples * num_channels * 2, 1, outfile)) { 701 | fprintf (stderr, "could not write all audio data to output file!\n"); 702 | return -1; 703 | } 704 | 705 | num_samples -= this_block_pcm_samples; 706 | 707 | if (progress_divider) { 708 | int new_percent = 100 - num_samples / progress_divider; 709 | 710 | if (new_percent != percent) { 711 | fprintf (stderr, "\rprogress: %d%% ", percent = new_percent); 712 | fflush (stderr); 713 | } 714 | } 715 | } 716 | 717 | if (verbosity >= 0) 718 | fprintf (stderr, "\r...completed successfully\n"); 719 | 720 | free (adpcm_block); 721 | free (pcm_block); 722 | return 0; 723 | } 724 | 725 | static int adpcm_encode_data (FILE *infile, FILE *outfile, int num_channels, int bps, uint32_t num_samples, int samples_per_block, int sample_rate) 726 | { 727 | int block_size = adpcm_sample_count_to_block_size (samples_per_block, num_channels, bps), percent, noise_shaping; 728 | int16_t *pcm_block = malloc (samples_per_block * num_channels * 2); 729 | void *adpcm_block = malloc (block_size); 730 | uint32_t progress_divider = 0; 731 | void *adpcm_cnxt = NULL; 732 | 733 | double rms_noise_total [2] = { 0.0, 0.0 }; 734 | double rms_noise_peak [2] = { 0.0, 0.0 }; 735 | int32_t max_error [2] = { 0, 0 }; 736 | uint32_t noise_samples = 0; 737 | 738 | if (!pcm_block || !adpcm_block) { 739 | fprintf (stderr, "could not allocate memory for buffers!\n"); 740 | return -1; 741 | } 742 | 743 | if (verbosity >= 
0 && num_samples > 1000) { 744 | progress_divider = (num_samples + 50) / 100; 745 | fprintf (stderr, "\rprogress: %d%% ", percent = 0); 746 | fflush (stderr); 747 | } 748 | 749 | if (flags & ADPCM_FLAG_NOISE_SHAPING) { 750 | if (static_shaping_weight != 0.0) 751 | noise_shaping = NOISE_SHAPING_STATIC; 752 | else if (sample_rate > 64000) { 753 | noise_shaping = NOISE_SHAPING_STATIC; 754 | static_shaping_weight = 1.0; 755 | } 756 | else 757 | noise_shaping = NOISE_SHAPING_DYNAMIC; 758 | } 759 | else 760 | noise_shaping = NOISE_SHAPING_OFF; 761 | 762 | adpcm_cnxt = adpcm_create_context (num_channels, sample_rate, lookahead, noise_shaping); 763 | 764 | if (!adpcm_cnxt) { 765 | fprintf (stderr, "could not create ADPCM context!\n"); 766 | return -1; 767 | } 768 | 769 | if (noise_shaping == NOISE_SHAPING_STATIC) 770 | adpcm_set_shaping_weight (adpcm_cnxt, static_shaping_weight); 771 | 772 | while (num_samples) { 773 | int this_block_adpcm_samples = samples_per_block; 774 | int this_block_pcm_samples = samples_per_block; 775 | size_t num_bytes; 776 | 777 | if (this_block_pcm_samples > (int) num_samples) { 778 | block_size = adpcm_align_block_size (adpcm_sample_count_to_block_size (num_samples, num_channels, bps), num_channels, bps, 1); 779 | this_block_adpcm_samples = adpcm_block_size_to_sample_count (block_size, num_channels, bps); 780 | this_block_pcm_samples = num_samples; 781 | 782 | if (verbosity > 0) 783 | fprintf (stderr, "\rfinal block encodes %d (of %d) samples in %d-byte block\n", 784 | num_samples, this_block_adpcm_samples, block_size); 785 | } 786 | 787 | if (!fread (pcm_block, this_block_pcm_samples * num_channels * 2, 1, infile)) { 788 | fprintf (stderr, "\rcould not read all audio data from input file!\n"); 789 | return -1; 790 | } 791 | 792 | if (IS_BIG_ENDIAN) { 793 | int scount = this_block_pcm_samples * num_channels; 794 | unsigned char *cp = (unsigned char *) pcm_block; 795 | 796 | while (scount--) { 797 | int16_t temp = cp [0] + (cp [1] << 8); 798 | * 
(int16_t *) cp = temp; 799 | cp += 2; 800 | } 801 | } 802 | 803 | // if this is the last block and it's not full, duplicate the last sample(s) so we don't 804 | // create problems for the lookahead 805 | 806 | if (this_block_adpcm_samples > this_block_pcm_samples) { 807 | int16_t *dst = pcm_block + this_block_pcm_samples * num_channels, *src = dst - num_channels; 808 | int dups = (this_block_adpcm_samples - this_block_pcm_samples) * num_channels; 809 | 810 | while (dups--) 811 | *dst++ = *src++; 812 | } 813 | 814 | adpcm_encode_block_ex (adpcm_cnxt, adpcm_block, &num_bytes, pcm_block, this_block_adpcm_samples, bps); 815 | 816 | if ((int) num_bytes != block_size) { 817 | fprintf (stderr, "\radpcm_encode_block_ex() did not return expected value (expected %d, got %d)!\n", block_size, (int) num_bytes); 818 | return -1; 819 | } 820 | 821 | if (flags & ADPCM_FLAG_MEASURE_NOISE) { 822 | int16_t *pcm_decoded = malloc (samples_per_block * num_channels * 2); 823 | double rms_noise [2] = { 0.0, 0.0 }; 824 | int i; 825 | 826 | if (adpcm_decode_block_ex (pcm_decoded, adpcm_block, block_size, num_channels, bps) != this_block_adpcm_samples) { 827 | fprintf (stderr, "\radpcm_decode_block_ex() did not return expected value!\n"); 828 | return -1; 829 | } 830 | 831 | for (i = 0; i < this_block_pcm_samples * num_channels; i += num_channels) { 832 | int32_t error = abs (pcm_block [i] - pcm_decoded [i]); 833 | 834 | if (error > max_error [0]) 835 | max_error [0] = error; 836 | 837 | rms_noise [0] += (double) error * error; 838 | 839 | if (num_channels == 2) { 840 | error = abs (pcm_block [i+1] - pcm_decoded [i+1]); 841 | 842 | if (error > max_error [1]) 843 | max_error [1] = error; 844 | 845 | rms_noise [1] += (double) error * error; 846 | } 847 | } 848 | 849 | noise_samples += this_block_pcm_samples; 850 | rms_noise_total [0] += rms_noise [0]; 851 | rms_noise_total [1] += rms_noise [1]; 852 | 853 | if (rms_noise [0] / this_block_pcm_samples > rms_noise_peak [0]) 854 | rms_noise_peak 
[0] = rms_noise [0] / this_block_pcm_samples; 855 | 856 | if (rms_noise [1] / this_block_pcm_samples > rms_noise_peak [1]) 857 | rms_noise_peak [1] = rms_noise [1] / this_block_pcm_samples; 858 | 859 | free (pcm_decoded); 860 | } 861 | 862 | if (!fwrite (adpcm_block, block_size, 1, outfile)) { 863 | fprintf (stderr, "\rcould not write all audio data to output file!\n"); 864 | return -1; 865 | } 866 | 867 | num_samples -= this_block_pcm_samples; 868 | 869 | if (progress_divider) { 870 | int new_percent = 100 - num_samples / progress_divider; 871 | 872 | if (new_percent != percent) { 873 | fprintf (stderr, "\rprogress: %d%% ", percent = new_percent); 874 | fflush (stderr); 875 | } 876 | } 877 | } 878 | 879 | if (verbosity >= 0) 880 | fprintf (stderr, "\r...completed successfully\n"); 881 | 882 | if (flags & ADPCM_FLAG_MEASURE_NOISE) { 883 | double full_scale_rms = 32768.0 * 32767.0 * 0.5; 884 | 885 | if (num_channels == 2) { 886 | rms_noise_total [0] /= noise_samples * full_scale_rms; 887 | rms_noise_total [1] /= noise_samples * full_scale_rms; 888 | rms_noise_peak [0] /= full_scale_rms; 889 | rms_noise_peak [1] /= full_scale_rms; 890 | 891 | fprintf (stderr, "\n Channel: left right \n"); 892 | fprintf (stderr, "---------------------------------------\n"); 893 | fprintf (stderr, "Max Sample Error: %6ld %6ld\n", (long) max_error [0], (long) max_error [1]); 894 | fprintf (stderr, " RMS Total Noise: %6.2f dB %6.2f dB\n", log10 (rms_noise_total [0]) * 10.0, log10 (rms_noise_total [1]) * 10.0); 895 | fprintf (stderr, " RMS Peak Noise: %6.2f dB %6.2f dB\n\n", log10 (rms_noise_peak [0]) * 10.0, log10 (rms_noise_peak [1]) * 10.0); 896 | } 897 | else { 898 | rms_noise_total [0] /= noise_samples * full_scale_rms; 899 | rms_noise_peak [0] /= full_scale_rms; 900 | 901 | fprintf (stderr, "\nMax Sample Error: %6ld\n", (long) max_error [0]); 902 | fprintf (stderr, " RMS Total Noise: %6.2f dB\n", log10 (rms_noise_total [0]) * 10.0); 903 | fprintf (stderr, " RMS Peak Noise: %6.2f 
dB\n\n", log10 (rms_noise_peak [0]) * 10.0); 904 | } 905 | } 906 | 907 | adpcm_free_context (adpcm_cnxt); 908 | free (adpcm_block); 909 | free (pcm_block); 910 | return 0; 911 | } 912 | 913 | static void little_endian_to_native (void *data, char *format) 914 | { 915 | unsigned char *cp = (unsigned char *) data; 916 | int32_t temp; 917 | 918 | while (*format) { 919 | switch (*format) { 920 | case 'L': 921 | temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24); 922 | * (int32_t *) cp = temp; 923 | cp += 4; 924 | break; 925 | 926 | case 'S': 927 | temp = cp [0] + (cp [1] << 8); 928 | * (short *) cp = (short) temp; 929 | cp += 2; 930 | break; 931 | 932 | default: 933 | if (isdigit ((unsigned char) *format)) 934 | cp += *format - '0'; 935 | 936 | break; 937 | } 938 | 939 | format++; 940 | } 941 | } 942 | 943 | static void native_to_little_endian (void *data, char *format) 944 | { 945 | unsigned char *cp = (unsigned char *) data; 946 | int32_t temp; 947 | 948 | while (*format) { 949 | switch (*format) { 950 | case 'L': 951 | temp = * (int32_t *) cp; 952 | *cp++ = (unsigned char) temp; 953 | *cp++ = (unsigned char) (temp >> 8); 954 | *cp++ = (unsigned char) (temp >> 16); 955 | *cp++ = (unsigned char) (temp >> 24); 956 | break; 957 | 958 | case 'S': 959 | temp = * (short *) cp; 960 | *cp++ = (unsigned char) temp; 961 | *cp++ = (unsigned char) (temp >> 8); 962 | break; 963 | 964 | default: 965 | if (isdigit ((unsigned char) *format)) 966 | cp += *format - '0'; 967 | 968 | break; 969 | } 970 | 971 | format++; 972 | } 973 | } 974 | #endif 975 | 976 | -------------------------------------------------------------------------------- /adpcm-lib.c: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////// 2 | // **** ADPCM-XQ **** // 3 | // Xtreme Quality ADPCM Encoder/Decoder // 4 | // Copyright (c) 2024 David Bryant. 
//
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////

// NOTE(review): the header names of the next four directives are missing in this copy
// of the file (they look to have been stripped during extraction). The code visible
// below uses malloc/free, memset, floor and the <stdint.h> integer types, so the
// original list presumably covers those - confirm against the upstream source.

#include
#include
#include
#include

#include "adpcm-lib.h"

/* This module encodes and decodes ADPCM (DVI/IMA variant). It handles standard 4-bit
 * ADPCM data (where each code is a "nibble") and also the less-supported 2-bit, 3-bit,
 * and 5-bit variants. ADPCM audio is divided into independently decodable blocks that
 * can be relatively small. The most common configuration is to store 505 4-bit samples
 * into a 256 byte block, although other sizes are permitted as long as the number of
 * samples is one greater than a multiple of 8 (for 4-bit codes). When multiple
 * channels are present, they are interleaved in the data with a 4-byte interval, even
 * for code sizes that don't evenly divide into 32 bits (which seems a little weird at
 * first, but is actually kinda cool).
 *
 * Thanks to Jon Olick for the idea of limiting the ply search to only those values
 * that are reasonably likely to provide a benefit:
 *
 * https://www.jonolick.com/home/introducing-a-single-file-pcmadpcm-wav-file-writer
 */

/************************************ ADPCM encoder ***********************************/

// accumulator type for squared-error totals, with the largest value it can hold
typedef uint64_t rms_error_t; // best if "double" or "uint64_t", "float" okay in a pinch
#define MAX_RMS_ERROR UINT64_MAX
// typedef double rms_error_t; // best if "double" or "uint64_t", "float" okay in a pinch
// #define MAX_RMS_ERROR DBL_MAX

// Clamp "data" in place to the range min..max. Note that this expands to a bare
// if / else-if pair (not a single expression or a do-while), so it should only be
// used as a complete statement.
#define CLIP(data, min, max) \
if ((data) > (max)) data = max; \
else if ((data) < (min)) data = min;

// Given the code size in bits (e.g., 2 - 5), these macros convert from ADPCM "nibble"
// values (0 - 2^n-1) to and from the equivalent deltas (+/- 2^(n-1), no zero)
#define NIBBLE_TO_DELTA(b,n) ((n)<(1<<((b)-1))?(n)+1:(1<<((b)-1))-1-(n))
#define DELTA_TO_NIBBLE(b,d) ((d)<0?(1<<((b)-1))-1-(d):(d)-1)

// mask of the config flag bits that mean some form of noise shaping is in use
#define NOISE_SHAPING_ENABLED (NOISE_SHAPING_DYNAMIC | NOISE_SHAPING_STATIC)

/* step table */
// quantizer step sizes, selected by a channel's "index" (which is kept within 0 - 88)
static const uint16_t step_table[89] = {
    7, 8, 9, 10, 11, 12, 13, 14,
    16, 17, 19, 21, 23, 25, 28, 31,
    34, 37, 41, 45, 50, 55, 60, 66,
    73, 80, 88, 97, 107, 118, 130, 143,
    157, 173, 190, 209, 230, 253, 279, 307,
    337, 371, 408, 449, 494, 544, 598, 658,
    724, 796, 876, 963, 1060, 1166, 1282, 1411,
    1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024,
    3327, 3660, 4026, 4428, 4871, 5358, 5894, 6484,
    7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794,
    32767
};

/* step index tables */
// adjustment added to a channel's step index after each code; the 4-bit table is
// looked up with the low three (magnitude) bits of the code
static const int index_table[] = {
    /* adpcm data size is 4 */
    -1, -1, -1, -1, 2, 4, 6, 8
};

// NOTE(review): presumably looked up the same way with the magnitude bits of the
// 3-bit and 5-bit codes; their use is not visible in this part of the file
static const int index_table_3bit[] = {
    /* adpcm data size is 3 */
    -1, -1, 1, 2
};

static const int index_table_5bit[] = {
    /* adpcm data size is 5 */
    -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16
};

// encoding state kept for each channel
struct adpcm_channel {
    int32_t pcmdata; // current PCM value
    int32_t shaping_weight, error; // for noise shaping
    int8_t index; // current index into step size table
};

// encoder context returned (as an opaque pointer) by adpcm_create_context()
struct adpcm_context {
    struct adpcm_channel channels [2];
    int num_channels, sample_rate, config_flags; // config_flags is the noise shaping mode ORed with the lookahead value
    int16_t *dynamic_shaping_array, last_shaping_weight;
    int static_shaping_weight; // fixed-point weight, scaled by 1024
};

/* With the addition of 3-bit and 5-bit ADPCM formats and various alignment requirements,
 * it's become rather complicated to convert between sample counts and block sizes and
 * make sure the alignment is always correct. Therefore I have put dedicated functions
 * for this in here and removed the functionality from the command-line program.
 *
 * The first two functions simply convert back and forth between sample counts and
 * block sizes (including the header). Note that these functions ignore the alignment
 * requirement that the 3-bit and 5-bit formats must exactly fill the block because
 * this requirement is really not necessary and some programs ignore it (e.g., Adobe
 * Audition), so it's good to be able to correctly _decode_ such files (but probably
 * not a great idea to _create_ them).
106 | */ 107 | 108 | int adpcm_sample_count_to_block_size (int sample_count, int num_chans, int bps) 109 | { 110 | return ((sample_count - 1) * bps + 31) / 32 * num_chans * 4 + (num_chans * 4); 111 | } 112 | 113 | int adpcm_block_size_to_sample_count (int block_size, int num_chans, int bps) 114 | { 115 | return (block_size - num_chans * 4) / num_chans * 8 / bps + 1; 116 | } 117 | 118 | /* Convert an ADPCM block size (including header) to a (possibly) modified size that 119 | * is exactly bit-filled given the channel count and sample size (from 2 - 5 bits). 120 | * The round_up arg controls whether we round up or down to the next aligned value. 121 | * Rounding up ensures that the new block size will still hold at least as many 122 | * samples as the old block size. Even though this particular alignment requirement 123 | * is not really required (the spec is ambiguous) and some programs ignore it, both 124 | * FFmpeg (VLC) and Rockbox generate glitches when playing files that don't adhere, 125 | * so this function is provided to enforce it. 126 | */ 127 | 128 | int adpcm_align_block_size (int block_size, int num_chans, int bps, int round_up) 129 | { 130 | int sample_count = adpcm_block_size_to_sample_count (block_size, num_chans, bps) - 1; 131 | int sample_align = (bps & 1) ? 32 : 32 / bps; 132 | 133 | sample_count = (sample_count + (sample_align - 1) * round_up) / sample_align * sample_align; 134 | return adpcm_sample_count_to_block_size (sample_count + 1, num_chans, bps); 135 | } 136 | 137 | /* Create ADPCM encoder context with given number of channels. 138 | * The returned pointer is used for subsequent calls. Note that 139 | * even though an ADPCM encoder could be set up to encode frames 140 | * independently, we use a context so that we can use previous 141 | * data to improve quality, mostly with respect to noise-shaping 142 | * but also for the step table index at low search depths. 
143 | */ 144 | 145 | void *adpcm_create_context (int num_channels, int sample_rate, int lookahead, int noise_shaping) 146 | { 147 | struct adpcm_context *pcnxt = malloc (sizeof (struct adpcm_context)); 148 | int ch; 149 | 150 | memset (pcnxt, 0, sizeof (struct adpcm_context)); 151 | pcnxt->config_flags = noise_shaping | lookahead; 152 | pcnxt->static_shaping_weight = 1024; 153 | pcnxt->num_channels = num_channels; 154 | pcnxt->sample_rate = sample_rate; 155 | 156 | // we set the indicies to invalid values so that we always recalculate them 157 | // on at least the first frame (and every frame if the depth is sufficient) 158 | 159 | for (ch = 0; ch < num_channels; ++ch) 160 | pcnxt->channels [ch].index = -1; 161 | 162 | return pcnxt; 163 | } 164 | 165 | /* Set the shaping weight in range: -1.0 > weight >= 1.0. 166 | * Note that previously this was fixed to pure first-order (i.e., 1.0). 167 | * Also, values very close to -1.0 are not recommended because 168 | * of the high DC gain. 169 | */ 170 | 171 | void adpcm_set_shaping_weight (void *p, double shaping_weight) 172 | { 173 | struct adpcm_context *pcnxt = (struct adpcm_context *) p; 174 | 175 | pcnxt->static_shaping_weight = (int) floor (shaping_weight * 1024.0 + 0.5); 176 | 177 | if (pcnxt->static_shaping_weight > 1024) pcnxt->static_shaping_weight = 1024; 178 | if (pcnxt->static_shaping_weight < -1023) pcnxt->static_shaping_weight = -1023; 179 | } 180 | 181 | /* Free the ADPCM encoder context. 182 | */ 183 | 184 | void adpcm_free_context (void *p) 185 | { 186 | struct adpcm_context *pcnxt = (struct adpcm_context *) p; 187 | 188 | free (pcnxt); 189 | } 190 | 191 | /* Apply noise-shaping to the supplied sample value using the shaping_weight 192 | * and accumulated error term stored in the adpcm_channel structure. Note that 193 | * the error term in the structure is updated, but won't be "correct" until the 194 | * final re-quantized sample value is added to it (and of course we don't know 195 | * that value yet). 
*/

static inline int32_t noise_shape (struct adpcm_channel *pchan, int32_t sample)
{
    // weighted, negated error: shaping_weight is fixed-point scaled by 1024,
    // so add half (512) and shift right by 10 to round and rescale
    int32_t temp = -((pchan->shaping_weight * pchan->error + 512) >> 10);

    if (pchan->shaping_weight < 0 && temp) {
        // negative weights: shrink the correction by one toward zero when it
        // would exactly equal the stored error
        if (temp == pchan->error)
            temp = (temp < 0) ? temp + 1 : temp - 1;

        // here the stored error is seeded from the unshaped input sample
        pchan->error = -sample;
        sample += temp;
    }
    else
        // otherwise the stored error is seeded from the shaped sample
        pchan->error = -(sample += temp);

    return sample;
}

/* These recursive functions are the core of the "lookahead" feature of the library.
 * They determine the best ADPCM code for the given audio (optionally returned in
 * *best_nibble) and also return the minimum RMS error that that code will generate
 * for the specified depth of the future audio. For speed, there are separate
 * versions for each code size (e.g., 2bit to 5bit).
 *
 * Parameters:
 *  pchan           pointer to the encoding status for the channel to encode
 *  nch             number of channels (just used to correctly stride sample array)
 *  csample         current sample to encode (may be modified by noise shaping)
 *  psample         pointer to samples for lookahead (enough for lookahead depth)
 *  flags           depth of search (in lower bits) plus some other control bits
 *  best_nibble     optional pointer for return of best nibble for current sample
 *  max_error       maximum allowed error (used to eliminate pointless branches)
 *
 * Returns RMS total error for the specified depth
 *
 * Note that the value accumulated and returned is the sum of the squared
 * differences between the reconstructed and target samples (no root is taken).
 */

static rms_error_t min_error_4bit (const struct adpcm_channel *pchan, int nch, int32_t csample, const int16_t *psample, int flags, int *best_nibble, rms_error_t max_error)
{
    int32_t delta = csample - pchan->pcmdata, csample2;
    struct adpcm_channel chan = *pchan;     // working copy; the caller's state is never modified
    uint16_t step = step_table[chan.index];
    uint16_t trial_delta = (step >> 3);
    int nibble, testnbl;
    rms_error_t min_error;

    // this odd-looking code always generates the nibble value with the least error,
    // regardless of step size (which was not true previously)

    if (delta < 0) {
        int mag = ((-delta << 2) + (step & 3) + ((step & 1) << 1)) / step;
        nibble = 0x8 | (mag > 7 ? 7 : mag);
    }
    else {
        int mag = ((delta << 2) + (step & 3) + ((step & 1) << 1)) / step;
        nibble = mag > 7 ? 7 : mag;
    }

    // reconstruct the value this nibble produces (same arithmetic as adpcm_decode_block())

    if (nibble & 1) trial_delta += (step >> 2);
    if (nibble & 2) trial_delta += (step >> 1);
    if (nibble & 4) trial_delta += step;

    if (nibble & 8)
        chan.pcmdata -= trial_delta;
    else
        chan.pcmdata += trial_delta;

    CLIP(chan.pcmdata, -32768, 32767);
    if (best_nibble) *best_nibble = nibble;
    min_error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);

    // if we're at a leaf, or we're not at a leaf but have already exceeded the error limit, return
    if (!(flags & LOOKAHEAD_DEPTH) || min_error >= max_error)
        return min_error;

    // otherwise we execute that naively closest nibble and search deeper for improvement

    chan.index += index_table[nibble & 0x07];
    CLIP(chan.index, 0, 88);

    if (flags & NOISE_SHAPING_ENABLED) {
        chan.error += chan.pcmdata;
        csample2 = noise_shape (&chan, psample [nch]);
    }
    else
        csample2 = psample [nch];

    // "flags - 1" decrements the remaining depth held in the low bits of flags

    min_error += min_error_4bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, max_error - min_error);

    // min_error is the error (from here to the leaf) for the naively closest nibble.
    // Unless we've been told not to try, we may be able to improve on that by choosing
    // an alternative (not closest) nibble.

    if (flags & LOOKAHEAD_NO_BRANCHING)
        return min_error;

    for (testnbl = 0; testnbl <= 0xF; ++testnbl) {
        rms_error_t error, threshold;

        if (testnbl == nibble)  // don't do the same value again
            continue;

        // we execute this branch if:
        //  1. we're doing an exhaustive search, or
        //  2. the test value is one of the maximum values (i.e., 0x7 or 0xf), or
        //  3. the test value's delta is within three of the initial estimate's delta

        if (flags & LOOKAHEAD_EXHAUSTIVE || !(~testnbl & 0x7) || abs (NIBBLE_TO_DELTA (4,nibble) - NIBBLE_TO_DELTA (4,testnbl)) <= 3) {
            trial_delta = (step >> 3);
            chan = *pchan;

            if (testnbl & 1) trial_delta += (step >> 2);
            if (testnbl & 2) trial_delta += (step >> 1);
            if (testnbl & 4) trial_delta += step;

            if (testnbl & 8)
                chan.pcmdata -= trial_delta;
            else
                chan.pcmdata += trial_delta;

            CLIP(chan.pcmdata, -32768, 32767);

            error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);

            // a candidate must beat both the caller's limit and the best found so far
            threshold = max_error < min_error ? max_error : min_error;

            if (error < threshold) {
                chan.index += index_table[testnbl & 0x07];
                CLIP(chan.index, 0, 88);

                if (flags & NOISE_SHAPING_ENABLED) {
                    chan.error += chan.pcmdata;
                    csample2 = noise_shape (&chan, psample [nch]);
                }
                else
                    csample2 = psample [nch];

                error += min_error_4bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, threshold - error);

                if (error < min_error) {
                    if (best_nibble) *best_nibble = testnbl;
                    min_error = error;
                }
            }
        }
    }

    return min_error;
}

// 2-bit variant of min_error_4bit() (same parameters and return value); with only
// four codes, every alternative code is tried when branching is enabled

static rms_error_t min_error_2bit (const struct adpcm_channel *pchan, int nch, int32_t csample, const int16_t *psample, int flags, int *best_nibble, rms_error_t max_error)
{
    int32_t delta = csample - pchan->pcmdata, csample2;
    struct adpcm_channel chan = *pchan;
    uint16_t step = step_table[chan.index];
    int nibble, testnbl;
    rms_error_t min_error;

    // bit 1 of the code is the sign, bit 0 selects the larger magnitude (step * 1.5 vs. step / 2)

    if (delta < 0) {
        if (-delta >= step) {
            chan.pcmdata -= step + (step >> 1);
            nibble = 3;
        }
        else {
            chan.pcmdata -= step >> 1;
            nibble = 2;
        }
    }
    else
        chan.pcmdata += step * ((nibble = delta >= step)) + (step >> 1);

    CLIP(chan.pcmdata, -32768, 32767);
    if (best_nibble) *best_nibble = nibble;
    min_error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);

    // if we're at a leaf, or we're not at a leaf but have already exceeded the error limit, return
    if (!(flags & LOOKAHEAD_DEPTH) || min_error >= max_error)
        return min_error;

    // otherwise we execute that naively closest nibble and search deeper for improvement

    // index adjustment is +2 for the larger magnitude and -1 for the smaller
    chan.index += (nibble & 1) * 3 - 1;
    CLIP(chan.index, 0, 88);

    if (flags & NOISE_SHAPING_ENABLED) {
        chan.error += chan.pcmdata;
        csample2 = noise_shape (&chan, psample [nch]);
    }
    else
        csample2 = psample [nch];

    min_error += min_error_2bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, max_error - min_error);

    // min_error is the error (from here to the leaf) for the naively closest nibble.
    // Unless we've been told not to try, we may be able to improve on that by choosing
    // an alternative (not closest) nibble.

    if (flags & LOOKAHEAD_NO_BRANCHING)
        return min_error;

    for (testnbl = 0; testnbl <= 0x3; ++testnbl) {
        rms_error_t error, threshold;

        if (testnbl == nibble)  // don't do the same value again
            continue;

        chan = *pchan;

        if (testnbl & 2)
            chan.pcmdata -= step * (testnbl & 1) + (step >> 1);
        else
            chan.pcmdata += step * (testnbl & 1) + (step >> 1);

        CLIP(chan.pcmdata, -32768, 32767);

        error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);
        threshold = max_error < min_error ? max_error : min_error;

        if (error < threshold) {
            chan.index += (testnbl & 1) * 3 - 1;
            CLIP(chan.index, 0, 88);

            if (flags & NOISE_SHAPING_ENABLED) {
                chan.error += chan.pcmdata;
                csample2 = noise_shape (&chan, psample [nch]);
            }
            else
                csample2 = psample [nch];

            error += min_error_2bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, threshold - error);

            if (error < min_error) {
                if (best_nibble) *best_nibble = testnbl;
                min_error = error;
            }
        }
    }

    return min_error;
}

// 3-bit variant of min_error_4bit() (same parameters and return value)

static rms_error_t min_error_3bit (const struct adpcm_channel *pchan, int nch, int32_t csample, const int16_t *psample, int flags, int *best_nibble, rms_error_t max_error)
{
    int32_t delta = csample - pchan->pcmdata, csample2;
    struct adpcm_channel chan = *pchan;
    uint16_t step = step_table[chan.index];
    uint16_t trial_delta = (step >> 2);
    int nibble, testnbl;
    rms_error_t min_error;

    // bit 2 of the code is the sign, bits 0-1 are the magnitude (capped at 3)

    if (delta < 0) {
        int mag = ((-delta << 1) + (step & 1)) / step;
        nibble = 0x4 | (mag > 3 ? 3 : mag);
    }
    else {
        int mag = ((delta << 1) + (step & 1)) / step;
        nibble = mag > 3 ? 3 : mag;
    }

    if (nibble & 1) trial_delta += (step >> 1);
    if (nibble & 2) trial_delta += step;

    if (nibble & 4)
        chan.pcmdata -= trial_delta;
    else
        chan.pcmdata += trial_delta;

    CLIP(chan.pcmdata, -32768, 32767);
    if (best_nibble) *best_nibble = nibble;
    min_error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);

    // if we're at a leaf, or we're not at a leaf but have already exceeded the error limit, return
    if (!(flags & LOOKAHEAD_DEPTH) || min_error >= max_error)
        return min_error;

    // otherwise we execute that naively closest nibble and search deeper for improvement

    chan.index += index_table_3bit[nibble & 0x03];
    CLIP(chan.index, 0, 88);

    if (flags & NOISE_SHAPING_ENABLED) {
        chan.error += chan.pcmdata;
        csample2 = noise_shape (&chan, psample [nch]);
    }
    else
        csample2 = psample [nch];

    min_error += min_error_3bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, max_error - min_error);

    // min_error is the error (from here to the leaf) for the naively closest nibble.
    // Unless we've been told not to try, we may be able to improve on that by choosing
    // an alternative (not closest) nibble.

    if (flags & LOOKAHEAD_NO_BRANCHING)
        return min_error;

    for (testnbl = 0; testnbl <= 0x7; ++testnbl) {
        rms_error_t error, threshold;

        if (testnbl == nibble)  // don't do the same value again
            continue;

        // we execute this branch if:
        //  1. we're doing an exhaustive search, or
        //  2. the test value is one of the maximum values (i.e., 0x3 or 0x7), or
        //  3. the test value's delta is within two of the initial estimate's delta

        if (flags & LOOKAHEAD_EXHAUSTIVE || !(~testnbl & 0x3) || abs (NIBBLE_TO_DELTA (3,nibble) - NIBBLE_TO_DELTA (3,testnbl)) <= 2) {
            trial_delta = (step >> 2);
            chan = *pchan;

            if (testnbl & 1) trial_delta += (step >> 1);
            if (testnbl & 2) trial_delta += step;

            if (testnbl & 4)
                chan.pcmdata -= trial_delta;
            else
                chan.pcmdata += trial_delta;

            CLIP(chan.pcmdata, -32768, 32767);
            error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);
            threshold = max_error < min_error ? max_error : min_error;

            if (error < threshold) {
                chan.index += index_table_3bit[testnbl & 0x03];
                CLIP(chan.index, 0, 88);

                if (flags & NOISE_SHAPING_ENABLED) {
                    chan.error += chan.pcmdata;
                    csample2 = noise_shape (&chan, psample [nch]);
                }
                else
                    csample2 = psample [nch];

                error += min_error_3bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, threshold - error);

                if (error < min_error) {
                    if (best_nibble) *best_nibble = testnbl;
                    min_error = error;
                }
            }
        }
    }

    return min_error;
}

// 5-bit variant of min_error_4bit() (same parameters and return value)

static rms_error_t min_error_5bit (const struct adpcm_channel *pchan, int nch, int32_t csample, const int16_t *psample, int flags, int *best_nibble, rms_error_t max_error)
{
    // indexed by the low four bits of the step and added to the numerator before
    // dividing by the step (presumably rounding compensation playing the same role
    // as the "(step & 3) + ..." term in the 4-bit version -- TODO confirm)
    static char comp_table [16] = { 0, 0, 0, 5, 0, 6, 4, 10, 0, 7, 6, 10, 4, 11, 11, 13 };
    int32_t delta = csample - pchan->pcmdata, csample2;
    struct adpcm_channel chan = *pchan;
    uint16_t step = step_table[chan.index];
    uint16_t trial_delta = (step >> 4);
    int nibble, testnbl;
    rms_error_t min_error;

    // bit 4 of the code is the sign, bits 0-3 are the magnitude (capped at 0xf)

    if (delta < 0) {
        int mag = ((-delta << 3) + comp_table [step & 0xf]) / step;
        nibble = 0x10 | (mag > 0xf ? 0xf : mag);
    }
    else {
        int mag = ((delta << 3) + comp_table [step & 0xf]) / step;
        nibble = mag > 0xf ? 0xf : mag;
    }

    if (nibble & 1) trial_delta += (step >> 3);
    if (nibble & 2) trial_delta += (step >> 2);
    if (nibble & 4) trial_delta += (step >> 1);
    if (nibble & 8) trial_delta += step;

    if (nibble & 0x10)
        chan.pcmdata -= trial_delta;
    else
        chan.pcmdata += trial_delta;

    CLIP(chan.pcmdata, -32768, 32767);
    if (best_nibble) *best_nibble = nibble;
    min_error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);

    // if we're at a leaf, or we're not at a leaf but have already exceeded the error limit, return
    if (!(flags & LOOKAHEAD_DEPTH) || min_error >= max_error)
        return min_error;

    // otherwise we execute that naively closest nibble and search deeper for improvement

    chan.index += index_table_5bit[nibble & 0x0f];
    CLIP(chan.index, 0, 88);

    if (flags & NOISE_SHAPING_ENABLED) {
        chan.error += chan.pcmdata;
        csample2 = noise_shape (&chan, psample [nch]);
    }
    else
        csample2 = psample [nch];

    min_error += min_error_5bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, max_error - min_error);

    // min_error is the error (from here to the leaf) for the naively closest nibble.
    // Unless we've been told not to try, we may be able to improve on that by choosing
    // an alternative (not closest) nibble.

    if (flags & LOOKAHEAD_NO_BRANCHING)
        return min_error;

    for (testnbl = 0; testnbl <= 0x1F; ++testnbl) {
        rms_error_t error, threshold;

        if (testnbl == nibble)  // don't do the same value again
            continue;

        // we execute this trial if:
        //  1. we're doing an exhaustive search, or
        //  2. the trial value is one of the four maximum values for the sign, or
        //  3. the test value's delta is within three of the initial estimate's delta

        if (flags & LOOKAHEAD_EXHAUSTIVE || (testnbl | 3) == (nibble | 0xf) || abs (NIBBLE_TO_DELTA (5,nibble) - NIBBLE_TO_DELTA (5,testnbl)) <= 3) {
            trial_delta = (step >> 4);
            chan = *pchan;

            if (testnbl & 1) trial_delta += (step >> 3);
            if (testnbl & 2) trial_delta += (step >> 2);
            if (testnbl & 4) trial_delta += (step >> 1);
            if (testnbl & 8) trial_delta += step;

            if (testnbl & 0x10)
                chan.pcmdata -= trial_delta;
            else
                chan.pcmdata += trial_delta;

            CLIP(chan.pcmdata, -32768, 32767);

            error = (rms_error_t) (chan.pcmdata - csample) * (chan.pcmdata - csample);
            threshold = max_error < min_error ? max_error : min_error;

            if (error < threshold) {
                chan.index += index_table_5bit [testnbl & 0x0f];
                CLIP(chan.index, 0, 88);

                if (flags & NOISE_SHAPING_ENABLED) {
                    chan.error += chan.pcmdata;
                    csample2 = noise_shape (&chan, psample [nch]);
                }
                else
                    csample2 = psample [nch];

                error += min_error_5bit (&chan, nch, csample2, psample + nch, flags - 1, NULL, threshold - error);

                if (error < min_error) {
                    if (best_nibble) *best_nibble = testnbl;
                    min_error = error;
                }
            }
        }
    }

    return min_error;
}

// Encode a single sample for channel ch and return its ADPCM code. The code is
// chosen by the min_error_Nbit() function matching bps and then applied to the
// channel's pcmdata and index with the same arithmetic the decoder uses.
// num_samples is the number of samples available at psample (including the
// current one) and is used to limit the lookahead depth.

static uint8_t encode_sample (struct adpcm_context *pcnxt, int ch, int bps, const int16_t *psample, int num_samples)
{
    struct adpcm_channel *pchan = pcnxt->channels + ch;
    uint16_t step = step_table[pchan->index];
    int flags = pcnxt->config_flags, nibble;
    int32_t csample = *psample;
    uint16_t trial_delta;

    if (flags & NOISE_SHAPING_ENABLED)
        csample = noise_shape (pchan, csample);

    // never look ahead past the end of the supplied samples

    if ((flags & LOOKAHEAD_DEPTH) > num_samples - 1)
        flags = (flags & ~LOOKAHEAD_DEPTH) + num_samples - 1;

    if (bps == 2) {
        min_error_2bit (pchan, pcnxt->num_channels, csample, psample, flags, &nibble, MAX_RMS_ERROR);

        if (nibble & 2)
            pchan->pcmdata -= step * (nibble & 1) + (step >> 1);
        else
            pchan->pcmdata += step * (nibble & 1) + (step >> 1);

        pchan->index += (nibble & 1) * 3 - 1;
    }
    else if (bps == 3) {
        min_error_3bit (pchan, pcnxt->num_channels, csample, psample, flags, &nibble, MAX_RMS_ERROR);
        trial_delta = (step >> 2);
        if (nibble & 1) trial_delta += (step >> 1);
        if (nibble & 2) trial_delta += step;

        if (nibble & 4)
            pchan->pcmdata -= trial_delta;
        else
            pchan->pcmdata += trial_delta;

        pchan->index += index_table_3bit[nibble & 0x03];
    }
    else if (bps == 4) {
        min_error_4bit (pchan, pcnxt->num_channels, csample, psample, flags, &nibble, MAX_RMS_ERROR);
        trial_delta = (step >> 3);
        if (nibble & 1) trial_delta += (step >> 2);
        if (nibble & 2) trial_delta += (step >> 1);
        if (nibble & 4) trial_delta += step;

        if (nibble & 8)
            pchan->pcmdata -= trial_delta;
        else
            pchan->pcmdata += trial_delta;

        pchan->index += index_table[nibble & 0x07];
    }
    else {  // bps == 5
        min_error_5bit (pchan, pcnxt->num_channels, csample, psample, flags, &nibble, MAX_RMS_ERROR);
        trial_delta = (step >> 4);
        if (nibble & 1) trial_delta += (step >> 3);
        if (nibble & 2) trial_delta += (step >> 2);
        if (nibble & 4) trial_delta += (step >> 1);
        if (nibble & 8) trial_delta += step;

        if (nibble & 0x10)
            pchan->pcmdata -= trial_delta;
        else
            pchan->pcmdata += trial_delta;

        pchan->index += index_table_5bit[nibble & 0x0f];
    }

    CLIP(pchan->index, 0, 88);
    CLIP(pchan->pcmdata, -32768, 32767);

    // complete the error term that noise_shape() left pending (see its description)

    if (flags & NOISE_SHAPING_ENABLED)
        pchan->error += pchan->pcmdata;

    return nibble;
}

// Encode inbufcount composite samples from inbuf (interleaved by channel) into
// outbuf, one channel at a time. Codes are packed least-significant-bit first
// into a per-channel byte stream, and that stream is stored in 4-byte groups
// interleaved across channels. The number of bytes produced (whole 32-bit words
// per channel) is added to *outbufsize.

static void encode_chunks (struct adpcm_context *pcnxt, uint8_t *outbuf, size_t *outbufsize, const int16_t *inbuf, int inbufcount, int bps)
{
    const int16_t *pcmbuf;
    int ch;

    for (ch = 0; ch < pcnxt->num_channels; ++ch) {
        // shiftbits accumulates codes not yet written out, numbits counts the valid
        // bits in it, and j is the index of the next byte in this channel's stream
        int shiftbits = 0, numbits = 0, i, j;

        if (pcnxt->config_flags & NOISE_SHAPING_STATIC)
            pcnxt->channels [ch].shaping_weight = pcnxt->static_shaping_weight;

        pcmbuf = inbuf + ch;

        for (j = i = 0; i < inbufcount; ++i) {
            if (pcnxt->config_flags & NOISE_SHAPING_DYNAMIC)
                pcnxt->channels [ch].shaping_weight = pcnxt->dynamic_shaping_array [i];

            shiftbits |= encode_sample (pcnxt, ch, bps, pcmbuf, inbufcount - i) << numbits;
            pcmbuf += pcnxt->num_channels;

            if ((numbits += bps) >= 8) {
                // byte j of this channel lives in 4-byte group (j / 4), at offset (j & 3)
                outbuf [(j & ~3) * pcnxt->num_channels + (ch * 4) + (j & 3)] = shiftbits;
                shiftbits >>= 8;
                numbits -= 8;
                j++;
            }
        }

        // flush any leftover bits as a final partial byte

        if (numbits)
            outbuf [(j & ~3) * pcnxt->num_channels + (ch * 4) + (j & 3)] = shiftbits;
    }

    *outbufsize += (inbufcount * bps + 31) / 32 * pcnxt->num_channels * 4;
}

/* Encode a block of 16-bit PCM data into N-bit ADPCM.
766 | * 767 | * Parameters: 768 | * p the context returned by adpcm_begin() 769 | * outbuf destination buffer 770 | * outbufsize pointer to variable where the number of bytes written 771 | * will be stored 772 | * inbuf source PCM samples 773 | * inbufcount number of composite PCM samples provided (note: this is 774 | * the total number of 16-bit samples divided by the number 775 | * of channels) 776 | * bps bits per ADPCM sample (2-5) 777 | * 778 | * Returns 1 for success or 0 for error (which is only invalid bit count) 779 | */ 780 | 781 | int adpcm_encode_block_ex (void *p, uint8_t *outbuf, size_t *outbufsize, const int16_t *inbuf, int inbufcount, int bps) 782 | { 783 | struct adpcm_context *pcnxt = (struct adpcm_context *) p; 784 | int ch; 785 | 786 | *outbufsize = 0; 787 | 788 | if (bps < 2 || bps > 5) 789 | return 0; 790 | 791 | if (!inbufcount) 792 | return 1; 793 | 794 | // The first PCM sample is encoded verbatim. In theory, we should apply the noise shaping, 795 | // but we'll actually just apply the error term on the next sample. 796 | 797 | for (ch = 0; ch < pcnxt->num_channels; ch++) 798 | pcnxt->channels[ch].pcmdata = *inbuf++; 799 | 800 | inbufcount--; 801 | 802 | // Use min_error_nbit() to find the optimum initial index if this is the first frame or 803 | // the lookahead depth is at least 3. Below that just using the value leftover from 804 | // the previous frame is better, and of course faster. 
805 | 806 | if (inbufcount && (pcnxt->channels [0].index < 0 || (pcnxt->config_flags & LOOKAHEAD_DEPTH) >= 3)) { 807 | int flags = 16 | LOOKAHEAD_NO_BRANCHING; 808 | 809 | if ((flags & LOOKAHEAD_DEPTH) > inbufcount - 1) 810 | flags = (flags & ~LOOKAHEAD_DEPTH) + inbufcount - 1; 811 | 812 | for (ch = 0; ch < pcnxt->num_channels; ch++) { 813 | rms_error_t min_error = MAX_RMS_ERROR; 814 | rms_error_t error_per_index [89]; 815 | int best_index = 0, tindex; 816 | 817 | for (tindex = 0; tindex <= 88; tindex++) { 818 | struct adpcm_channel chan = pcnxt->channels [ch]; 819 | 820 | chan.index = tindex; 821 | chan.shaping_weight = 0; 822 | 823 | if (bps == 2) 824 | error_per_index [tindex] = min_error_2bit (&chan, pcnxt->num_channels, inbuf [ch], inbuf + ch, flags, NULL, MAX_RMS_ERROR); 825 | else if (bps == 3) 826 | error_per_index [tindex] = min_error_3bit (&chan, pcnxt->num_channels, inbuf [ch], inbuf + ch, flags, NULL, MAX_RMS_ERROR); 827 | else if (bps == 5) 828 | error_per_index [tindex] = min_error_5bit (&chan, pcnxt->num_channels, inbuf [ch], inbuf + ch, flags, NULL, MAX_RMS_ERROR); 829 | else 830 | error_per_index [tindex] = min_error_4bit (&chan, pcnxt->num_channels, inbuf [ch], inbuf + ch, flags, NULL, MAX_RMS_ERROR); 831 | } 832 | 833 | // we use a 3-wide average window because the min_error_nbit() results can be noisy 834 | 835 | for (tindex = 0; tindex <= 87; tindex++) { 836 | rms_error_t terror = error_per_index [tindex]; 837 | 838 | if (tindex) 839 | terror = (error_per_index [tindex - 1] + terror + error_per_index [tindex + 1]) / 3; 840 | 841 | if (terror < min_error) { 842 | best_index = tindex; 843 | min_error = terror; 844 | } 845 | } 846 | 847 | pcnxt->channels [ch].index = best_index; 848 | } 849 | } 850 | 851 | // write the block header, which includes the first PCM sample verbatim 852 | 853 | for (ch = 0; ch < pcnxt->num_channels; ch++) { 854 | outbuf[0] = pcnxt->channels[ch].pcmdata; 855 | outbuf[1] = pcnxt->channels[ch].pcmdata >> 8; 856 | outbuf[2] 
= pcnxt->channels[ch].index; 857 | outbuf[3] = 0; 858 | 859 | outbuf += 4; 860 | *outbufsize += 4; 861 | } 862 | 863 | if (inbufcount && (pcnxt->config_flags & NOISE_SHAPING_DYNAMIC)) { 864 | pcnxt->dynamic_shaping_array = malloc (inbufcount * sizeof (int16_t)); 865 | generate_dns_values (inbuf, inbufcount, pcnxt->num_channels, pcnxt->sample_rate, pcnxt->dynamic_shaping_array, -512, pcnxt->last_shaping_weight); 866 | pcnxt->last_shaping_weight = pcnxt->dynamic_shaping_array [inbufcount - 1]; 867 | } 868 | 869 | // encode the rest of the PCM samples, if any, into 32-bit, possibly interleaved, chunks 870 | 871 | if (inbufcount) 872 | encode_chunks (pcnxt, outbuf, outbufsize, inbuf, inbufcount, bps); 873 | 874 | if (pcnxt->dynamic_shaping_array && (pcnxt->config_flags & NOISE_SHAPING_DYNAMIC)) { 875 | free (pcnxt->dynamic_shaping_array); 876 | pcnxt->dynamic_shaping_array = NULL; 877 | } 878 | 879 | return 1; 880 | } 881 | 882 | /* Encode a block of 16-bit PCM data into 4-bit ADPCM. 883 | * 884 | * Parameters: 885 | * p the context returned by adpcm_begin() 886 | * outbuf destination buffer 887 | * outbufsize pointer to variable where the number of bytes written 888 | * will be stored 889 | * inbuf source PCM samples 890 | * inbufcount number of composite PCM samples provided (note: this is 891 | * the total number of 16-bit samples divided by the number 892 | * of channels) 893 | * 894 | * Returns 1 (for success as there is no error checking) 895 | */ 896 | 897 | int adpcm_encode_block (void *p, uint8_t *outbuf, size_t *outbufsize, const int16_t *inbuf, int inbufcount) 898 | { 899 | return adpcm_encode_block_ex (p, outbuf, outbufsize, inbuf, inbufcount, 4); 900 | } 901 | 902 | /************************************ ADPCM decoder ***********************************/ 903 | 904 | /* Decode the block of 4-bit ADPCM data into PCM. This requires no context because ADPCM 905 | * blocks are independently decodable. 
This assumes that a single entire block is always 906 | * decoded; it must be called multiple times for multiple blocks and cannot resume in the 907 | * middle of a block. Note that for all other bit depths, use adpcm_decode_block_ex(). 908 | * 909 | * Parameters: 910 | * outbuf destination for interleaved PCM samples 911 | * inbuf source ADPCM block 912 | * inbufsize size of source ADPCM block 913 | * channels number of channels in block (must be determined from other context) 914 | * 915 | * Returns number of converted composite samples (total samples divided by number of channels) 916 | */ 917 | 918 | int adpcm_decode_block (int16_t *outbuf, const uint8_t *inbuf, size_t inbufsize, int channels) 919 | { 920 | int ch, samples = 1, chunks; 921 | int32_t pcmdata[2]; 922 | int8_t index[2]; 923 | 924 | if (inbufsize < (uint32_t) channels * 4) 925 | return 0; 926 | 927 | for (ch = 0; ch < channels; ch++) { 928 | *outbuf++ = pcmdata[ch] = (int16_t) (inbuf [0] | (inbuf [1] << 8)); 929 | index[ch] = inbuf [2]; 930 | 931 | if (index [ch] < 0 || index [ch] > 88 || inbuf [3]) // sanitize the input a little... 
932 | return 0; 933 | 934 | inbufsize -= 4; 935 | inbuf += 4; 936 | } 937 | 938 | chunks = inbufsize / (channels * 4); 939 | samples += chunks * 8; 940 | 941 | while (chunks--) { 942 | int ch, i; 943 | 944 | for (ch = 0; ch < channels; ++ch) { 945 | 946 | for (i = 0; i < 4; ++i) { 947 | uint16_t step = step_table [index [ch]], delta = step >> 3; 948 | 949 | if (*inbuf & 1) delta += (step >> 2); 950 | if (*inbuf & 2) delta += (step >> 1); 951 | if (*inbuf & 4) delta += step; 952 | 953 | if (*inbuf & 8) 954 | pcmdata[ch] -= delta; 955 | else 956 | pcmdata[ch] += delta; 957 | 958 | index[ch] += index_table [*inbuf & 0x7]; 959 | CLIP(index[ch], 0, 88); 960 | CLIP(pcmdata[ch], -32768, 32767); 961 | outbuf [i * 2 * channels] = pcmdata[ch]; 962 | 963 | step = step_table [index [ch]]; delta = step >> 3; 964 | 965 | if (*inbuf & 0x10) delta += (step >> 2); 966 | if (*inbuf & 0x20) delta += (step >> 1); 967 | if (*inbuf & 0x40) delta += step; 968 | 969 | if (*inbuf & 0x80) 970 | pcmdata[ch] -= delta; 971 | else 972 | pcmdata[ch] += delta; 973 | 974 | index[ch] += index_table [(*inbuf >> 4) & 0x7]; 975 | CLIP(index[ch], 0, 88); 976 | CLIP(pcmdata[ch], -32768, 32767); 977 | outbuf [(i * 2 + 1) * channels] = pcmdata[ch]; 978 | 979 | inbuf++; 980 | } 981 | 982 | outbuf++; 983 | } 984 | 985 | outbuf += channels * 7; 986 | } 987 | 988 | return samples; 989 | } 990 | 991 | /* Decode the block of ADPCM data, with from 2 to 5 bits per sample, into 16-bit PCM. 992 | * This requires no context because ADPCM blocks are independently decodable. This assumes 993 | * that a single entire block is always decoded; it must be called multiple times for 994 | * multiple blocks and cannot resume in the middle of a block. 
995 | * 996 | * Parameters: 997 | * outbuf destination for interleaved PCM samples 998 | * inbuf source ADPCM block 999 | * inbufsize size of source ADPCM block 1000 | * channels number of channels in block (must be determined from other context) 1001 | * bps bits per ADPCM sample (2-5, must be determined from other context) 1002 | * 1003 | * Returns number of converted composite samples (total samples divided by number of channels) 1004 | */ 1005 | 1006 | int adpcm_decode_block_ex (int16_t *outbuf, const uint8_t *inbuf, size_t inbufsize, int channels, int bps) 1007 | { 1008 | int samples = 1, ch; 1009 | int32_t pcmdata[2]; 1010 | int8_t index[2]; 1011 | 1012 | if (bps == 4) 1013 | return adpcm_decode_block (outbuf, inbuf, inbufsize, channels); 1014 | 1015 | if (bps < 2 || bps > 5 || inbufsize < (uint32_t) channels * 4) 1016 | return 0; 1017 | 1018 | for (ch = 0; ch < channels; ch++) { 1019 | *outbuf++ = pcmdata[ch] = (int16_t) (inbuf [0] | (inbuf [1] << 8)); 1020 | index[ch] = inbuf [2]; 1021 | 1022 | if (index [ch] < 0 || index [ch] > 88 || inbuf [3]) // sanitize the input a little... 
1023 | return 0; 1024 | 1025 | inbufsize -= 4; 1026 | inbuf += 4; 1027 | } 1028 | 1029 | if (!inbufsize || (inbufsize % (channels * 4))) // extra clean 1030 | return samples; 1031 | 1032 | samples += inbufsize / channels * 8 / bps; 1033 | 1034 | switch (bps) { 1035 | case 2: 1036 | for (ch = 0; ch < channels; ++ch) { 1037 | int shiftbits = 0, numbits = 0, i, j; 1038 | 1039 | for (j = i = 0; i < samples - 1; ++i) { 1040 | uint16_t step = step_table [index [ch]]; 1041 | 1042 | if (numbits < bps) { 1043 | shiftbits |= inbuf [(j & ~3) * channels + (ch * 4) + (j & 3)] << numbits; 1044 | numbits += 8; 1045 | j++; 1046 | } 1047 | 1048 | if (shiftbits & 2) 1049 | pcmdata[ch] -= step * (shiftbits & 1) + (step >> 1); 1050 | else 1051 | pcmdata[ch] += step * (shiftbits & 1) + (step >> 1); 1052 | 1053 | index[ch] += (shiftbits & 1) * 3 - 1; 1054 | shiftbits >>= bps; 1055 | numbits -= bps; 1056 | 1057 | CLIP(index[ch], 0, 88); 1058 | CLIP(pcmdata[ch], -32768, 32767); 1059 | outbuf [i * channels + ch] = pcmdata[ch]; 1060 | } 1061 | } 1062 | 1063 | break; 1064 | 1065 | case 3: 1066 | for (ch = 0; ch < channels; ++ch) { 1067 | int shiftbits = 0, numbits = 0, i, j; 1068 | 1069 | for (j = i = 0; i < samples - 1; ++i) { 1070 | uint16_t step = step_table [index [ch]], delta = step >> 2; 1071 | 1072 | if (numbits < bps) { 1073 | shiftbits |= inbuf [(j & ~3) * channels + (ch * 4) + (j & 3)] << numbits; 1074 | numbits += 8; 1075 | j++; 1076 | } 1077 | 1078 | if (shiftbits & 1) delta += (step >> 1); 1079 | if (shiftbits & 2) delta += step; 1080 | 1081 | if (shiftbits & 4) 1082 | pcmdata[ch] -= delta; 1083 | else 1084 | pcmdata[ch] += delta; 1085 | 1086 | index[ch] += index_table_3bit [shiftbits & 0x3]; 1087 | shiftbits >>= bps; 1088 | numbits -= bps; 1089 | 1090 | CLIP(index[ch], 0, 88); 1091 | CLIP(pcmdata[ch], -32768, 32767); 1092 | outbuf [i * channels + ch] = pcmdata[ch]; 1093 | } 1094 | } 1095 | 1096 | break; 1097 | 1098 | case 5: 1099 | for (ch = 0; ch < channels; ++ch) { 1100 | int 
shiftbits = 0, numbits = 0, i, j; 1101 | 1102 | for (j = i = 0; i < samples - 1; ++i) { 1103 | uint16_t step = step_table [index [ch]], delta = step >> 4; 1104 | 1105 | if (numbits < bps) { 1106 | shiftbits |= inbuf [(j & ~3) * channels + (ch * 4) + (j & 3)] << numbits; 1107 | numbits += 8; 1108 | j++; 1109 | } 1110 | 1111 | if (shiftbits & 1) delta += (step >> 3); 1112 | if (shiftbits & 2) delta += (step >> 2); 1113 | if (shiftbits & 4) delta += (step >> 1); 1114 | if (shiftbits & 8) delta += step; 1115 | 1116 | if (shiftbits & 0x10) 1117 | pcmdata[ch] -= delta; 1118 | else 1119 | pcmdata[ch] += delta; 1120 | 1121 | index[ch] += index_table_5bit [shiftbits & 0xf]; 1122 | shiftbits >>= bps; 1123 | numbits -= bps; 1124 | 1125 | CLIP(index[ch], 0, 88); 1126 | CLIP(pcmdata[ch], -32768, 32767); 1127 | outbuf [i * channels + ch] = pcmdata[ch]; 1128 | } 1129 | } 1130 | 1131 | break; 1132 | 1133 | default: 1134 | return 0; 1135 | } 1136 | 1137 | return samples; 1138 | } 1139 | --------------------------------------------------------------------------------