├── README.md ├── UNLICENSE ├── eachunk.h ├── io.h ├── samples ├── DS1.M10 ├── fifa2001-mt5.dat ├── fifa2001.dat └── male.utk ├── utk.h ├── utkdecode-bnb.c ├── utkdecode-fifa.c ├── utkdecode.c └── utkencode.c /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | EA MicroTalk (also UTalk or UTK) is a linear-predictive speech codec used in 4 | various games by Electronic Arts. The earliest known game to use it is 5 | Beasts & Bumpkins (1997). The codec has a bandwidth of 11.025kHz (sampling rate 6 | 22.05kHz) and frame size of 20ms (432 samples) and only supports mono. It is 7 | typically encoded at 32 kbit/s. 8 | 9 | Docs: http://wiki.niotso.org/UTK 10 | 11 | In this repository, I have created a set of open source (public domain 12 | via the UNLICENSE) MicroTalk decoders/encoders. 13 | 14 | * Use utkdecode to decode Maxis UTK (The Sims Online, SimCity 4). 15 | * Use utkdecode-bnb to decode PT/M10 (Beasts & Bumpkins). 16 | * Use utkdecode-fifa to decode FIFA 2001/2002 (PS2) speech samples. This tool 17 | supports regular MicroTalk and MicroTalk Revision 3 18 | [SCxl files](https://wiki.multimedia.cx/index.php/Electronic_Arts_SCxl).(*) 19 | * Use utkencode to encode Maxis UTK. (This is the simplest container format and 20 | is currently the only one supported for encoding.) 21 | 22 | (*) I wasn't able to find any real-world MicroTalk Rev. 3 samples in any games. 23 | However, you can transcode a FIFA MicroTalk Rev. 2 file to Rev. 3 using 24 | [EA's Sound eXchange tool](https://wiki.multimedia.cx/index.php/Electronic_Arts_Sound_eXchange) 25 | (`sx -mt_blk input.dat -=output.dat`). 
26 | 27 | ## Compiling 28 | 29 | ``` 30 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode utkdecode.c 31 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-fifa utkdecode-fifa.c 32 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-bnb utkdecode-bnb.c 33 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkencode utkencode.c 34 | ``` 35 | 36 | ## How the encoder works 37 | 38 | The encoder for now is very simple. It does LPC analysis using the Levinson 39 | algorithm and transmits the entire excitation signal explicitly. Compression is 40 | achieved by choosing a large fixed codebook gain, such that each excitation 41 | sample has a large (coarse) quantization step size. Error is minimized in the 42 | excitation domain, and the quality is somewhat poor for bitrates below about 43 | 48 kbit/s. 44 | 45 | However, MicroTalk is a multi-pulse codec (it is cheap to code long runs of 46 | zeros in the excitation signal). Hence, a much better design (and indeed the 47 | standard practice for multi-pulse speech codecs) is to search for the positions 48 | and amplitudes of n pulses such that error is minimized in the output domain 49 | (or the perceptually weighted domain). This new encoder is still in the works. -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 
7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /eachunk.h: -------------------------------------------------------------------------------- 1 | typedef struct EAChunk { 2 | uint32_t type; 3 | uint8_t *start; 4 | uint8_t *ptr; 5 | uint8_t *end; 6 | } EAChunk; 7 | 8 | static void chunk_read_bytes(EAChunk *chunk, uint8_t *dest, size_t size) 9 | { 10 | size_t bytes_remaining = chunk->end - chunk->ptr; 11 | 12 | if (bytes_remaining < size) { 13 | fprintf(stderr, "error: unexpected end of chunk\n"); 14 | exit(EXIT_FAILURE); 15 | } 16 | 17 | memcpy(dest, chunk->ptr, size); 18 | chunk->ptr += size; 19 | } 20 | 21 | static uint32_t chunk_read_u32(EAChunk *chunk) 22 | { 23 | uint8_t dest[4]; 24 | chunk_read_bytes(chunk, dest, sizeof(dest)); 25 | return dest[0] | (dest[1] << 8) | (dest[2] << 16) | (dest[3] << 24); 26 | } 27 | 28 | static uint32_t chunk_read_u8(EAChunk *chunk) 29 | { 30 | uint8_t dest; 31 | chunk_read_bytes(chunk, &dest, sizeof(dest)); 32 | return 
dest; 33 | } 34 | 35 | static uint32_t chunk_read_var_int(EAChunk *chunk) 36 | { 37 | uint8_t dest[4]; 38 | uint8_t size = chunk_read_u8(chunk); 39 | 40 | if (size > 4) { 41 | fprintf(stderr, "error: invalid varint size %u\n", (unsigned)size); 42 | exit(EXIT_FAILURE); 43 | } 44 | 45 | chunk_read_bytes(chunk, dest, size); 46 | 47 | /* read a big-endian integer of variable length */ 48 | switch (size) { 49 | case 1: return dest[0]; 50 | case 2: return (dest[0]<<8) | dest[1]; 51 | case 3: return (dest[0]<<16) | (dest[1] << 8) | dest[2]; 52 | case 4: return (dest[0]<<24) | (dest[1] << 16) | (dest[2] << 8) | dest[3]; 53 | default: return 0; 54 | } 55 | } 56 | 57 | static EAChunk *read_chunk(FILE *fp) 58 | { 59 | uint32_t size; 60 | static EAChunk chunk; 61 | static uint8_t buffer[4096]; 62 | 63 | chunk.type = read_u32(fp); 64 | 65 | size = read_u32(fp); 66 | if (size < 8 || size-8 > sizeof(buffer)) { 67 | fprintf(stderr, "error: invalid chunk size %u\n", (unsigned)size); 68 | exit(EXIT_FAILURE); 69 | } 70 | 71 | size -= 8; 72 | read_bytes(fp, buffer, size); 73 | chunk.start = chunk.ptr = buffer; 74 | chunk.end = buffer+size; 75 | 76 | return &chunk; 77 | } -------------------------------------------------------------------------------- /io.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | static void read_bytes(FILE *fp, uint8_t *dest, size_t size) 8 | { 9 | size_t bytes_copied; 10 | 11 | if (!size) 12 | return; 13 | 14 | bytes_copied = fread(dest, 1, size, fp); 15 | if (bytes_copied < size) { 16 | if (ferror(fp)) 17 | fprintf(stderr, "error: fread failed: %s\n", strerror(errno)); 18 | else 19 | fprintf(stderr, "error: unexpected end of file\n"); 20 | 21 | exit(EXIT_FAILURE); 22 | } 23 | } 24 | 25 | static uint32_t read_u32(FILE *fp) 26 | { 27 | uint8_t dest[4]; 28 | read_bytes(fp, dest, sizeof(dest)); 29 | return dest[0] | (dest[1] << 8) | (dest[2] << 16) | 
(dest[3] << 24); 30 | } 31 | 32 | static uint16_t read_u16(FILE *fp) 33 | { 34 | uint8_t dest[2]; 35 | read_bytes(fp, dest, sizeof(dest)); 36 | return dest[0] | (dest[1] << 8); 37 | } 38 | 39 | static uint16_t read_u8(FILE *fp) 40 | { 41 | uint8_t dest; 42 | read_bytes(fp, &dest, sizeof(dest)); 43 | return dest; 44 | } 45 | 46 | static void write_bytes(FILE *fp, const uint8_t *dest, size_t size) 47 | { 48 | if (!size) 49 | return; 50 | 51 | if (fwrite(dest, 1, size, fp) != size) { 52 | fprintf(stderr, "error: fwrite failed: %s\n", strerror(errno)); 53 | exit(EXIT_FAILURE); 54 | } 55 | } 56 | 57 | static void write_u32(FILE *fp, uint32_t x) 58 | { 59 | uint8_t dest[4]; 60 | dest[0] = (uint8_t)x; 61 | dest[1] = (uint8_t)(x>>8); 62 | dest[2] = (uint8_t)(x>>16); 63 | dest[3] = (uint8_t)(x>>24); 64 | write_bytes(fp, dest, sizeof(dest)); 65 | } 66 | 67 | static void write_u16(FILE *fp, uint16_t x) 68 | { 69 | uint8_t dest[2]; 70 | dest[0] = (uint8_t)x; 71 | dest[1] = (uint8_t)(x>>8); 72 | write_bytes(fp, dest, sizeof(dest)); 73 | } 74 | 75 | static void write_u8(FILE *fp, uint8_t x) 76 | { 77 | write_bytes(fp, &x, sizeof(x)); 78 | } -------------------------------------------------------------------------------- /samples/DS1.M10: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/DS1.M10 -------------------------------------------------------------------------------- /samples/fifa2001-mt5.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001-mt5.dat -------------------------------------------------------------------------------- /samples/fifa2001.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001.dat -------------------------------------------------------------------------------- /samples/male.utk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/male.utk -------------------------------------------------------------------------------- /utk.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | /* NOTE(review): the header names on the two #include lines above are missing in this copy. The code in this header uses uint8_t/int16_t, FILE/fread and memset - confirm the intended headers against the upstream file. */ 4 | /* Note: This struct assumes a member alignment of 4 bytes. 5 | ** This matters when pitch_lag > 216 on the first subframe of any given frame. */ 6 | typedef struct UTKContext { 7 | FILE *fp; /* optional stream; utk_read_byte refills from it when [ptr, end) is empty */ 8 | const uint8_t *ptr, *end; /* current window of unread input bytes */ 9 | int parsed_header; /* set to 1 once utk_parse_header has run */ 10 | unsigned int bits_value; /* bit reservoir; bits are consumed from the low end */ 11 | int bits_count; /* number of valid bits in bits_value; 0 means the reader must be re-primed */ 12 | int reduced_bw; /* header flag: excitation is coded for every other sample only */ 13 | int multipulse_thresh; /* a frame whose first coefficient index is below this uses the multi-pulse model */ 14 | float fixed_gains[64]; /* geometric gain table built by utk_parse_header */ 15 | float rc[12]; /* current reflection coefficients */ 16 | float synth_history[12]; /* last 12 outputs of the synthesis filter */ 17 | float adapt_cb[324]; /* adaptive codebook: samples 108..431 of the previous frame, copied before synthesis filtering */ 18 | float decompressed_frame[432]; /* output of utk_decode_frame */ 19 | } UTKContext; 20 | 21 | /* Codebook selectors used as the first index of utk_codebooks and as utk_commands[].next_model. */ enum { 22 | MDL_NORMAL = 0, 23 | MDL_LARGEPULSE = 1 24 | }; 25 | 26 | /* Reflection-coefficient values, indexed by the coefficient indices read in utk_decode_frame. */ static const float utk_rc_table[64] = { 27 | +0.0f, 28 | -.99677598476409912109375f, -.99032700061798095703125f, -.983879029750823974609375f, -.977430999279022216796875f, 29 | -.970982015132904052734375f, -.964533984661102294921875f, -.958085000514984130859375f, -.9516370296478271484375f, 30 | -.930754005908966064453125f, -.904959976673126220703125f, -.879167020320892333984375f, -.853372991085052490234375f, 31 | -.827579021453857421875f, -.801786005496978759765625f, -.775991976261138916015625f, -.75019800662994384765625f, 32 | -.724404990673065185546875f, -.6986110210418701171875f, -.6706349849700927734375f, -.61904799938201904296875f, 33 | -.567460000514984130859375f, -.515873014926910400390625f, -.4642859995365142822265625f, -.4126980006694793701171875f, 34 | -.361110985279083251953125f, 
-.309523999691009521484375f, -.257937014102935791015625f, -.20634900033473968505859375f, 35 | -.1547619998455047607421875f, -.10317499935626983642578125f, -.05158700048923492431640625f, 36 | +0.0f, 37 | +.05158700048923492431640625f, +.10317499935626983642578125f, +.1547619998455047607421875f, +.20634900033473968505859375f, 38 | +.257937014102935791015625f, +.309523999691009521484375f, +.361110985279083251953125f, +.4126980006694793701171875f, 39 | +.4642859995365142822265625f, +.515873014926910400390625f, +.567460000514984130859375f, +.61904799938201904296875f, 40 | +.6706349849700927734375f, +.6986110210418701171875f, +.724404990673065185546875f, +.75019800662994384765625f, 41 | +.775991976261138916015625f, +.801786005496978759765625f, +.827579021453857421875f, +.853372991085052490234375f, 42 | +.879167020320892333984375f, +.904959976673126220703125f, +.930754005908966064453125f, +.9516370296478271484375f, 43 | +.958085000514984130859375f, +.964533984661102294921875f, +.970982015132904052734375f, +.977430999279022216796875f, 44 | +.983879029750823974609375f, +.99032700061798095703125f, +.99677598476409912109375f 45 | }; 46 | 47 | static const uint8_t utk_codebooks[2][256] = { 48 | { /* normal model */ 49 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, 50 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21, 51 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 52 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 25, 53 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, 54 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 22, 55 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 56 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 0, 57 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, 58 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21, 59 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 60 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 26, 61 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17, 62 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 
5, 22, 63 | 4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18, 64 | 4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 2 65 | }, { /* large-pulse model */ 66 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, 67 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27, 68 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 69 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 1, 70 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, 71 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 28, 72 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 73 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 3, 74 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, 75 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27, 76 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 77 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 1, 78 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23, 79 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 28, 80 | 4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24, 81 | 4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 3 82 | } 83 | }; 84 | 85 | static const struct { 86 | int next_model; 87 | int code_size; 88 | float pulse_value; 89 | } utk_commands[29] = { 90 | {MDL_LARGEPULSE, 8, 0.0f}, 91 | {MDL_LARGEPULSE, 7, 0.0f}, 92 | {MDL_NORMAL, 8, 0.0f}, 93 | {MDL_NORMAL, 7, 0.0f}, 94 | {MDL_NORMAL, 2, 0.0f}, 95 | {MDL_NORMAL, 2, -1.0f}, 96 | {MDL_NORMAL, 2, +1.0f}, 97 | {MDL_NORMAL, 3, -1.0f}, 98 | {MDL_NORMAL, 3, +1.0f}, 99 | {MDL_LARGEPULSE, 4, -2.0f}, 100 | {MDL_LARGEPULSE, 4, +2.0f}, 101 | {MDL_LARGEPULSE, 3, -2.0f}, 102 | {MDL_LARGEPULSE, 3, +2.0f}, 103 | {MDL_LARGEPULSE, 5, -3.0f}, 104 | {MDL_LARGEPULSE, 5, +3.0f}, 105 | {MDL_LARGEPULSE, 4, -3.0f}, 106 | {MDL_LARGEPULSE, 4, +3.0f}, 107 | {MDL_LARGEPULSE, 6, -4.0f}, 108 | {MDL_LARGEPULSE, 6, +4.0f}, 109 | {MDL_LARGEPULSE, 5, -4.0f}, 110 | {MDL_LARGEPULSE, 5, +4.0f}, 111 | {MDL_LARGEPULSE, 7, -5.0f}, 112 | {MDL_LARGEPULSE, 7, 
+5.0f}, 113 | {MDL_LARGEPULSE, 6, -5.0f}, 114 | {MDL_LARGEPULSE, 6, +5.0f}, 115 | {MDL_LARGEPULSE, 8, -6.0f}, 116 | {MDL_LARGEPULSE, 8, +6.0f}, 117 | {MDL_LARGEPULSE, 7, -6.0f}, 118 | {MDL_LARGEPULSE, 7, +6.0f} 119 | }; 120 | 121 | /* Return the next input byte. Bytes come from [ctx->ptr, ctx->end) first; when that window is empty and ctx->fp is set, a static 4096-byte buffer is refilled from the file. Returns 0 when no more input is available. */ static int utk_read_byte(UTKContext *ctx) 122 | { 123 | if (ctx->ptr < ctx->end) 124 | return *ctx->ptr++; 125 | 126 | if (ctx->fp) { 127 | static uint8_t buffer[4096]; 128 | size_t bytes_copied = fread(buffer, 1, sizeof(buffer), ctx->fp); 129 | if (bytes_copied > 0 && bytes_copied <= sizeof(buffer)) { 130 | ctx->ptr = buffer; 131 | ctx->end = buffer + bytes_copied; 132 | return *ctx->ptr++; 133 | } 134 | } 135 | 136 | return 0; 137 | } 138 | 139 | /* Read two bytes and combine them big-endian (the first byte is the high byte). */ static int16_t utk_read_i16(UTKContext *ctx) 140 | { 141 | int x = utk_read_byte(ctx); 142 | x = (x << 8) | utk_read_byte(ctx); 143 | return x; 144 | } 145 | 146 | /* Return the lowest 'count' bits of the bit reservoir and drop them. If fewer than 8 bits remain afterwards, one more byte is placed above the remaining bits. Every call in this file passes count <= 8. */ static int utk_read_bits(UTKContext *ctx, int count) 147 | { 148 | int ret = ctx->bits_value & ((1 << count) - 1); 149 | ctx->bits_value >>= count; 150 | ctx->bits_count -= count; 151 | 152 | if (ctx->bits_count < 8) { 153 | /* read another byte */ 154 | ctx->bits_value |= utk_read_byte(ctx) << ctx->bits_count; 155 | ctx->bits_count += 8; 156 | } 157 | 158 | return ret; 159 | } 160 | 161 | /* Read the stream header: 1 bit reduced-bandwidth flag, 4 bits multi-pulse threshold (stored as 32 - value), 4 bits base gain (8 * (1 + value)) and 6 bits gain multiplier (1.04 + value * 0.001). fixed_gains[] is filled as a geometric series from the base gain. */ static void utk_parse_header(UTKContext *ctx) 162 | { 163 | int i; 164 | float multiplier; 165 | 166 | ctx->reduced_bw = utk_read_bits(ctx, 1); 167 | ctx->multipulse_thresh = 32 - utk_read_bits(ctx, 4); 168 | ctx->fixed_gains[0] = 8.0f * (1 + utk_read_bits(ctx, 4)); 169 | multiplier = 1.04f + utk_read_bits(ctx, 6)*0.001f; 170 | 171 | for (i = 1; i < 64; i++) 172 | ctx->fixed_gains[i] = ctx->fixed_gains[i-1] * multiplier; 173 | } 174 | 175 | /* Decode one subframe of excitation. Samples are written to out[0], out[stride], out[2*stride], ... while the index stays below 108; callers in this file pass stride 1 or 2. */ static void utk_decode_excitation(UTKContext *ctx, int use_multipulse, float *out, int stride) 176 | { 177 | int i; 178 | 179 | if (use_multipulse) { 180 | /* multi-pulse model: n pulses are coded explicitly; the rest are zero */ 181 | int model, cmd; 182 | model = 0; 183 | i = 0; 184 | while (i < 108) { 185 | 
cmd = utk_codebooks[model][ctx->bits_value & 0xff]; /* peek at the next 8 bits without consuming them; the table maps them to a command index */ 186 | model = utk_commands[cmd].next_model; 187 | utk_read_bits(ctx, utk_commands[cmd].code_size); /* consume only the bits that make up this code */ 188 | 189 | if (cmd > 3) { 190 | /* insert a pulse with magnitude <= 6.0f */ 191 | out[i] = utk_commands[cmd].pulse_value; 192 | i += stride; 193 | } else if (cmd > 1) { 194 | /* insert between 7 and 70 zeros */ 195 | int count = 7 + utk_read_bits(ctx, 6); 196 | if (i + count * stride > 108) 197 | count = (108 - i)/stride; /* truncate the run at the end of the subframe */ 198 | 199 | while (count > 0) { 200 | out[i] = 0.0f; 201 | i += stride; 202 | count--; 203 | } 204 | } else { 205 | /* insert a pulse with magnitude >= 7.0f */ 206 | int x = 7; 207 | 208 | while (utk_read_bits(ctx, 1)) /* unary code: each 1 bit adds one to the magnitude */ 209 | x++; 210 | 211 | if (!utk_read_bits(ctx, 1)) /* sign bit: 0 means negative */ 212 | x *= -1; 213 | 214 | out[i] = (float)x; 215 | i += stride; 216 | } 217 | } 218 | } else { 219 | /* RELP model: entire residual (excitation) signal is coded explicitly */ 220 | i = 0; 221 | while (i < 108) { 222 | if (!utk_read_bits(ctx, 1)) 223 | out[i] = 0.0f; 224 | else if (!utk_read_bits(ctx, 1)) 225 | out[i] = -2.0f; 226 | else 227 | out[i] = 2.0f; 228 | 229 | i += stride; 230 | } 231 | } 232 | } 233 | 234 | /* Fill lpc[0..11] from the 12 reflection coefficients rc[0..11]. NOTE(review): this looks like it feeds a unit impulse through the 12-stage lattice described by rc (tmp2 holds the lattice state, tmp1 the responses so far) and solves for direct-form coefficients - confirm against the codec documentation. */ static void rc_to_lpc(const float *rc, float *lpc) 235 | { 236 | int i, j; 237 | float tmp1[12]; 238 | float tmp2[12]; 239 | 240 | for (i = 10; i >= 0; i--) 241 | tmp2[1+i] = rc[i]; 242 | 243 | tmp2[0] = 1.0f; 244 | 245 | for (i = 0; i < 12; i++) { 246 | float x = -tmp2[11] * rc[11]; 247 | 248 | for (j = 10; j >= 0; j--) { 249 | x -= tmp2[j] * rc[j]; 250 | tmp2[j+1] = x * rc[j] + tmp2[j]; 251 | } 252 | 253 | tmp1[i] = tmp2[0] = x; 254 | 255 | for (j = 0; j < i; j++) 256 | x -= tmp1[i-1-j] * lpc[j]; 257 | 258 | lpc[i] = x; 259 | } 260 | } 261 | 262 | /* Filter num_blocks * 12 samples of ctx->decompressed_frame in place, starting at 'offset'. The coefficients are derived from the current ctx->rc, and ctx->synth_history carries the last 12 outputs across calls. */ static void utk_lp_synthesis_filter(UTKContext *ctx, int offset, int num_blocks) 263 | { 264 | int i, j, k; 265 | float lpc[12]; 266 | float *ptr = &ctx->decompressed_frame[offset]; 267 | 268 | rc_to_lpc(ctx->rc, lpc); 269 | 270 | for (i = 0; i < num_blocks; i++) { 271 | for 
(j = 0; j < 12; j++) { 272 | float x = *ptr; 273 | /* synth_history is used as a 12-entry circular buffer: the two loops below cover the wrapped and the unwrapped part of the index range */ 274 | for (k = 0; k < j; k++) 275 | x += lpc[k] * ctx->synth_history[k-j+12]; 276 | for (; k < 12; k++) 277 | x += lpc[k] * ctx->synth_history[k-j]; 278 | 279 | ctx->synth_history[11-j] = x; 280 | *ptr++ = x; 281 | } 282 | } 283 | } 284 | 285 | /* 286 | ** Public functions. 287 | */ 288 | 289 | /* Decode one frame of 432 samples into ctx->decompressed_frame. Reads 12 reflection-coefficient indices, then four 108-sample subframes (pitch lag, pitch gain, fixed gain, excitation), refreshes the adaptive codebook and finally runs the synthesis filter over the frame. The stream header is parsed on the first call. */ static void utk_decode_frame(UTKContext *ctx) 290 | { 291 | int i, j; 292 | int use_multipulse = 0; 293 | float excitation[5+108+5]; /* 108 samples with 5 guard samples on each side for the interpolation taps below */ 294 | float rc_delta[12]; 295 | 296 | if (!ctx->bits_count) { /* prime the bit reader on first use or after utk_set_fp/utk_set_ptr reset it */ 297 | ctx->bits_value = utk_read_byte(ctx); 298 | ctx->bits_count = 8; 299 | } 300 | 301 | if (!ctx->parsed_header) { 302 | utk_parse_header(ctx); 303 | ctx->parsed_header = 1; 304 | } 305 | 306 | memset(&excitation[0], 0, 5*sizeof(float)); 307 | memset(&excitation[5+108], 0, 5*sizeof(float)); 308 | 309 | /* read the reflection coefficients */ 310 | for (i = 0; i < 12; i++) { 311 | int idx; 312 | if (i == 0) { 313 | idx = utk_read_bits(ctx, 6); 314 | if (idx < ctx->multipulse_thresh) 315 | use_multipulse = 1; 316 | } else if (i < 4) { 317 | idx = utk_read_bits(ctx, 6); 318 | } else { 319 | idx = 16 + utk_read_bits(ctx, 5); 320 | } 321 | /* each coefficient moves a quarter of the way towards its new value per step; the four steps are applied at the end of this function */ 322 | rc_delta[i] = (utk_rc_table[idx] - ctx->rc[i])*0.25f; 323 | } 324 | 325 | /* decode four subframes */ 326 | for (i = 0; i < 4; i++) { 327 | int pitch_lag = utk_read_bits(ctx, 8); 328 | float pitch_gain = (float)utk_read_bits(ctx, 4)/15.0f; 329 | float fixed_gain = ctx->fixed_gains[utk_read_bits(ctx, 6)]; 330 | 331 | if (!ctx->reduced_bw) { 332 | utk_decode_excitation(ctx, use_multipulse, &excitation[5], 1); 333 | } else { 334 | /* residual (excitation) signal is encoded at reduced bandwidth */ 335 | int align = utk_read_bits(ctx, 1); 336 | int zero = utk_read_bits(ctx, 1); 337 | 338 | utk_decode_excitation(ctx, use_multipulse, &excitation[5+align], 2); 339 | 340 | if (zero) { 341 | /* fill the remaining samples with zero 342 | ** (spectrum is duplicated into high frequencies) */ 343 | for (j = 
0; j < 54; j++) 344 | excitation[5+(1-align)+2*j] = 0.0f; 345 | } else { 346 | /* interpolate the remaining samples 347 | ** (spectrum is low-pass filtered) */ 348 | float *ptr = &excitation[5+(1-align)]; 349 | for (j = 0; j < 108; j += 2) 350 | ptr[j] = ptr[j-5] * 0.01803267933428287506103515625f 351 | - ptr[j-3] * 0.114591561257839202880859375f 352 | + ptr[j-1] * 0.597385942935943603515625f 353 | + ptr[j+1] * 0.597385942935943603515625f 354 | - ptr[j+3] * 0.114591561257839202880859375f 355 | + ptr[j+5] * 0.01803267933428287506103515625f; 356 | 357 | /* scale by 0.5f to give the sinc impulse response unit energy */ 358 | fixed_gain *= 0.5f; 359 | } 360 | } 361 | /* mix the scaled excitation with the adaptive codebook; the adapt_cb index is negative when pitch_lag > 216 on subframe 0, which is what the layout note on UTKContext refers to */ 362 | for (j = 0; j < 108; j++) 363 | ctx->decompressed_frame[108*i+j] = fixed_gain * excitation[5+j] 364 | + pitch_gain * ctx->adapt_cb[108*i+216-pitch_lag+j]; 365 | } 366 | /* keep the last 324 unfiltered samples as the adaptive codebook for the next frame */ 367 | for (i = 0; i < 324; i++) 368 | ctx->adapt_cb[i] = ctx->decompressed_frame[108+i]; 369 | /* step the reflection coefficients four times: the first three steps filter 12 samples each, the last one filters the remaining 33 * 12 samples */ 370 | for (i = 0; i < 4; i++) { 371 | for (j = 0; j < 12; j++) 372 | ctx->rc[j] += rc_delta[j]; 373 | 374 | utk_lp_synthesis_filter(ctx, 12*i, i < 3 ? 1 : 33); 375 | } 376 | } 377 | 378 | /* Zero the whole decoder context. */ static void utk_init(UTKContext *ctx) 379 | { 380 | memset(ctx, 0, sizeof(*ctx)); 381 | } 382 | 383 | /* Set the file that utk_read_byte refills from. */ static void utk_set_fp(UTKContext *ctx, FILE *fp) 384 | { 385 | ctx->fp = fp; 386 | 387 | /* reset the bit reader */ 388 | ctx->bits_count = 0; 389 | } 390 | 391 | /* Set the in-memory input window [ptr, end). */ static void utk_set_ptr(UTKContext *ctx, const uint8_t *ptr, const uint8_t *end) 392 | { 393 | ctx->ptr = ptr; 394 | ctx->end = end; 395 | 396 | /* reset the bit reader */ 397 | ctx->bits_count = 0; 398 | } 399 | 400 | /* 401 | ** MicroTalk Revision 3 decoding function. 
402 | */ 403 | 404 | static void utk_rev3_decode_frame(UTKContext *ctx) 405 | { 406 | int pcm_data_present = (utk_read_byte(ctx) == 0xee); 407 | int i; 408 | 409 | utk_decode_frame(ctx); 410 | 411 | /* unread the last 8 bits and reset the bit reader */ 412 | ctx->ptr--; 413 | ctx->bits_count = 0; 414 | 415 | if (pcm_data_present) { 416 | /* Overwrite n samples at a given offset in the decoded frame with 417 | ** raw PCM data. */ 418 | int offset = utk_read_i16(ctx); 419 | int count = utk_read_i16(ctx); 420 | 421 | /* sx.exe does not do any bounds checking or clamping of these two 422 | ** fields (see 004274D1 in sx.exe v3.01.01), which means a specially 423 | ** crafted MT5:1 file can crash sx.exe. 424 | ** We will throw an error instead. */ 425 | if (offset < 0 || offset > 432) { 426 | fprintf(stderr, "error: invalid PCM offset %d\n", offset); 427 | exit(EXIT_FAILURE); 428 | } 429 | if (count < 0 || count > 432 - offset) { 430 | fprintf(stderr, "error: invalid PCM count %d\n", count); 431 | exit(EXIT_FAILURE); 432 | } 433 | 434 | for (i = 0; i < count; i++) 435 | ctx->decompressed_frame[offset+i] = (float)utk_read_i16(ctx); 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /utkdecode-bnb.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** utkdecode-bnb 3 | ** Decode Beasts & Bumpkins M10 to wav. 4 | ** Authors: Andrew D'Addesio 5 | ** License: Public domain 6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math 7 | ** -fwhole-program -g0 -s -o utkdecode-bnb utkdecode-bnb.c 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "utk.h" 15 | #include "io.h" 16 | #include "eachunk.h" 17 | 18 | #define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) 19 | #define ROUND(x) ((x) >= 0.0f ? 
((x)+0.5f) : ((x)-0.5f)) 20 | #define MIN(x,y) ((x)<(y)?(x):(y)) 21 | #define MAX(x,y) ((x)>(y)?(x):(y)) 22 | #define CLAMP(x,min,max) MIN(MAX(x,min),max) 23 | 24 | typedef struct PTContext { 25 | FILE *infp, *outfp; 26 | uint32_t num_samples; 27 | uint32_t compression_type; 28 | UTKContext utk; 29 | } PTContext; 30 | 31 | static void pt_read_header(PTContext *pt) 32 | { 33 | EAChunk *chunk = read_chunk(pt->infp); 34 | 35 | if ((chunk->type & 0xffff) != MAKE_U32('P','T','\x00','\x00')) { 36 | fprintf(stderr, "error: expected PT chunk\n"); 37 | exit(EXIT_FAILURE); 38 | } 39 | 40 | while (1) { 41 | uint8_t cmd = chunk_read_u8(chunk); 42 | if (cmd == 0xFD) { 43 | while (1) { 44 | uint8_t key = chunk_read_u8(chunk); 45 | uint32_t value = chunk_read_var_int(chunk); 46 | 47 | if (key == 0xFF) 48 | break; 49 | else if (key == 0x85) 50 | pt->num_samples = value; 51 | else if (key == 0x83) 52 | pt->compression_type = value; 53 | } 54 | break; 55 | } else { 56 | chunk_read_var_int(chunk); 57 | } 58 | } 59 | 60 | if (pt->compression_type != 9) { 61 | fprintf(stderr, "error: invalid compression type %u (expected 9 for MicroTalk 10:1)\n", 62 | (unsigned)pt->compression_type); 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | if (pt->num_samples >= 0x01000000) { 67 | fprintf(stderr, "error: invalid num_samples %u\n", pt->num_samples); 68 | exit(EXIT_FAILURE); 69 | } 70 | 71 | /* Initialize the decoder. */ 72 | utk_init(&pt->utk); 73 | 74 | /* Write the WAV header. 
*/ 75 | write_u32(pt->outfp, MAKE_U32('R','I','F','F')); 76 | write_u32(pt->outfp, 36 + pt->num_samples*2); 77 | write_u32(pt->outfp, MAKE_U32('W','A','V','E')); 78 | write_u32(pt->outfp, MAKE_U32('f','m','t',' ')); 79 | write_u32(pt->outfp, 16); 80 | write_u16(pt->outfp, 1); 81 | write_u16(pt->outfp, 1); 82 | write_u32(pt->outfp, 22050); 83 | write_u32(pt->outfp, 22050*2); 84 | write_u16(pt->outfp, 2); 85 | write_u16(pt->outfp, 16); 86 | write_u32(pt->outfp, MAKE_U32('d','a','t','a')); 87 | write_u32(pt->outfp, pt->num_samples*2); 88 | } 89 | 90 | static void pt_decode(PTContext *pt) 91 | { 92 | UTKContext *utk = &pt->utk; 93 | uint32_t num_samples = pt->num_samples; 94 | 95 | utk_set_fp(utk, pt->infp); 96 | 97 | while (num_samples > 0) { 98 | int count = MIN(num_samples, 432); 99 | int i; 100 | 101 | utk_decode_frame(utk); 102 | 103 | for (i = 0; i < count; i++) { 104 | int x = ROUND(pt->utk.decompressed_frame[i]); 105 | write_u16(pt->outfp, (int16_t)CLAMP(x, -32768, 32767)); 106 | } 107 | 108 | num_samples -= count; 109 | } 110 | } 111 | 112 | int main(int argc, char *argv[]) 113 | { 114 | PTContext pt; 115 | const char *infile, *outfile; 116 | FILE *infp, *outfp; 117 | int force = 0; 118 | 119 | /* Parse arguments. */ 120 | if (argc == 4 && !strcmp(argv[1], "-f")) { 121 | force = 1; 122 | argv++, argc--; 123 | } 124 | 125 | if (argc != 3) { 126 | printf("Usage: utkdecode-bnb [-f] infile outfile\n"); 127 | printf("Decode Beasts & Bumpkins M10 to wav.\n"); 128 | return EXIT_FAILURE; 129 | } 130 | 131 | infile = argv[1]; 132 | outfile = argv[2]; 133 | 134 | /* Open the input/output files. 
*/ 135 | infp = fopen(infile, "rb"); 136 | if (!infp) { 137 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno)); 138 | return EXIT_FAILURE; 139 | } 140 | 141 | if (!force && fopen(outfile, "rb")) { 142 | fprintf(stderr, "error: '%s' already exists\n", outfile); 143 | return EXIT_FAILURE; 144 | } 145 | 146 | outfp = fopen(outfile, "wb"); 147 | if (!outfp) { 148 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno)); 149 | return EXIT_FAILURE; 150 | } 151 | 152 | memset(&pt, 0, sizeof(pt)); 153 | pt.infp = infp; 154 | pt.outfp = outfp; 155 | 156 | pt_read_header(&pt); 157 | pt_decode(&pt); 158 | 159 | if (fclose(outfp) != 0) { 160 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); 161 | return EXIT_FAILURE; 162 | } 163 | 164 | fclose(infp); 165 | 166 | return EXIT_SUCCESS; 167 | } -------------------------------------------------------------------------------- /utkdecode-fifa.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** utkdecode-fifa 3 | ** Decode FIFA 2001/2002 MicroTalk to wav. 4 | ** Authors: Andrew D'Addesio 5 | ** License: Public domain 6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math 7 | ** -fwhole-program -g0 -s -o utkdecode-fifa utkdecode-fifa.c 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "utk.h" 15 | #include "io.h" 16 | #include "eachunk.h" 17 | 18 | #define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) 19 | #define ROUND(x) ((x) >= 0.0f ? 
((x)+0.5f) : ((x)-0.5f)) 20 | #define MIN(x,y) ((x)<(y)?(x):(y)) 21 | #define MAX(x,y) ((x)>(y)?(x):(y)) 22 | #define CLAMP(x,min,max) MIN(MAX(x,min),max) 23 | 24 | typedef struct EAContext { 25 | FILE *infp, *outfp; 26 | uint32_t audio_pos; 27 | uint32_t num_samples; 28 | uint32_t num_data_chunks; 29 | uint32_t compression_type; 30 | uint32_t codec_revision; 31 | UTKContext utk; 32 | } EAContext; 33 | 34 | static void ea_read_schl(EAContext *ea) 35 | { 36 | uint32_t id; 37 | EAChunk *chunk = read_chunk(ea->infp); 38 | 39 | if (chunk->type != MAKE_U32('S','C','H','l')) { 40 | fprintf(stderr, "error: expected SCHl chunk\n"); 41 | exit(EXIT_FAILURE); 42 | } 43 | 44 | id = chunk_read_u32(chunk); 45 | if ((id & 0xffff) != MAKE_U32('P','T','\x00','\x00')) { 46 | fprintf(stderr, "error: expected PT chunk in SCHl header\n"); 47 | exit(EXIT_FAILURE); 48 | } 49 | 50 | while (1) { 51 | uint8_t cmd = chunk_read_u8(chunk); 52 | if (cmd == 0xFD) { 53 | while (1) { 54 | uint8_t key = chunk_read_u8(chunk); 55 | uint32_t value = chunk_read_var_int(chunk); 56 | 57 | if (key == 0xFF) 58 | break; 59 | else if (key == 0x80) 60 | ea->codec_revision = value; 61 | else if (key == 0x85) 62 | ea->num_samples = value; 63 | else if (key == 0xA0) 64 | ea->compression_type = value; 65 | } 66 | break; 67 | } else { 68 | chunk_read_var_int(chunk); 69 | } 70 | } 71 | 72 | if (ea->compression_type != 4 && ea->compression_type != 22) { 73 | fprintf(stderr, "error: invalid compression type %u (expected 4 for MicroTalk 10:1 or 22 for MicroTalk 5:1)\n", 74 | (unsigned)ea->compression_type); 75 | exit(EXIT_FAILURE); 76 | } 77 | 78 | if (ea->num_samples >= 0x01000000) { 79 | fprintf(stderr, "error: invalid num_samples %u\n", ea->num_samples); 80 | exit(EXIT_FAILURE); 81 | } 82 | 83 | /* Initialize the decoder. */ 84 | utk_init(&ea->utk); 85 | 86 | /* Write the WAV header. 
*/ 87 | write_u32(ea->outfp, MAKE_U32('R','I','F','F')); 88 | write_u32(ea->outfp, 36 + ea->num_samples*2); 89 | write_u32(ea->outfp, MAKE_U32('W','A','V','E')); 90 | write_u32(ea->outfp, MAKE_U32('f','m','t',' ')); 91 | write_u32(ea->outfp, 16); 92 | write_u16(ea->outfp, 1); 93 | write_u16(ea->outfp, 1); 94 | write_u32(ea->outfp, 22050); 95 | write_u32(ea->outfp, 22050*2); 96 | write_u16(ea->outfp, 2); 97 | write_u16(ea->outfp, 16); 98 | write_u32(ea->outfp, MAKE_U32('d','a','t','a')); 99 | write_u32(ea->outfp, ea->num_samples*2); 100 | } 101 | 102 | static void ea_read_sccl(EAContext *ea) 103 | { 104 | EAChunk *chunk = read_chunk(ea->infp); 105 | 106 | if (chunk->type != MAKE_U32('S','C','C','l')) { 107 | fprintf(stderr, "error: expected SCCl chunk\n"); 108 | exit(EXIT_FAILURE); 109 | } 110 | 111 | ea->num_data_chunks = chunk_read_u32(chunk); 112 | if (ea->num_data_chunks >= 0x01000000) { 113 | fprintf(stderr, "error: invalid num_data_chunks %u\n", (unsigned)ea->num_data_chunks); 114 | exit(EXIT_FAILURE); 115 | } 116 | } 117 | 118 | static void ea_read_scdl(EAContext *ea) 119 | { 120 | EAChunk *chunk = read_chunk(ea->infp); 121 | UTKContext *utk = &ea->utk; 122 | uint32_t num_samples; 123 | 124 | if (chunk->type != MAKE_U32('S','C','D','l')) { 125 | fprintf(stderr, "error: expected SCDl chunk\n"); 126 | exit(EXIT_FAILURE); 127 | } 128 | 129 | num_samples = chunk_read_u32(chunk); 130 | chunk_read_u32(chunk); /* unknown */ 131 | chunk_read_u8(chunk); /* unknown */ 132 | 133 | if (num_samples > ea->num_samples - ea->audio_pos) 134 | num_samples = ea->num_samples - ea->audio_pos; 135 | 136 | utk_set_ptr(utk, chunk->ptr, chunk->end); 137 | 138 | while (num_samples > 0) { 139 | int count = MIN(num_samples, 432); 140 | int i; 141 | 142 | if (ea->codec_revision >= 3) 143 | utk_rev3_decode_frame(utk); 144 | else 145 | utk_decode_frame(utk); 146 | 147 | for (i = 0; i < count; i++) { 148 | int x = ROUND(ea->utk.decompressed_frame[i]); 149 | write_u16(ea->outfp, 
(int16_t)CLAMP(x, -32768, 32767)); 150 | } 151 | 152 | ea->audio_pos += count; 153 | num_samples -= count; 154 | } 155 | } 156 | 157 | static void ea_read_scel(const EAContext *ea) 158 | { 159 | EAChunk *chunk = read_chunk(ea->infp); 160 | 161 | if (chunk->type != MAKE_U32('S','C','E','l')) { 162 | fprintf(stderr, "error: expected SCEl chunk\n"); 163 | exit(EXIT_FAILURE); 164 | } 165 | 166 | if (ea->audio_pos != ea->num_samples) { 167 | fprintf(stderr, "error: failed to decode the correct number of samples\n"); 168 | exit(EXIT_FAILURE); 169 | } 170 | } 171 | 172 | int main(int argc, char *argv[]) 173 | { 174 | EAContext ea; 175 | const char *infile, *outfile; 176 | FILE *infp, *outfp; 177 | int force = 0; 178 | unsigned int i; 179 | 180 | if (argc == 4 && !strcmp(argv[1], "-f")) { 181 | force = 1; 182 | argv++, argc--; 183 | } 184 | 185 | if (argc != 3) { 186 | printf("Usage: utkdecode-fifa [-f] infile outfile\n"); 187 | printf("Decode FIFA 2001/2002 MicroTalk to wav.\n"); 188 | return EXIT_FAILURE; 189 | } 190 | 191 | infile = argv[1]; 192 | outfile = argv[2]; 193 | 194 | infp = fopen(infile, "rb"); 195 | if (!infp) { 196 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno)); 197 | return EXIT_FAILURE; 198 | } 199 | 200 | if (!force && fopen(outfile, "rb")) { 201 | fprintf(stderr, "error: '%s' already exists\n", outfile); 202 | return EXIT_FAILURE; 203 | } 204 | 205 | outfp = fopen(outfile, "wb"); 206 | if (!outfp) { 207 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno)); 208 | return EXIT_FAILURE; 209 | } 210 | 211 | memset(&ea, 0, sizeof(ea)); 212 | ea.infp = infp; 213 | ea.outfp = outfp; 214 | 215 | ea_read_schl(&ea); 216 | ea_read_sccl(&ea); 217 | 218 | for (i = 0; i < ea.num_data_chunks; i++) 219 | ea_read_scdl(&ea); 220 | 221 | ea_read_scel(&ea); 222 | 223 | if (!outfp) { 224 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); 225 | return EXIT_FAILURE; 226 | } 
227 | 228 | return EXIT_SUCCESS; 229 | } -------------------------------------------------------------------------------- /utkdecode.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** utkdecode 3 | ** Decode Maxis UTK to wav. 4 | ** Authors: Andrew D'Addesio 5 | ** License: Public domain 6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math 7 | ** -fwhole-program -g0 -s -o utkdecode utkdecode.c 8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "utk.h" 15 | #include "io.h" 16 | 17 | #define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) 18 | #define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f)) 19 | #define MIN(x,y) ((x)<(y)?(x):(y)) 20 | #define MAX(x,y) ((x)>(y)?(x):(y)) 21 | #define CLAMP(x,min,max) MIN(MAX(x,min),max) 22 | 23 | int main(int argc, char *argv[]) 24 | { 25 | const char *infile, *outfile; 26 | UTKContext ctx; 27 | uint32_t sID; 28 | uint32_t dwOutSize; 29 | uint32_t dwWfxSize; 30 | uint16_t wFormatTag; 31 | uint16_t nChannels; 32 | uint32_t nSamplesPerSec; 33 | uint32_t nAvgBytesPerSec; 34 | uint16_t nBlockAlign; 35 | uint16_t wBitsPerSample; 36 | uint16_t cbSize; 37 | uint32_t num_samples; 38 | FILE *infp, *outfp; 39 | int force = 0; 40 | int error = 0; 41 | int i; 42 | 43 | /* Parse arguments. */ 44 | if (argc == 4 && !strcmp(argv[1], "-f")) { 45 | force = 1; 46 | argv++, argc--; 47 | } 48 | 49 | if (argc != 3) { 50 | printf("Usage: utkdecode [-f] infile outfile\n"); 51 | printf("Decode Maxis UTK to wav.\n"); 52 | return EXIT_FAILURE; 53 | } 54 | 55 | infile = argv[1]; 56 | outfile = argv[2]; 57 | 58 | /* Open the input/output files. 
*/ 59 | infp = fopen(infile, "rb"); 60 | if (!infp) { 61 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno)); 62 | return EXIT_FAILURE; 63 | } 64 | 65 | if (!force && fopen(outfile, "rb")) { 66 | fprintf(stderr, "error: '%s' already exists\n", outfile); 67 | return EXIT_FAILURE; 68 | } 69 | 70 | outfp = fopen(outfile, "wb"); 71 | if (!outfp) { 72 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno)); 73 | return EXIT_FAILURE; 74 | } 75 | 76 | /* Parse the UTK header. */ 77 | sID = read_u32(infp); 78 | dwOutSize = read_u32(infp); 79 | dwWfxSize = read_u32(infp); 80 | wFormatTag = read_u16(infp); 81 | nChannels = read_u16(infp); 82 | nSamplesPerSec = read_u32(infp); 83 | nAvgBytesPerSec = read_u32(infp); 84 | nBlockAlign = read_u16(infp); 85 | wBitsPerSample = read_u16(infp); 86 | cbSize = read_u16(infp); 87 | read_u16(infp); /* padding */ 88 | 89 | if (sID != MAKE_U32('U','T','M','0')) { 90 | fprintf(stderr, "error: not a valid UTK file (expected UTM0 signature)\n"); 91 | return EXIT_FAILURE; 92 | } else if ((dwOutSize & 0x01) != 0 || dwOutSize >= 0x01000000) { 93 | fprintf(stderr, "error: invalid dwOutSize %u\n", (unsigned)dwOutSize); 94 | return EXIT_FAILURE; 95 | } else if (dwWfxSize != 20) { 96 | fprintf(stderr, "error: invalid dwWfxSize %u (expected 20)\n", (unsigned)dwWfxSize); 97 | return EXIT_FAILURE; 98 | } else if (wFormatTag != 1) { 99 | fprintf(stderr, "error: invalid wFormatTag %u (expected 1)\n", (unsigned)wFormatTag); 100 | return EXIT_FAILURE; 101 | } 102 | 103 | if (nChannels != 1) { 104 | fprintf(stderr, "error: invalid nChannels %u (only mono is supported)\n", (unsigned)nChannels); 105 | error = 1; 106 | } 107 | if (nSamplesPerSec < 8000 || nSamplesPerSec > 192000) { 108 | fprintf(stderr, "error: invalid nSamplesPerSec %u\n", (unsigned)nSamplesPerSec); 109 | error = 1; 110 | } 111 | if (nAvgBytesPerSec != nSamplesPerSec * nBlockAlign) { 112 | fprintf(stderr, "error: invalid 
nAvgBytesPerSec %u (expected nSamplesPerSec * nBlockAlign)\n", (unsigned)nAvgBytesPerSec); 113 | error = 1; 114 | } 115 | if (nBlockAlign != 2) { 116 | fprintf(stderr, "error: invalid nBlockAlign %u (expected 2)\n", (unsigned)nBlockAlign); 117 | error = 1; 118 | } 119 | if (wBitsPerSample != 16) { 120 | fprintf(stderr, "error: invalid wBitsPerSample %u (expected 16)\n", (unsigned)wBitsPerSample); 121 | error = 1; 122 | } 123 | if (cbSize != 0) { 124 | fprintf(stderr, "error: invalid cbSize %u (expected 0)\n", (unsigned)cbSize); 125 | error = 1; 126 | } 127 | if (error) 128 | return EXIT_FAILURE; 129 | 130 | num_samples = dwOutSize/2; 131 | 132 | /* Write the WAV header. */ 133 | write_u32(outfp, MAKE_U32('R','I','F','F')); 134 | write_u32(outfp, 36 + num_samples*2); 135 | write_u32(outfp, MAKE_U32('W','A','V','E')); 136 | write_u32(outfp, MAKE_U32('f','m','t',' ')); 137 | write_u32(outfp, 16); 138 | write_u16(outfp, wFormatTag); 139 | write_u16(outfp, nChannels); 140 | write_u32(outfp, nSamplesPerSec); 141 | write_u32(outfp, nAvgBytesPerSec); 142 | write_u16(outfp, nBlockAlign); 143 | write_u16(outfp, wBitsPerSample); 144 | write_u32(outfp, MAKE_U32('d','a','t','a')); 145 | write_u32(outfp, num_samples*2); 146 | 147 | /* Decode. 
*/ 148 | utk_init(&ctx); 149 | utk_set_fp(&ctx, infp); 150 | 151 | while (num_samples > 0) { 152 | int count = MIN(num_samples, 432); 153 | 154 | utk_decode_frame(&ctx); 155 | 156 | for (i = 0; i < count; i++) { 157 | int x = ROUND(ctx.decompressed_frame[i]); 158 | write_u16(outfp, (int16_t)CLAMP(x, -32768, 32767)); 159 | } 160 | 161 | num_samples -= count; 162 | } 163 | 164 | if (fclose(outfp) != 0) { 165 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno)); 166 | return EXIT_FAILURE; 167 | } 168 | 169 | fclose(infp); 170 | 171 | return EXIT_SUCCESS; 172 | } -------------------------------------------------------------------------------- /utkencode.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** utkencode 3 | ** Encode wav to Maxis UTalk. 4 | ** Authors: Fatbag 5 | ** License: Public domain (no warranties) 6 | ** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s 7 | ** -o utkencode utkencode.c 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define MIN(x,y) ((x)<(y)?(x):(y)) 18 | #define MAX(x,y) ((x)>(y)?(x):(y)) 19 | #define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x)) 20 | #define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5))) 21 | #define ABS(x) ((x)>=0?(x):-(x)) 22 | 23 | #define READ16(x) ((x)[0]|((x)[1]<<8)) 24 | #define READ32(x) ((x)[0]|((x)[1]<<8)|((x)[2]<<16)|((x)[3]<<24)) 25 | 26 | #define WRITE16(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8) 27 | #define WRITE32(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\ 28 | (d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24) 29 | 30 | const float utk_rc_table[64] = { 31 | 0, 32 | -.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875, 33 | -.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375, 34 | -.930754005908966064453125, 
-.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375, 35 | -.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625, 36 | -.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875, 37 | -.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875, 38 | -.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375, 39 | -.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625, 40 | 0, 41 | +.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375, 42 | +.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875, 43 | +.4642859995365142822265625, +.515873014926910400390625, +.567460000514984130859375, +.61904799938201904296875, 44 | +.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625, 45 | +.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375, 46 | +.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375, 47 | +.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875, 48 | +.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375 49 | }; 50 | 51 | static const char *prog_name; 52 | 53 | static void print_help(void) 54 | { 55 | printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name); 56 | printf("Encode wav to Maxis UTalk.\n"); 57 | printf("\n"); 58 | printf("General options:\n"); 59 | printf(" -f, --force overwrite without prompting\n"); 60 | printf(" -q, --quiet suppress normal output and do not prompt\n"); 61 | printf(" -h, --help display this help and 
exit\n"); 62 | printf(" -V, --version output version information and exit\n"); 63 | printf("\n"); 64 | printf("Encoding options:\n"); 65 | printf(" -b, --bitrate=N target bitrate in bits/sec (default 32000)\n"); 66 | printf(" -H, --halved-inn encode innovation using half bandwidth\n"); 67 | printf(" (default)\n"); 68 | printf(" -F, --full-inn encode innovation using full bandwidth\n"); 69 | printf(" -T, --huff-threshold=N use the Huffman codebook with threshold N where\n"); 70 | printf(" N is an integer between 16 and 32 (inclusive)\n"); 71 | printf(" (default 24)\n"); 72 | printf(" -S, --inngain-sig=N use innovation gain significand N where N is\n"); 73 | printf(" between 8 and 128 (inclusive) in steps of 8\n"); 74 | printf(" (default 64)\n"); 75 | printf(" -B, --inngain-base=N use innovation gain base N where N is between\n"); 76 | printf(" 1.040 and 1.103 (inclusive) in steps of 0.001\n"); 77 | printf(" (default 1.068)\n"); 78 | printf("\n"); 79 | printf("If infile is \"-\", read from standard input.\n"); 80 | printf("If outfile is \"-\", write to standard output.\n"); 81 | } 82 | 83 | static void print_version(void) 84 | { 85 | printf("utkencode 0.0\n"); 86 | } 87 | 88 | static void print_usage_error(void) 89 | { 90 | fprintf(stderr, "Usage: %s [options] infile.wav outfile.utk\n", 91 | prog_name); 92 | fprintf(stderr, "Try '%s --help' for more options.\n", prog_name); 93 | } 94 | 95 | static const char short_options[] = "fqhVb:HFT:S:B:"; 96 | static const struct option long_options[] = { 97 | {"force", no_argument, 0, 'f'}, 98 | {"quiet", no_argument, 0, 'q'}, 99 | {"help", no_argument, 0, 'h'}, 100 | {"version", no_argument, 0, 'V'}, 101 | {"bitrate", required_argument, 0, 'b'}, 102 | {"halved-inn", no_argument, 0, 'H'}, 103 | {"full-inn", no_argument, 0, 'F'}, 104 | {"huff-threshold", required_argument, 0, 'T'}, 105 | {"inngain-sig", required_argument, 0, 'S'}, 106 | {"inngain-base", required_argument, 0, 'B'}, 107 | {0, 0, 0, 0} 108 | }; 109 | 110 | static 
int bitrate = 32000; 111 | static int force = 0; 112 | static int quiet = 0; 113 | static int halved_innovation = 1; 114 | static int huffman_threshold = 24; 115 | static int inngain_sig = 64; 116 | static float inngain_base = 1.068f; 117 | static const char *infile = ""; 118 | static const char *outfile = ""; 119 | static FILE *infp = NULL; 120 | static FILE *outfp = NULL; 121 | 122 | static uint8_t wav_buffer[432*2]; 123 | static float input_samples[12+432]; 124 | static float adaptive_codebook[324+432]; 125 | static uint8_t compressed_buffer[1024]; 126 | static uint8_t inn_buffers[2][256]; 127 | static float prev_rc[12]; 128 | static float innovation[5+108+5]; 129 | static float inn_gains[64]; 130 | 131 | struct bit_writer_context { 132 | uint8_t written_bits_count; 133 | size_t pos; 134 | uint8_t *buffer; 135 | }; 136 | 137 | static void read_data(FILE *fp, uint8_t *buffer, size_t size) 138 | { 139 | if (fread(buffer, 1, size, fp) != size) { 140 | fprintf(stderr, "%s: failed to read '%s': %s\n", 141 | prog_name, infile, ferror(fp) 142 | ? strerror(errno) : "reached end of file"); 143 | exit(EXIT_FAILURE); 144 | } 145 | } 146 | 147 | static void write_data(FILE *fp, const uint8_t *buffer, size_t size) 148 | { 149 | if (fwrite(buffer, 1, size, fp) != size) { 150 | fprintf(stderr, "%s: failed to write to '%s': %s\n", 151 | prog_name, outfile, ferror(fp) 152 | ? 
/*
** Return the index of the entry of `alphabet` (alphabet_size entries, at
** least one) that lies closest to `value`. When several entries are equally
** close, the lowest index is returned.
*/
static unsigned quantize(float value, const float *alphabet, size_t alphabet_size)
{
    unsigned best_idx = 0;
    unsigned idx = 1;
    float best_gap = value - alphabet[0];

    /* absolute value, spelled out */
    if (!(best_gap >= 0))
        best_gap = -best_gap;

    while (idx < alphabet_size) {
        float gap = value - alphabet[idx];

        if (!(gap >= 0))
            gap = -gap;

        /* strict comparison: an earlier entry wins a tie */
        if (gap < best_gap) {
            best_idx = idx;
            best_gap = gap;
        }

        idx++;
    }

    return best_idx;
}
/*
** Compute the autocorrelation of one 432-sample frame for lags 0 through 12:
**   r[lag] = sum over j in [0, 432-lag) of samples[j]*samples[j+lag]
** `r` must have room for 13 values; `samples` must hold 432 values.
*/
static void find_autocorrelations(float *r, const float *samples)
{
    int lag;

    for (lag = 0; lag <= 12; lag++) {
        const float *shifted = samples + lag;
        int terms = 432 - lag;
        float sum = 0;
        int n;

        for (n = 0; n < terms; n++)
            sum += samples[n]*shifted[n];

        r[lag] = sum;
    }
}
/*
** Compute the prediction residual of `source` under a 12-tap predictor:
**   excitation[i] = source[i] - sum_{j=0..11} lpc[j]*source[i-1-j]
** for i in [0, length). The 12 samples preceding source[0] are read, so
** `source` must point at least 12 elements into its buffer.
*/
static void find_excitation(float *excitation, const float *source,
    int length, const float *lpc)
{
    int n;

    for (n = 0; n < length; n++) {
        const float *newest = source + n - 1; /* most recent past sample */
        float predicted = 0.0f;
        int tap = 0;

        while (tap < 12) {
            predicted += lpc[tap]*newest[-tap];
            tap++;
        }

        excitation[n] = source[n] - predicted;
    }
}
/*
** Squared error that would result from discarding every second sample of
** the 108-sample signal `x` and reconstructing it.
**
** The samples examined are those at indices !a, !a+2, ... below 108.
** With z set, the reconstruction is zero, so the error is the energy of
** those samples. With z clear, each is predicted from its neighbours at
** distances 1, 3 and 5 on both sides, so x[-5..112] must be readable.
*/
static float interpolation_error(int a, int z, const float *x)
{
    float total = 0.0f;
    int n = a ? 0 : 1;

    while (n < 108) {
        if (z) {
            total += x[n]*x[n];
        } else {
            float estimate
                = (x[n-1]+x[n+1]) * .5973859429f
                - (x[n-3]+x[n+3]) * .1145915613f
                + (x[n-5]+x[n+5]) * .0180326793f;
            total += (estimate - x[n])*(estimate - x[n]);
        }
        n += 2;
    }

    return total;
}
*/ 439 | float error; 440 | float best_error; 441 | int best_a = 0, best_z = 1; 442 | 443 | best_error = interpolation_error(0, 1, innovation); 444 | 445 | error = interpolation_error(1, 1, innovation); 446 | if (error < best_error) { 447 | best_error = error; 448 | best_a = 1, best_z = 1; 449 | } 450 | 451 | error = interpolation_error(0, 0, innovation); 452 | if (error < best_error) { 453 | best_error = error; 454 | best_a = 0, best_z = 0; 455 | } 456 | 457 | error = interpolation_error(1, 0, innovation); 458 | if (error < best_error) { 459 | best_error = error; 460 | best_a = 1, best_z = 0; 461 | } 462 | 463 | *a = best_a; 464 | *z = best_z; 465 | } 466 | 467 | struct huffman_code { 468 | uint16_t bits_value; 469 | uint16_t bits_count; 470 | }; 471 | 472 | static const struct huffman_code huffman_models[2][13+1+13] = { 473 | /* model 0 */ 474 | { 475 | /* -13 */ {16255, 16}, 476 | /* -12 */ {8063, 15}, 477 | /* -11 */ {3967, 14}, 478 | /* -10 */ {1919, 13}, 479 | /* -9 */ {895, 12}, 480 | /* -8 */ {383, 11}, 481 | /* -7 */ {127, 10}, 482 | /* -6 */ {63, 8}, 483 | /* -5 */ {31, 7}, 484 | /* -4 */ {15, 6}, 485 | /* -3 */ {7, 5}, 486 | /* -2 */ {3, 4}, 487 | /* -1 */ {2, 2}, 488 | /* 0 */ {0, 2}, 489 | /* +1 */ {1, 2}, 490 | /* +2 */ {11, 4}, 491 | /* +3 */ {23, 5}, 492 | /* +4 */ {47, 6}, 493 | /* +5 */ {95, 7}, 494 | /* +6 */ {191, 8}, 495 | /* +7 */ {639, 10}, 496 | /* +8 */ {1407, 11}, 497 | /* +9 */ {2943, 12}, 498 | /* +10 */ {6015, 13}, 499 | /* +11 */ {12159, 14}, 500 | /* +12 */ {24447, 15}, 501 | /* +13 */ {49023, 16} 502 | }, 503 | 504 | /* model 1 */ 505 | { 506 | /* -13 */ {8127, 15}, 507 | /* -12 */ {4031, 14}, 508 | /* -11 */ {1983, 13}, 509 | /* -10 */ {959, 12}, 510 | /* -9 */ {447, 11}, 511 | /* -8 */ {191, 10}, 512 | /* -7 */ {63, 9}, 513 | /* -6 */ {31, 7}, 514 | /* -5 */ {15, 6}, 515 | /* -4 */ {7, 5}, 516 | /* -3 */ {3, 4}, 517 | /* -2 */ {1, 3}, 518 | /* -1 */ {2, 3}, 519 | /* 0 */ {0, 2}, 520 | /* +1 */ {6, 3}, 521 | /* +2 */ {5, 3}, 522 | 
/* +3 */ {11, 4}, 523 | /* +4 */ {23, 5}, 524 | /* +5 */ {47, 6}, 525 | /* +6 */ {95, 7}, 526 | /* +7 */ {319, 9}, 527 | /* +8 */ {703, 10}, 528 | /* +9 */ {1471, 11}, 529 | /* +10 */ {3007, 12}, 530 | /* +11 */ {6079, 13}, 531 | /* +12 */ {12223, 14}, 532 | /* +13 */ {24511, 15} 533 | } 534 | }; 535 | 536 | static void encode_huffman(struct bit_writer_context *bwc, 537 | float *innovation_out, int *bits_used_out, float *error_out, 538 | const float *innovation_in, int halved_innovation, 539 | int pow, int a, int z) 540 | { 541 | int interval = halved_innovation ? 2 : 1; 542 | float inn_gain; 543 | float total_error = 0.0f; 544 | int counter; 545 | int values[108]; 546 | int zero_counts[108]; 547 | int model; 548 | int bits_start, bits_end; 549 | int i; 550 | 551 | inn_gain = inn_gains[pow]; 552 | if (!z) 553 | inn_gain *= 0.5f; 554 | 555 | bits_start = 8*bwc->pos + bwc->written_bits_count; 556 | 557 | if (halved_innovation) 558 | bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8); 559 | else 560 | bwc_write_bits(bwc, pow, 6); 561 | 562 | for (i = a; i < 108; i += interval) { 563 | float e; 564 | 565 | values[i] = ROUND(CLAMP( 566 | innovation_in[i]/inn_gain, -13.0f, 13.0f)); 567 | 568 | innovation_out[i] = inn_gain*values[i]; 569 | 570 | e = innovation_out[i] - innovation_in[i]; 571 | total_error += e*e; 572 | } 573 | 574 | *error_out = total_error; 575 | 576 | /* Find the zero runs at each position i (how many zeros 577 | ** in a row there are at position i). 578 | ** When interval=2 and a=1, start the search from i=105 instead 579 | ** of 107 in order to duplicate the off-by-one mistake in the 580 | ** decoder. (Thus, we will subtract a instead of adding.) 
581 | ** For details, see: http://wiki.niotso.org/UTK */ 582 | counter = 0; 583 | for (i = 108 - interval - a; i >= 0; i -= interval) { 584 | if (values[i] == 0) 585 | counter++; 586 | else 587 | counter = 0; 588 | zero_counts[i] = counter; 589 | } 590 | 591 | i = a; 592 | model = 0; 593 | while (i < 108) { 594 | if (zero_counts[i] >= 7) { 595 | int length = MIN(zero_counts[i], 70); 596 | 597 | if (model == 0) 598 | bwc_write_bits(bwc, 255 | ((length-7)<<8), 14); 599 | else 600 | bwc_write_bits(bwc, 127 | ((length-7)<<7), 13); 601 | 602 | model = 0; 603 | i += length * interval; 604 | } else { 605 | int value = values[i]; 606 | 607 | bwc_write_bits(bwc, 608 | huffman_models[model][13+value].bits_value, 609 | huffman_models[model][13+value].bits_count); 610 | 611 | model = (value < -1 || value > 1); 612 | i += interval; 613 | } 614 | } 615 | 616 | bits_end = 8*bwc->pos + bwc->written_bits_count; 617 | *bits_used_out = bits_end - bits_start; 618 | } 619 | 620 | static void encode_triangular(struct bit_writer_context *bwc, 621 | float *innovation_out, int *bits_used_out, float *error_out, 622 | const float *innovation_in, int halved_innovation, 623 | int pow, int a, int z) 624 | { 625 | int interval = halved_innovation ? 
/*
** Apply a weak low-pass filter, in place, to the innovation samples that
** survive downsampling by two: indices a, a+2, ... below 108. Samples of
** the other parity are only read, never written, so earlier outputs do not
** feed into later ones. The result is scaled by 1.0 when z is set and by
** 0.5 otherwise. x[-5..112] must be readable.
**
** filter coeffs: (GNU Octave)
** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))'
*/
static void low_pass_innovation(float *x, int a, int z)
{
    int n = a;

    while (n < 108) {
        x[n] = (z ? 1.0f : 0.5f)*(x[n]
            + (x[n-1]+x[n+1]) * 0.6189590521549956f
            + (x[n-3]+x[n+3]) * -0.1633990749076792f
            + (x[n-5]+x[n+5]) * 0.05858453198856907f);
        n += 2;
    }
}
*/ 730 | for (pow = min_pow; pow <= 63; pow++) { 731 | int distance; 732 | 733 | bwc_init(&encodings[m].bwc, inn_buffers[m]); 734 | bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], 735 | bwc->written_bits_count); 736 | 737 | encode_huffman(&encodings[m].bwc, 738 | encodings[m].innovation, 739 | &encodings[m].bits_used, 740 | &encodings[m].error, 741 | innovation, halved_innovation, 742 | pow, a, z); 743 | 744 | distance = ABS(encodings[m].bits_used 745 | - target_bit_count); 746 | if (pow == min_pow || distance < best_distance) { 747 | best_distance = distance; 748 | m = !m; /* swap the buffers */ 749 | } 750 | } 751 | } else { 752 | /* Encode using the triangular noise model. */ 753 | float best_error = 0.0f; 754 | int pow; 755 | 756 | /* Find the innovation gain that results in 757 | ** the highest quality. */ 758 | for (pow = 0; pow <= 63; pow++) { 759 | bwc_init(&encodings[m].bwc, inn_buffers[m]); 760 | bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos], 761 | bwc->written_bits_count); 762 | 763 | encode_triangular(&encodings[m].bwc, 764 | encodings[m].innovation, 765 | &encodings[m].bits_used, 766 | &encodings[m].error, 767 | innovation, halved_innovation, 768 | pow, a, z); 769 | 770 | if (pow == 0 || encodings[m].error < best_error) { 771 | best_error = encodings[m].error; 772 | m = !m; /* swap the buffers */ 773 | } 774 | } 775 | } 776 | 777 | /* Swap the buffers again to return back to our best encoding. */ 778 | m = !m; 779 | 780 | /* Write this encoding out to the UTK bitstream. */ 781 | memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer, 782 | encodings[m].bwc.pos+1); 783 | bwc->pos += encodings[m].bwc.pos; 784 | bwc->written_bits_count = encodings[m].bwc.written_bits_count; 785 | 786 | /* Update the innovation signal with the quantized version. 
*/ 787 | memcpy(innovation, encodings[m].innovation, 108*sizeof(float)); 788 | if (halved_innovation) 789 | interpolate(innovation, a, z); 790 | 791 | *bits_used = encodings[m].bits_used; 792 | } 793 | 794 | static int parse_arguments(int argc, char *argv[]) 795 | { 796 | int c; 797 | int value; 798 | char *endptr; 799 | 800 | prog_name = (argc >= 1 && argv[0][0] != '\0') ? argv[0] : "utkencode"; 801 | 802 | while ((c = getopt_long(argc, argv, short_options, 803 | long_options, NULL)) != -1) { 804 | switch (c) { 805 | case 'b': 806 | bitrate = (int)strtol(optarg, &endptr, 10); 807 | if (*endptr != '\0' 808 | || bitrate < 1000 809 | || bitrate > 1000000) { 810 | fprintf(stderr, "%s: invalid bitrate -- %s\n", 811 | prog_name, optarg); 812 | print_usage_error(); 813 | return -1; 814 | } 815 | break; 816 | case 'f': 817 | force = 1; 818 | break; 819 | case 'q': 820 | quiet = 1; 821 | break; 822 | case 'h': 823 | print_help(); 824 | return 1; 825 | case 'V': 826 | print_version(); 827 | return 1; 828 | case 'H': 829 | halved_innovation = 1; 830 | break; 831 | case 'F': 832 | halved_innovation = 0; 833 | break; 834 | case 'T': 835 | huffman_threshold = (int)strtol(optarg, &endptr, 10); 836 | if (*endptr != '\0' 837 | || huffman_threshold < 16 838 | || huffman_threshold > 32) { 839 | fprintf(stderr, "%s: invalid Huffman " 840 | "threshold -- %s\n", prog_name, optarg); 841 | print_usage_error(); 842 | return -1; 843 | } 844 | break; 845 | case 'S': 846 | inngain_sig = (int)strtol(optarg, &endptr, 10); 847 | if (*endptr != '\0' 848 | || inngain_sig < 8 849 | || inngain_sig > 128 850 | || (inngain_sig & 7) != 0) { 851 | fprintf(stderr, "%s: invalid innovation gain" 852 | " significand -- %s\n", prog_name, 853 | optarg); 854 | print_usage_error(); 855 | return -1; 856 | } 857 | break; 858 | case 'B': 859 | if (optarg[0] != '1' || optarg[1] != '.' 
860 | || (value = read_dec_places(optarg+2, 3)) < 0 861 | || value < 40 862 | || value > 103) { 863 | fprintf(stderr, "%s: invalid innovation gain" 864 | " base -- %s\n", prog_name, optarg); 865 | print_usage_error(); 866 | return -1; 867 | } 868 | inngain_base = 1.0f + (float)value/1000.0f; 869 | break; 870 | default: 871 | print_usage_error(); 872 | return -1; 873 | } 874 | } 875 | 876 | if (argc - optind == 0) { 877 | fprintf(stderr, "%s: missing infile\n", prog_name); 878 | print_usage_error(); 879 | return -1; 880 | } else if (argc - optind == 1) { 881 | fprintf(stderr, "%s: missing outfile\n", prog_name); 882 | print_usage_error(); 883 | return -1; 884 | } else if (argc - optind >= 3) { 885 | fprintf(stderr, "%s: too many arguments passed\n", prog_name); 886 | print_usage_error(); 887 | return -1; 888 | } 889 | 890 | infile = argv[optind]; 891 | outfile = argv[optind+1]; 892 | 893 | return 0; 894 | } 895 | 896 | int main(int argc, char *argv[]) 897 | { 898 | int ret; 899 | uint8_t wav_header[44]; 900 | uint8_t utk_header[32]; 901 | unsigned bytes_remaining; 902 | int sampling_rate; 903 | struct bit_writer_context bwc; 904 | int i, j; 905 | 906 | ret = parse_arguments(argc, argv); 907 | if (ret < 0) 908 | return EXIT_FAILURE; 909 | else if (ret > 0) 910 | return EXIT_SUCCESS; 911 | 912 | if (!strcmp(infile, "-")) { 913 | infp = stdin; 914 | } else { 915 | infp = fopen(infile, "rb"); 916 | if (!infp) { 917 | fprintf(stderr, "%s: failed to open '%s' for" 918 | " reading: %s\n", prog_name, infile, 919 | strerror(errno)); 920 | return EXIT_FAILURE; 921 | } 922 | } 923 | setvbuf(infp, NULL, _IOFBF, BUFSIZ); 924 | 925 | if (!strcmp(outfile, "-")) { 926 | outfp = stdout; 927 | } else { 928 | if (!force && file_exists(outfile)) { 929 | if (quiet) { 930 | fprintf(stderr, "%s: failed to open '%s' for" 931 | " writing: file already exists\n", 932 | prog_name, outfile); 933 | return EXIT_FAILURE; 934 | } else { 935 | fprintf(stderr, "%s: overwrite '%s'? 
", 936 | prog_name, outfile); 937 | if (getchar() != 'y') 938 | return EXIT_FAILURE; 939 | } 940 | } 941 | 942 | outfp = fopen(outfile, "wb"); 943 | if (!outfp) { 944 | fprintf(stderr, "%s: failed to open '%s' for" 945 | " writing: %s\n", prog_name, outfile, 946 | strerror(errno)); 947 | return EXIT_FAILURE; 948 | } 949 | } 950 | setvbuf(outfp, NULL, _IOFBF, BUFSIZ); 951 | 952 | if (fread(wav_header, 1, 44, infp) != 44) { 953 | if (ferror(infp)) 954 | fprintf(stderr, "%s: failed to read '%s': %s\n", 955 | prog_name, infile, strerror(errno)); 956 | else 957 | fprintf(stderr, "%s: '%s' is not a valid wav file\n", 958 | prog_name, infile); /* (reached end of file) */ 959 | return EXIT_FAILURE; 960 | } 961 | 962 | if (memcmp(wav_header, "RIFF", 4) != 0 963 | || memcmp(wav_header+8, "WAVEfmt ", 8) != 0) { 964 | fprintf(stderr, "%s: '%s' is not a valid wav file\n", 965 | prog_name, infile); 966 | return EXIT_FAILURE; 967 | } 968 | 969 | if (READ16(wav_header+20) != 1 /* wFormatTag */ 970 | || READ16(wav_header+22) != 1 /* nChannels */ 971 | || READ16(wav_header+32) != 2 /* nBlockAlign */ 972 | || READ16(wav_header+34) != 16 /* wBitsPerSample */) { 973 | fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n", 974 | prog_name); 975 | return EXIT_FAILURE; 976 | } 977 | 978 | sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */ 979 | if (sampling_rate < 1000 || sampling_rate > 1000000) { 980 | fprintf(stderr, "%s: unsupported sampling rate %d\n", 981 | prog_name, sampling_rate); 982 | return EXIT_FAILURE; 983 | } 984 | 985 | memcpy(utk_header, "UTM0", 4); /* sID */ 986 | 987 | /* Drop the last byte from the wav file if there are an odd 988 | ** number of sample bytes. 
*/ 989 | bytes_remaining = READ32(wav_header+40) & (~1); 990 | WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */ 991 | 992 | WRITE32(utk_header+8, 20); /* dwWfxSize */ 993 | memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */ 994 | WRITE32(utk_header+28, 0); /* cbSize */ 995 | 996 | write_data(outfp, utk_header, 32); 997 | 998 | bwc_init(&bwc, compressed_buffer); 999 | 1000 | bwc_write_bits(&bwc, halved_innovation, 1); 1001 | bwc_write_bits(&bwc, 32 - huffman_threshold, 4); 1002 | bwc_write_bits(&bwc, inngain_sig/8 - 1, 4); 1003 | bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6); 1004 | bwc_flush(&bwc, outfp); 1005 | 1006 | for (i = 0; i < 12; i++) 1007 | input_samples[i] = 0.0f; 1008 | for (i = 0; i < 324; i++) 1009 | adaptive_codebook[i] = 0.0f; 1010 | for (i = 0; i < 12; i++) 1011 | prev_rc[i] = 0.0f; 1012 | for (i = 0; i < 5; i++) 1013 | innovation[i] = 0.0f; 1014 | for (i = 5+108; i < 5+108+5; i++) 1015 | innovation[i] = 0.0f; 1016 | 1017 | inn_gains[0] = inngain_sig; 1018 | for (i = 1; i < 64; i++) 1019 | inn_gains[i] = inn_gains[i-1]*inngain_base; 1020 | 1021 | while (bytes_remaining != 0) { 1022 | /* Encode the next frame of 432 samples. */ 1023 | int bytes_to_read; 1024 | int samples_to_read; 1025 | float rc[12]; 1026 | float rc_delta[12]; 1027 | int use_huffman = 0; 1028 | 1029 | bytes_to_read = (int)MIN(bytes_remaining, 432*2); 1030 | samples_to_read = bytes_to_read >> 1; 1031 | 1032 | read_data(infp, wav_buffer, bytes_to_read); 1033 | bytes_remaining -= bytes_to_read; 1034 | 1035 | for (i = 0; i < samples_to_read; i++) { 1036 | int16_t x = READ16(wav_buffer+2*i); 1037 | input_samples[12+i] = (float)x; 1038 | } 1039 | for (i = samples_to_read; i < 432; i++) 1040 | input_samples[12+i] = 0.0f; 1041 | 1042 | find_rc(rc, input_samples+12); 1043 | 1044 | /* Quantize the reflection coefficients. 1045 | ** In our encoder, we will not make use of utk_rc_table[0]. 
*/ 1046 | for (i = 0; i < 4; i++) { 1047 | int idx = 1+quantize(rc[i], utk_rc_table+1, 63); 1048 | bwc_write_bits(&bwc, idx, 6); 1049 | rc[i] = utk_rc_table[idx]; 1050 | if (i == 0 && idx < huffman_threshold) 1051 | use_huffman = 1; 1052 | } 1053 | for (i = 4; i < 12; i++) { 1054 | int idx = quantize(rc[i], utk_rc_table+16, 32); 1055 | bwc_write_bits(&bwc, idx, 5); 1056 | rc[i] = utk_rc_table[16+idx]; 1057 | } 1058 | 1059 | for (i = 0; i < 12; i++) 1060 | rc_delta[i] = (rc[i] - prev_rc[i])/4.0f; 1061 | 1062 | memcpy(rc, prev_rc, 12*sizeof(float)); 1063 | 1064 | for (i = 0; i < 4; i++) { 1065 | /* Linearly interpolate the reflection coefficients over 1066 | ** the four subframes and find the excitation signal. */ 1067 | float lpc[12]; 1068 | 1069 | for (j = 0; j < 12; j++) 1070 | rc[j] += rc_delta[j]; 1071 | 1072 | rc_to_lpc(lpc, rc); 1073 | 1074 | find_excitation(adaptive_codebook+324+12*i, 1075 | input_samples+12+12*i, 1076 | i < 3 ? 12 : 396, lpc); 1077 | } 1078 | 1079 | memcpy(input_samples, &input_samples[432], 12*sizeof(float)); 1080 | memcpy(prev_rc, rc, 12*sizeof(float)); 1081 | 1082 | for (i = 0; i < 4; i++) { 1083 | /* Encode the i'th subframe. */ 1084 | float *excitation = adaptive_codebook+324+108*i; 1085 | int pitch_lag; 1086 | float pitch_gain; 1087 | int idx; 1088 | int bits_used; 1089 | 1090 | find_pitch(&pitch_lag, &pitch_gain, excitation); 1091 | 1092 | bwc_write_bits(&bwc, pitch_lag - 108, 8); 1093 | 1094 | idx = ROUND(pitch_gain*15.0f); 1095 | bwc_write_bits(&bwc, idx, 4); 1096 | pitch_gain = (float)idx/15.0f; 1097 | 1098 | for (j = 0; j < 108; j++) 1099 | innovation[5+j] = excitation[j] 1100 | - pitch_gain*excitation[j-pitch_lag]; 1101 | 1102 | encode_innovation(&bwc, &innovation[5], 1103 | halved_innovation, use_huffman, &bits_used, 1104 | ROUND(bitrate * 432 / sampling_rate / 4) - 18); 1105 | 1106 | /* Update the adaptive codebook using the quantized 1107 | ** innovation signal. 
*/ 1108 | for (j = 0; j < 108; j++) 1109 | excitation[j] = innovation[5+j] 1110 | + pitch_gain*excitation[j-pitch_lag]; 1111 | } 1112 | 1113 | /* Copy the last 3 subframes to the beginning of the 1114 | ** adaptive codebook. */ 1115 | memcpy(adaptive_codebook, &adaptive_codebook[432], 1116 | 324*sizeof(float)); 1117 | 1118 | bwc_flush(&bwc, outfp); 1119 | } 1120 | 1121 | bwc_pad(&bwc); 1122 | bwc_flush(&bwc, outfp); 1123 | 1124 | flush_data(outfp); 1125 | 1126 | fclose(outfp); 1127 | fclose(infp); 1128 | 1129 | return EXIT_SUCCESS; 1130 | } --------------------------------------------------------------------------------