├── README.md
├── UNLICENSE
├── eachunk.h
├── io.h
├── samples
├── DS1.M10
├── fifa2001-mt5.dat
├── fifa2001.dat
└── male.utk
├── utk.h
├── utkdecode-bnb.c
├── utkdecode-fifa.c
├── utkdecode.c
└── utkencode.c
/README.md:
--------------------------------------------------------------------------------
1 | ## Overview
2 |
3 | EA MicroTalk (also UTalk or UTK) is a linear-predictive speech codec used in
4 | various games by Electronic Arts. The earliest known game to use it is
5 | Beasts & Bumpkins (1997). The codec has a bandwidth of 11.025kHz (sampling rate
6 | 22.05kHz) and frame size of 20ms (432 samples) and only supports mono. It is
7 | typically encoded at 32 kbit/s.
8 |
9 | Docs: http://wiki.niotso.org/UTK
10 |
11 | In this repository, I have created a set of open source (public domain
12 | via the UNLICENSE) MicroTalk decoders/encoders.
13 |
14 | * Use utkdecode to decode Maxis UTK (The Sims Online, SimCity 4).
15 | * Use utkdecode-bnb to decode PT/M10 (Beasts & Bumpkins).
16 | * Use utkdecode-fifa to decode FIFA 2001/2002 (PS2) speech samples. This tool
17 | supports regular MicroTalk and MicroTalk Revision 3
18 | [SCxl files](https://wiki.multimedia.cx/index.php/Electronic_Arts_SCxl).(*)
19 | * Use utkencode to encode Maxis UTK. (This is the simplest container format and
20 | is currently the only one supported for encoding.)
21 |
22 | (*) I wasn't able to find any real-world MicroTalk Rev. 3 samples in any games.
23 | However, you can transcode a FIFA MicroTalk Rev. 2 file to Rev. 3 using
24 | [EA's Sound eXchange tool](https://wiki.multimedia.cx/index.php/Electronic_Arts_Sound_eXchange)
25 | (`sx -mt_blk input.dat -=output.dat`).
26 |
27 | ## Compiling
28 |
29 | ```
30 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode utkdecode.c
31 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-fifa utkdecode-fifa.c
32 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-bnb utkdecode-bnb.c
33 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkencode utkencode.c
34 | ```
35 |
36 | ## How the encoder works
37 |
38 | The encoder for now is very simple. It does LPC analysis using the Levinson
39 | algorithm and transmits the entire excitation signal explicitly. Compression is
40 | achieved by choosing a large fixed codebook gain, such that each excitation
41 | sample has a large (coarse) quantization step size. Error is minimized in the
42 | excitation domain, and the quality is somewhat poor for bitrates below about
43 | 48 kbit/s.
44 |
45 | However, MicroTalk is a multi-pulse codec (it is cheap to code long runs of
46 | zeros in the excitation signal). Hence, a much better design (and indeed the
47 | standard practice for multi-pulse speech codecs) is to search for the positions
48 | and amplitudes of n pulses such that error is minimized in the output domain
49 | (or the perceptually weighted domain). This new encoder is still in the works.
--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <http://unlicense.org/>
25 |
--------------------------------------------------------------------------------
/eachunk.h:
--------------------------------------------------------------------------------
/* A bounded read cursor over the payload of one EA chunk held in memory. */
typedef struct EAChunk {
    uint32_t type;  /* chunk identifier, as read from the file */
    uint8_t *start; /* first byte of the payload */
    uint8_t *ptr;   /* next byte to be consumed */
    uint8_t *end;   /* one past the last byte of the payload */
} EAChunk;

/* Copy `size` bytes from the chunk into `dest` and advance the cursor.
** Prints an error and exits if fewer than `size` bytes remain. */
static void chunk_read_bytes(EAChunk *chunk, uint8_t *dest, size_t size)
{
    size_t bytes_remaining = (size_t)(chunk->end - chunk->ptr);

    if (bytes_remaining < size) {
        fprintf(stderr, "error: unexpected end of chunk\n");
        exit(EXIT_FAILURE);
    }

    memcpy(dest, chunk->ptr, size);
    chunk->ptr += size;
}

/* Read a little-endian 32-bit unsigned integer from the chunk. */
static uint32_t chunk_read_u32(EAChunk *chunk)
{
    uint8_t dest[4];
    chunk_read_bytes(chunk, dest, sizeof(dest));

    /* Widen each byte before shifting: a uint8_t promotes to (signed) int,
    ** and shifting a value >= 0x80 left by 24 would overflow it. */
    return (uint32_t)dest[0]
        | ((uint32_t)dest[1] << 8)
        | ((uint32_t)dest[2] << 16)
        | ((uint32_t)dest[3] << 24);
}

/* Read a single byte from the chunk (returned widened to 32 bits). */
static uint32_t chunk_read_u8(EAChunk *chunk)
{
    uint8_t dest;
    chunk_read_bytes(chunk, &dest, sizeof(dest));
    return dest;
}

/* Read a length-prefixed big-endian integer: one size byte (0..4) followed
** by that many value bytes. A size of 0 yields 0. Prints an error and exits
** if the size byte is greater than 4. */
static uint32_t chunk_read_var_int(EAChunk *chunk)
{
    uint8_t dest[4];
    uint8_t size = chunk_read_u8(chunk);
    uint32_t value = 0;
    unsigned i;

    if (size > 4) {
        fprintf(stderr, "error: invalid varint size %u\n", (unsigned)size);
        exit(EXIT_FAILURE);
    }

    chunk_read_bytes(chunk, dest, size);

    /* Accumulate most-significant byte first, in unsigned arithmetic. */
    for (i = 0; i < size; i++)
        value = (value << 8) | dest[i];

    return value;
}
56 |
57 | static EAChunk *read_chunk(FILE *fp)
58 | {
59 | uint32_t size;
60 | static EAChunk chunk;
61 | static uint8_t buffer[4096];
62 |
63 | chunk.type = read_u32(fp);
64 |
65 | size = read_u32(fp);
66 | if (size < 8 || size-8 > sizeof(buffer)) {
67 | fprintf(stderr, "error: invalid chunk size %u\n", (unsigned)size);
68 | exit(EXIT_FAILURE);
69 | }
70 |
71 | size -= 8;
72 | read_bytes(fp, buffer, size);
73 | chunk.start = chunk.ptr = buffer;
74 | chunk.end = buffer+size;
75 |
76 | return &chunk;
77 | }
--------------------------------------------------------------------------------
/io.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
/* Read exactly `size` bytes from `fp` into `dest`.
** Prints an error and exits on a read error or premature end of file.
** A size of 0 is a no-op. */
static void read_bytes(FILE *fp, uint8_t *dest, size_t size)
{
    size_t bytes_copied;

    if (!size)
        return;

    bytes_copied = fread(dest, 1, size, fp);
    if (bytes_copied < size) {
        if (ferror(fp))
            fprintf(stderr, "error: fread failed: %s\n", strerror(errno));
        else
            fprintf(stderr, "error: unexpected end of file\n");

        exit(EXIT_FAILURE);
    }
}

/* Read a little-endian 32-bit unsigned integer from `fp`. */
static uint32_t read_u32(FILE *fp)
{
    uint8_t dest[4];
    read_bytes(fp, dest, sizeof(dest));

    /* Widen each byte before shifting: a uint8_t promotes to (signed) int,
    ** and shifting a value >= 0x80 left by 24 would overflow it. */
    return (uint32_t)dest[0]
        | ((uint32_t)dest[1] << 8)
        | ((uint32_t)dest[2] << 16)
        | ((uint32_t)dest[3] << 24);
}

/* Read a little-endian 16-bit unsigned integer from `fp`. */
static uint16_t read_u16(FILE *fp)
{
    uint8_t dest[2];
    read_bytes(fp, dest, sizeof(dest));
    return (uint16_t)(dest[0] | ((unsigned)dest[1] << 8));
}

/* Read a single byte from `fp` (returned widened to 16 bits). */
static uint16_t read_u8(FILE *fp)
{
    uint8_t dest;
    read_bytes(fp, &dest, sizeof(dest));
    return dest;
}
45 |
/* Write exactly `size` bytes from `dest` to `fp`.
** Prints an error and exits if fwrite does not write them all.
** A size of 0 is a no-op. */
static void write_bytes(FILE *fp, const uint8_t *dest, size_t size)
{
    size_t written;

    if (size == 0)
        return;

    written = fwrite(dest, 1, size, fp);
    if (written == size)
        return;

    fprintf(stderr, "error: fwrite failed: %s\n", strerror(errno));
    exit(EXIT_FAILURE);
}

/* Write `x` to `fp` as four bytes, least-significant byte first. */
static void write_u32(FILE *fp, uint32_t x)
{
    uint8_t bytes[4];
    unsigned k;

    for (k = 0; k < 4; k++)
        bytes[k] = (uint8_t)(x >> (8 * k));

    write_bytes(fp, bytes, sizeof(bytes));
}

/* Write `x` to `fp` as two bytes, least-significant byte first. */
static void write_u16(FILE *fp, uint16_t x)
{
    uint8_t bytes[2];

    bytes[0] = (uint8_t)x;
    bytes[1] = (uint8_t)(x >> 8);
    write_bytes(fp, bytes, sizeof(bytes));
}

/* Write the single byte `x` to `fp`. */
static void write_u8(FILE *fp, uint8_t x)
{
    write_bytes(fp, &x, 1);
}
--------------------------------------------------------------------------------
/samples/DS1.M10:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/DS1.M10
--------------------------------------------------------------------------------
/samples/fifa2001-mt5.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001-mt5.dat
--------------------------------------------------------------------------------
/samples/fifa2001.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001.dat
--------------------------------------------------------------------------------
/samples/male.utk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/male.utk
--------------------------------------------------------------------------------
/utk.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
/* Decoder state for one MicroTalk stream.
**
** Note: This struct assumes a member alignment of 4 bytes.
** This matters when pitch_lag > 216 on the first subframe of any given frame.
** (utk_decode_frame indexes adapt_cb with offsets that can fall outside
** [0, 324), so the order and packing of the float arrays below must not be
** changed.) */
typedef struct UTKContext {
    FILE *fp;                       /* optional stream utk_read_byte refills from */
    const uint8_t *ptr, *end;       /* current position / end of buffered input */
    int parsed_header;              /* nonzero once utk_parse_header has run */
    unsigned int bits_value;        /* bit reader: pending bits, LSB consumed first */
    int bits_count;                 /* bit reader: number of bits held in bits_value */
    int reduced_bw;                 /* header flag: excitation is coded at stride 2 */
    int multipulse_thresh;          /* first RC index below this selects multi-pulse */
    float fixed_gains[64];          /* gain table built by utk_parse_header */
    float rc[12];                   /* current reflection coefficients */
    float synth_history[12];        /* last 12 outputs of the synthesis filter */
    float adapt_cb[324];            /* copy of decompressed_frame[108..431] taken before synthesis */
    float decompressed_frame[432];  /* output samples of the most recent frame */
} UTKContext;
20 |
/* Codebook models: the first index into utk_codebooks, and the value stored
** in utk_commands[].next_model. */
enum {
    MDL_NORMAL = 0,
    MDL_LARGEPULSE = 1
};
25 |
/* Reflection coefficient values, looked up by the index read from the
** bitstream in utk_decode_frame. Entries 0 and 32 are zero; entries 1..31 are
** negative and entries 33..63 mirror them with positive sign. */
static const float utk_rc_table[64] = {
    +0.0f,
    -.99677598476409912109375f, -.99032700061798095703125f, -.983879029750823974609375f, -.977430999279022216796875f,
    -.970982015132904052734375f, -.964533984661102294921875f, -.958085000514984130859375f, -.9516370296478271484375f,
    -.930754005908966064453125f, -.904959976673126220703125f, -.879167020320892333984375f, -.853372991085052490234375f,
    -.827579021453857421875f, -.801786005496978759765625f, -.775991976261138916015625f, -.75019800662994384765625f,
    -.724404990673065185546875f, -.6986110210418701171875f, -.6706349849700927734375f, -.61904799938201904296875f,
    -.567460000514984130859375f, -.515873014926910400390625f, -.4642859995365142822265625f, -.4126980006694793701171875f,
    -.361110985279083251953125f, -.309523999691009521484375f, -.257937014102935791015625f, -.20634900033473968505859375f,
    -.1547619998455047607421875f, -.10317499935626983642578125f, -.05158700048923492431640625f,
    +0.0f,
    +.05158700048923492431640625f, +.10317499935626983642578125f, +.1547619998455047607421875f, +.20634900033473968505859375f,
    +.257937014102935791015625f, +.309523999691009521484375f, +.361110985279083251953125f, +.4126980006694793701171875f,
    +.4642859995365142822265625f, +.515873014926910400390625f, +.567460000514984130859375f, +.61904799938201904296875f,
    +.6706349849700927734375f, +.6986110210418701171875f, +.724404990673065185546875f, +.75019800662994384765625f,
    +.775991976261138916015625f, +.801786005496978759765625f, +.827579021453857421875f, +.853372991085052490234375f,
    +.879167020320892333984375f, +.904959976673126220703125f, +.930754005908966064453125f, +.9516370296478271484375f,
    +.958085000514984130859375f, +.964533984661102294921875f, +.970982015132904052734375f, +.977430999279022216796875f,
    +.983879029750823974609375f, +.99032700061798095703125f, +.99677598476409912109375f
};
46 |
/* Code lookup tables used by utk_decode_excitation. The first index is the
** current model (MDL_NORMAL or MDL_LARGEPULSE); the second is the low 8 bits
** of the bit buffer. Each entry is an index into utk_commands. */
static const uint8_t utk_codebooks[2][256] = {
    { /* normal model */
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 25,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 22,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 0,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 21,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 26,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 17,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 22,
        4, 6, 5, 9, 4, 6, 5, 13, 4, 6, 5, 10, 4, 6, 5, 18,
        4, 6, 5, 9, 4, 6, 5, 14, 4, 6, 5, 10, 4, 6, 5, 2
    }, { /* large-pulse model */
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 1,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 28,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 3,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 27,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 1,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 23,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 28,
        4, 11, 7, 15, 4, 12, 8, 19, 4, 11, 7, 16, 4, 12, 8, 24,
        4, 11, 7, 15, 4, 12, 8, 20, 4, 11, 7, 16, 4, 12, 8, 3
    }
};
84 |
/* Command table indexed by the values stored in utk_codebooks.
** utk_decode_excitation treats the entries as follows:
**   0..1  - escape: a pulse of magnitude >= 7 follows in the bitstream
**   2..3  - escape: a run of zeros follows (length read from the bitstream)
**   4..28 - a single sample equal to pulse_value
** next_model selects the codebook used for the following code, and code_size
** is the number of bits the code itself occupies. */
static const struct {
    int next_model;
    int code_size;
    float pulse_value;
} utk_commands[29] = {
    {MDL_LARGEPULSE, 8, 0.0f},
    {MDL_LARGEPULSE, 7, 0.0f},
    {MDL_NORMAL, 8, 0.0f},
    {MDL_NORMAL, 7, 0.0f},
    {MDL_NORMAL, 2, 0.0f},
    {MDL_NORMAL, 2, -1.0f},
    {MDL_NORMAL, 2, +1.0f},
    {MDL_NORMAL, 3, -1.0f},
    {MDL_NORMAL, 3, +1.0f},
    {MDL_LARGEPULSE, 4, -2.0f},
    {MDL_LARGEPULSE, 4, +2.0f},
    {MDL_LARGEPULSE, 3, -2.0f},
    {MDL_LARGEPULSE, 3, +2.0f},
    {MDL_LARGEPULSE, 5, -3.0f},
    {MDL_LARGEPULSE, 5, +3.0f},
    {MDL_LARGEPULSE, 4, -3.0f},
    {MDL_LARGEPULSE, 4, +3.0f},
    {MDL_LARGEPULSE, 6, -4.0f},
    {MDL_LARGEPULSE, 6, +4.0f},
    {MDL_LARGEPULSE, 5, -4.0f},
    {MDL_LARGEPULSE, 5, +4.0f},
    {MDL_LARGEPULSE, 7, -5.0f},
    {MDL_LARGEPULSE, 7, +5.0f},
    {MDL_LARGEPULSE, 6, -5.0f},
    {MDL_LARGEPULSE, 6, +5.0f},
    {MDL_LARGEPULSE, 8, -6.0f},
    {MDL_LARGEPULSE, 8, +6.0f},
    {MDL_LARGEPULSE, 7, -6.0f},
    {MDL_LARGEPULSE, 7, +6.0f}
};
120 |
121 | static int utk_read_byte(UTKContext *ctx)
122 | {
123 | if (ctx->ptr < ctx->end)
124 | return *ctx->ptr++;
125 |
126 | if (ctx->fp) {
127 | static uint8_t buffer[4096];
128 | size_t bytes_copied = fread(buffer, 1, sizeof(buffer), ctx->fp);
129 | if (bytes_copied > 0 && bytes_copied <= sizeof(buffer)) {
130 | ctx->ptr = buffer;
131 | ctx->end = buffer + bytes_copied;
132 | return *ctx->ptr++;
133 | }
134 | }
135 |
136 | return 0;
137 | }
138 |
139 | static int16_t utk_read_i16(UTKContext *ctx)
140 | {
141 | int x = utk_read_byte(ctx);
142 | x = (x << 8) | utk_read_byte(ctx);
143 | return x;
144 | }
145 |
146 | static int utk_read_bits(UTKContext *ctx, int count)
147 | {
148 | int ret = ctx->bits_value & ((1 << count) - 1);
149 | ctx->bits_value >>= count;
150 | ctx->bits_count -= count;
151 |
152 | if (ctx->bits_count < 8) {
153 | /* read another byte */
154 | ctx->bits_value |= utk_read_byte(ctx) << ctx->bits_count;
155 | ctx->bits_count += 8;
156 | }
157 |
158 | return ret;
159 | }
160 |
161 | static void utk_parse_header(UTKContext *ctx)
162 | {
163 | int i;
164 | float multiplier;
165 |
166 | ctx->reduced_bw = utk_read_bits(ctx, 1);
167 | ctx->multipulse_thresh = 32 - utk_read_bits(ctx, 4);
168 | ctx->fixed_gains[0] = 8.0f * (1 + utk_read_bits(ctx, 4));
169 | multiplier = 1.04f + utk_read_bits(ctx, 6)*0.001f;
170 |
171 | for (i = 1; i < 64; i++)
172 | ctx->fixed_gains[i] = ctx->fixed_gains[i-1] * multiplier;
173 | }
174 |
/* Decode one subframe of excitation into out[0], out[stride], ... for every
** index below 108 (108 values at stride 1, 54 at stride 2).
**
** use_multipulse selects between the variable-length multi-pulse coding and
** the fixed two-level coding in which every sample is sent explicitly. */
static void utk_decode_excitation(UTKContext *ctx, int use_multipulse, float *out, int stride)
{
    int i;

    if (use_multipulse) {
        /* multi-pulse model: n pulses are coded explicitly; the rest are zero */
        int model, cmd;
        model = 0;
        i = 0;
        while (i < 108) {
            /* peek at the next 8 bits to identify the code, then consume only
            ** as many bits as that code occupies */
            cmd = utk_codebooks[model][ctx->bits_value & 0xff];
            model = utk_commands[cmd].next_model;
            utk_read_bits(ctx, utk_commands[cmd].code_size);

            if (cmd > 3) {
                /* insert a pulse with magnitude <= 6.0f */
                out[i] = utk_commands[cmd].pulse_value;
                i += stride;
            } else if (cmd > 1) {
                /* insert between 7 and 70 zeros */
                int count = 7 + utk_read_bits(ctx, 6);
                /* truncate the run so it never writes past index 107 */
                if (i + count * stride > 108)
                    count = (108 - i)/stride;

                while (count > 0) {
                    out[i] = 0.0f;
                    i += stride;
                    count--;
                }
            } else {
                /* insert a pulse with magnitude >= 7.0f */
                int x = 7;

                /* unary-coded magnitude: each 1 bit adds one */
                while (utk_read_bits(ctx, 1))
                    x++;

                /* sign bit: 0 means negative */
                if (!utk_read_bits(ctx, 1))
                    x *= -1;

                out[i] = (float)x;
                i += stride;
            }
        }
    } else {
        /* RELP model: entire residual (excitation) signal is coded explicitly */
        i = 0;
        while (i < 108) {
            /* codes: 0 -> 0.0f, then a second bit picks -2.0f (0) or +2.0f (1) */
            if (!utk_read_bits(ctx, 1))
                out[i] = 0.0f;
            else if (!utk_read_bits(ctx, 1))
                out[i] = -2.0f;
            else
                out[i] = 2.0f;

            i += stride;
        }
    }
}
233 |
/* Convert 12 reflection coefficients `rc` into 12 direct-form coefficients
** `lpc`. Each outer iteration pushes one step through a 12-stage recursion
** driven by `rc` and derives the next coefficient from the resulting value
** and the coefficients already found. `rc` and `lpc` must not overlap. */
static void rc_to_lpc(const float *rc, float *lpc)
{
    float response[12];
    float state[12];
    int n, k;

    state[0] = 1.0f;
    for (k = 0; k < 11; k++)
        state[k+1] = rc[k];

    for (n = 0; n < 12; n++) {
        float acc = -state[11] * rc[11];

        /* walk the stages from last to first, updating the state in place */
        for (k = 10; k >= 0; k--) {
            acc -= state[k] * rc[k];
            state[k+1] = acc * rc[k] + state[k];
        }

        state[0] = acc;
        response[n] = acc;

        /* remove the contribution of the coefficients found so far */
        for (k = 0; k < n; k++)
            acc -= response[n-1-k] * lpc[k];

        lpc[n] = acc;
    }
}
261 |
262 | static void utk_lp_synthesis_filter(UTKContext *ctx, int offset, int num_blocks)
263 | {
264 | int i, j, k;
265 | float lpc[12];
266 | float *ptr = &ctx->decompressed_frame[offset];
267 |
268 | rc_to_lpc(ctx->rc, lpc);
269 |
270 | for (i = 0; i < num_blocks; i++) {
271 | for (j = 0; j < 12; j++) {
272 | float x = *ptr;
273 |
274 | for (k = 0; k < j; k++)
275 | x += lpc[k] * ctx->synth_history[k-j+12];
276 | for (; k < 12; k++)
277 | x += lpc[k] * ctx->synth_history[k-j];
278 |
279 | ctx->synth_history[11-j] = x;
280 | *ptr++ = x;
281 | }
282 | }
283 | }
284 |
285 | /*
286 | ** Public functions.
287 | */
288 |
/* Decode one frame of 432 samples into ctx->decompressed_frame.
**
** Steps: prime the bit reader and parse the stream header on first use, read
** 12 reflection-coefficient indices, decode four 108-sample subframes
** (excitation plus adaptive-codebook contribution), save the adaptive
** codebook for the next frame, then run the synthesis filter while moving the
** reflection coefficients a quarter of the way to their targets each step. */
static void utk_decode_frame(UTKContext *ctx)
{
    int i, j;
    int use_multipulse = 0;
    /* 108 samples with 5 zeroed guard samples on each side for the
    ** interpolation filter below */
    float excitation[5+108+5];
    float rc_delta[12];

    /* prime the bit reader after utk_init/utk_set_fp/utk_set_ptr */
    if (!ctx->bits_count) {
        ctx->bits_value = utk_read_byte(ctx);
        ctx->bits_count = 8;
    }

    if (!ctx->parsed_header) {
        utk_parse_header(ctx);
        ctx->parsed_header = 1;
    }

    memset(&excitation[0], 0, 5*sizeof(float));
    memset(&excitation[5+108], 0, 5*sizeof(float));

    /* read the reflection coefficients */
    for (i = 0; i < 12; i++) {
        int idx;
        if (i == 0) {
            /* the first index also selects the excitation model */
            idx = utk_read_bits(ctx, 6);
            if (idx < ctx->multipulse_thresh)
                use_multipulse = 1;
        } else if (i < 4) {
            idx = utk_read_bits(ctx, 6);
        } else {
            /* later coefficients use 5 bits and only table entries 16..47 */
            idx = 16 + utk_read_bits(ctx, 5);
        }

        /* per-step increment: a quarter of the distance to the new target */
        rc_delta[i] = (utk_rc_table[idx] - ctx->rc[i])*0.25f;
    }

    /* decode four subframes */
    for (i = 0; i < 4; i++) {
        int pitch_lag = utk_read_bits(ctx, 8);
        float pitch_gain = (float)utk_read_bits(ctx, 4)/15.0f;
        float fixed_gain = ctx->fixed_gains[utk_read_bits(ctx, 6)];

        if (!ctx->reduced_bw) {
            utk_decode_excitation(ctx, use_multipulse, &excitation[5], 1);
        } else {
            /* residual (excitation) signal is encoded at reduced bandwidth */
            int align = utk_read_bits(ctx, 1);
            int zero = utk_read_bits(ctx, 1);

            utk_decode_excitation(ctx, use_multipulse, &excitation[5+align], 2);

            if (zero) {
                /* fill the remaining samples with zero
                ** (spectrum is duplicated into high frequencies) */
                for (j = 0; j < 54; j++)
                    excitation[5+(1-align)+2*j] = 0.0f;
            } else {
                /* interpolate the remaining samples
                ** (spectrum is low-pass filtered) */
                float *ptr = &excitation[5+(1-align)];
                for (j = 0; j < 108; j += 2)
                    ptr[j] = ptr[j-5] * 0.01803267933428287506103515625f
                        - ptr[j-3] * 0.114591561257839202880859375f
                        + ptr[j-1] * 0.597385942935943603515625f
                        + ptr[j+1] * 0.597385942935943603515625f
                        - ptr[j+3] * 0.114591561257839202880859375f
                        + ptr[j+5] * 0.01803267933428287506103515625f;

                /* scale by 0.5f to give the sinc impulse response unit energy */
                fixed_gain *= 0.5f;
            }
        }

        /* NOTE: the adapt_cb index is not confined to [0, 324). Indices of
        ** 324 and above land in decompressed_frame (samples already written
        ** for this frame), and pitch_lag > 216 on the first subframe gives a
        ** negative index. Both depend on the UTKContext member layout. */
        for (j = 0; j < 108; j++)
            ctx->decompressed_frame[108*i+j] = fixed_gain * excitation[5+j]
                + pitch_gain * ctx->adapt_cb[108*i+216-pitch_lag+j];
    }

    /* keep the last 324 pre-synthesis samples for the next frame */
    for (i = 0; i < 324; i++)
        ctx->adapt_cb[i] = ctx->decompressed_frame[108+i];

    /* the first three steps filter 12 samples each; the fourth filters the
    ** remaining 33*12 = 396 samples */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 12; j++)
            ctx->rc[j] += rc_delta[j];

        utk_lp_synthesis_filter(ctx, 12*i, i < 3 ? 1 : 33);
    }
}
377 |
378 | static void utk_init(UTKContext *ctx)
379 | {
380 | memset(ctx, 0, sizeof(*ctx));
381 | }
382 |
383 | static void utk_set_fp(UTKContext *ctx, FILE *fp)
384 | {
385 | ctx->fp = fp;
386 |
387 | /* reset the bit reader */
388 | ctx->bits_count = 0;
389 | }
390 |
391 | static void utk_set_ptr(UTKContext *ctx, const uint8_t *ptr, const uint8_t *end)
392 | {
393 | ctx->ptr = ptr;
394 | ctx->end = end;
395 |
396 | /* reset the bit reader */
397 | ctx->bits_count = 0;
398 | }
399 |
400 | /*
401 | ** MicroTalk Revision 3 decoding function.
402 | */
403 |
404 | static void utk_rev3_decode_frame(UTKContext *ctx)
405 | {
406 | int pcm_data_present = (utk_read_byte(ctx) == 0xee);
407 | int i;
408 |
409 | utk_decode_frame(ctx);
410 |
411 | /* unread the last 8 bits and reset the bit reader */
412 | ctx->ptr--;
413 | ctx->bits_count = 0;
414 |
415 | if (pcm_data_present) {
416 | /* Overwrite n samples at a given offset in the decoded frame with
417 | ** raw PCM data. */
418 | int offset = utk_read_i16(ctx);
419 | int count = utk_read_i16(ctx);
420 |
421 | /* sx.exe does not do any bounds checking or clamping of these two
422 | ** fields (see 004274D1 in sx.exe v3.01.01), which means a specially
423 | ** crafted MT5:1 file can crash sx.exe.
424 | ** We will throw an error instead. */
425 | if (offset < 0 || offset > 432) {
426 | fprintf(stderr, "error: invalid PCM offset %d\n", offset);
427 | exit(EXIT_FAILURE);
428 | }
429 | if (count < 0 || count > 432 - offset) {
430 | fprintf(stderr, "error: invalid PCM count %d\n", count);
431 | exit(EXIT_FAILURE);
432 | }
433 |
434 | for (i = 0; i < count; i++)
435 | ctx->decompressed_frame[offset+i] = (float)utk_read_i16(ctx);
436 | }
437 | }
438 |
--------------------------------------------------------------------------------
/utkdecode-bnb.c:
--------------------------------------------------------------------------------
1 | /*
2 | ** utkdecode-bnb
3 | ** Decode Beasts & Bumpkins M10 to wav.
4 | ** Authors: Andrew D'Addesio
5 | ** License: Public domain
6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
7 | ** -fwhole-program -g0 -s -o utkdecode-bnb utkdecode-bnb.c
8 | */
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include "utk.h"
15 | #include "io.h"
16 | #include "eachunk.h"
17 |
/* Pack four byte values into one 32-bit value, first argument in the low byte. */
#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
/* Offset x by 0.5f away from zero, so that a following truncation to an
** integer rounds to nearest. */
#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
/* NOTE: MIN, MAX and CLAMP evaluate their arguments more than once; do not
** pass expressions with side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#define CLAMP(x,min,max) MIN(MAX(x,min),max)
23 |
/* State for decoding one PT/M10 file. */
typedef struct PTContext {
    FILE *infp, *outfp;         /* input file and output WAV file */
    uint32_t num_samples;       /* header key 0x85: number of samples to output */
    uint32_t compression_type;  /* header key 0x83: must be 9 (MicroTalk 10:1) */
    UTKContext utk;             /* MicroTalk decoder state */
} PTContext;
30 |
31 | static void pt_read_header(PTContext *pt)
32 | {
33 | EAChunk *chunk = read_chunk(pt->infp);
34 |
35 | if ((chunk->type & 0xffff) != MAKE_U32('P','T','\x00','\x00')) {
36 | fprintf(stderr, "error: expected PT chunk\n");
37 | exit(EXIT_FAILURE);
38 | }
39 |
40 | while (1) {
41 | uint8_t cmd = chunk_read_u8(chunk);
42 | if (cmd == 0xFD) {
43 | while (1) {
44 | uint8_t key = chunk_read_u8(chunk);
45 | uint32_t value = chunk_read_var_int(chunk);
46 |
47 | if (key == 0xFF)
48 | break;
49 | else if (key == 0x85)
50 | pt->num_samples = value;
51 | else if (key == 0x83)
52 | pt->compression_type = value;
53 | }
54 | break;
55 | } else {
56 | chunk_read_var_int(chunk);
57 | }
58 | }
59 |
60 | if (pt->compression_type != 9) {
61 | fprintf(stderr, "error: invalid compression type %u (expected 9 for MicroTalk 10:1)\n",
62 | (unsigned)pt->compression_type);
63 | exit(EXIT_FAILURE);
64 | }
65 |
66 | if (pt->num_samples >= 0x01000000) {
67 | fprintf(stderr, "error: invalid num_samples %u\n", pt->num_samples);
68 | exit(EXIT_FAILURE);
69 | }
70 |
71 | /* Initialize the decoder. */
72 | utk_init(&pt->utk);
73 |
74 | /* Write the WAV header. */
75 | write_u32(pt->outfp, MAKE_U32('R','I','F','F'));
76 | write_u32(pt->outfp, 36 + pt->num_samples*2);
77 | write_u32(pt->outfp, MAKE_U32('W','A','V','E'));
78 | write_u32(pt->outfp, MAKE_U32('f','m','t',' '));
79 | write_u32(pt->outfp, 16);
80 | write_u16(pt->outfp, 1);
81 | write_u16(pt->outfp, 1);
82 | write_u32(pt->outfp, 22050);
83 | write_u32(pt->outfp, 22050*2);
84 | write_u16(pt->outfp, 2);
85 | write_u16(pt->outfp, 16);
86 | write_u32(pt->outfp, MAKE_U32('d','a','t','a'));
87 | write_u32(pt->outfp, pt->num_samples*2);
88 | }
89 |
90 | static void pt_decode(PTContext *pt)
91 | {
92 | UTKContext *utk = &pt->utk;
93 | uint32_t num_samples = pt->num_samples;
94 |
95 | utk_set_fp(utk, pt->infp);
96 |
97 | while (num_samples > 0) {
98 | int count = MIN(num_samples, 432);
99 | int i;
100 |
101 | utk_decode_frame(utk);
102 |
103 | for (i = 0; i < count; i++) {
104 | int x = ROUND(pt->utk.decompressed_frame[i]);
105 | write_u16(pt->outfp, (int16_t)CLAMP(x, -32768, 32767));
106 | }
107 |
108 | num_samples -= count;
109 | }
110 | }
111 |
112 | int main(int argc, char *argv[])
113 | {
114 | PTContext pt;
115 | const char *infile, *outfile;
116 | FILE *infp, *outfp;
117 | int force = 0;
118 |
119 | /* Parse arguments. */
120 | if (argc == 4 && !strcmp(argv[1], "-f")) {
121 | force = 1;
122 | argv++, argc--;
123 | }
124 |
125 | if (argc != 3) {
126 | printf("Usage: utkdecode-bnb [-f] infile outfile\n");
127 | printf("Decode Beasts & Bumpkins M10 to wav.\n");
128 | return EXIT_FAILURE;
129 | }
130 |
131 | infile = argv[1];
132 | outfile = argv[2];
133 |
134 | /* Open the input/output files. */
135 | infp = fopen(infile, "rb");
136 | if (!infp) {
137 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
138 | return EXIT_FAILURE;
139 | }
140 |
141 | if (!force && fopen(outfile, "rb")) {
142 | fprintf(stderr, "error: '%s' already exists\n", outfile);
143 | return EXIT_FAILURE;
144 | }
145 |
146 | outfp = fopen(outfile, "wb");
147 | if (!outfp) {
148 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
149 | return EXIT_FAILURE;
150 | }
151 |
152 | memset(&pt, 0, sizeof(pt));
153 | pt.infp = infp;
154 | pt.outfp = outfp;
155 |
156 | pt_read_header(&pt);
157 | pt_decode(&pt);
158 |
159 | if (fclose(outfp) != 0) {
160 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
161 | return EXIT_FAILURE;
162 | }
163 |
164 | fclose(infp);
165 |
166 | return EXIT_SUCCESS;
167 | }
--------------------------------------------------------------------------------
/utkdecode-fifa.c:
--------------------------------------------------------------------------------
1 | /*
2 | ** utkdecode-fifa
3 | ** Decode FIFA 2001/2002 MicroTalk to wav.
4 | ** Authors: Andrew D'Addesio
5 | ** License: Public domain
6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
7 | ** -fwhole-program -g0 -s -o utkdecode-fifa utkdecode-fifa.c
8 | */
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include "utk.h"
15 | #include "io.h"
16 | #include "eachunk.h"
17 |
/* Pack four byte values into one 32-bit value, first argument in the low byte. */
#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
/* Offset x by 0.5f away from zero, so that a following truncation to an
** integer rounds to nearest. */
#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
/* NOTE: MIN, MAX and CLAMP evaluate their arguments more than once; do not
** pass expressions with side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#define CLAMP(x,min,max) MIN(MAX(x,min),max)
23 |
/* State for decoding one FIFA 2001/2002 SCxl file. */
typedef struct EAContext {
    FILE *infp, *outfp;         /* input file and output WAV file */
    uint32_t audio_pos;         /* number of samples written so far */
    uint32_t num_samples;       /* header key 0x85: total samples to output */
    uint32_t num_data_chunks;   /* value read from the SCCl chunk */
    uint32_t compression_type;  /* header key 0xA0: must be 4 or 22 */
    uint32_t codec_revision;    /* header key 0x80: 3 or more selects Revision 3 decoding */
    UTKContext utk;             /* MicroTalk decoder state */
} EAContext;
33 |
34 | static void ea_read_schl(EAContext *ea)
35 | {
36 | uint32_t id;
37 | EAChunk *chunk = read_chunk(ea->infp);
38 |
39 | if (chunk->type != MAKE_U32('S','C','H','l')) {
40 | fprintf(stderr, "error: expected SCHl chunk\n");
41 | exit(EXIT_FAILURE);
42 | }
43 |
44 | id = chunk_read_u32(chunk);
45 | if ((id & 0xffff) != MAKE_U32('P','T','\x00','\x00')) {
46 | fprintf(stderr, "error: expected PT chunk in SCHl header\n");
47 | exit(EXIT_FAILURE);
48 | }
49 |
50 | while (1) {
51 | uint8_t cmd = chunk_read_u8(chunk);
52 | if (cmd == 0xFD) {
53 | while (1) {
54 | uint8_t key = chunk_read_u8(chunk);
55 | uint32_t value = chunk_read_var_int(chunk);
56 |
57 | if (key == 0xFF)
58 | break;
59 | else if (key == 0x80)
60 | ea->codec_revision = value;
61 | else if (key == 0x85)
62 | ea->num_samples = value;
63 | else if (key == 0xA0)
64 | ea->compression_type = value;
65 | }
66 | break;
67 | } else {
68 | chunk_read_var_int(chunk);
69 | }
70 | }
71 |
72 | if (ea->compression_type != 4 && ea->compression_type != 22) {
73 | fprintf(stderr, "error: invalid compression type %u (expected 4 for MicroTalk 10:1 or 22 for MicroTalk 5:1)\n",
74 | (unsigned)ea->compression_type);
75 | exit(EXIT_FAILURE);
76 | }
77 |
78 | if (ea->num_samples >= 0x01000000) {
79 | fprintf(stderr, "error: invalid num_samples %u\n", ea->num_samples);
80 | exit(EXIT_FAILURE);
81 | }
82 |
83 | /* Initialize the decoder. */
84 | utk_init(&ea->utk);
85 |
86 | /* Write the WAV header. */
87 | write_u32(ea->outfp, MAKE_U32('R','I','F','F'));
88 | write_u32(ea->outfp, 36 + ea->num_samples*2);
89 | write_u32(ea->outfp, MAKE_U32('W','A','V','E'));
90 | write_u32(ea->outfp, MAKE_U32('f','m','t',' '));
91 | write_u32(ea->outfp, 16);
92 | write_u16(ea->outfp, 1);
93 | write_u16(ea->outfp, 1);
94 | write_u32(ea->outfp, 22050);
95 | write_u32(ea->outfp, 22050*2);
96 | write_u16(ea->outfp, 2);
97 | write_u16(ea->outfp, 16);
98 | write_u32(ea->outfp, MAKE_U32('d','a','t','a'));
99 | write_u32(ea->outfp, ea->num_samples*2);
100 | }
101 |
102 | static void ea_read_sccl(EAContext *ea)
103 | {
104 | EAChunk *chunk = read_chunk(ea->infp);
105 |
106 | if (chunk->type != MAKE_U32('S','C','C','l')) {
107 | fprintf(stderr, "error: expected SCCl chunk\n");
108 | exit(EXIT_FAILURE);
109 | }
110 |
111 | ea->num_data_chunks = chunk_read_u32(chunk);
112 | if (ea->num_data_chunks >= 0x01000000) {
113 | fprintf(stderr, "error: invalid num_data_chunks %u\n", (unsigned)ea->num_data_chunks);
114 | exit(EXIT_FAILURE);
115 | }
116 | }
117 |
118 | static void ea_read_scdl(EAContext *ea)
119 | {
120 | EAChunk *chunk = read_chunk(ea->infp);
121 | UTKContext *utk = &ea->utk;
122 | uint32_t num_samples;
123 |
124 | if (chunk->type != MAKE_U32('S','C','D','l')) {
125 | fprintf(stderr, "error: expected SCDl chunk\n");
126 | exit(EXIT_FAILURE);
127 | }
128 |
129 | num_samples = chunk_read_u32(chunk);
130 | chunk_read_u32(chunk); /* unknown */
131 | chunk_read_u8(chunk); /* unknown */
132 |
133 | if (num_samples > ea->num_samples - ea->audio_pos)
134 | num_samples = ea->num_samples - ea->audio_pos;
135 |
136 | utk_set_ptr(utk, chunk->ptr, chunk->end);
137 |
138 | while (num_samples > 0) {
139 | int count = MIN(num_samples, 432);
140 | int i;
141 |
142 | if (ea->codec_revision >= 3)
143 | utk_rev3_decode_frame(utk);
144 | else
145 | utk_decode_frame(utk);
146 |
147 | for (i = 0; i < count; i++) {
148 | int x = ROUND(ea->utk.decompressed_frame[i]);
149 | write_u16(ea->outfp, (int16_t)CLAMP(x, -32768, 32767));
150 | }
151 |
152 | ea->audio_pos += count;
153 | num_samples -= count;
154 | }
155 | }
156 |
157 | static void ea_read_scel(const EAContext *ea)
158 | {
159 | EAChunk *chunk = read_chunk(ea->infp);
160 |
161 | if (chunk->type != MAKE_U32('S','C','E','l')) {
162 | fprintf(stderr, "error: expected SCEl chunk\n");
163 | exit(EXIT_FAILURE);
164 | }
165 |
166 | if (ea->audio_pos != ea->num_samples) {
167 | fprintf(stderr, "error: failed to decode the correct number of samples\n");
168 | exit(EXIT_FAILURE);
169 | }
170 | }
171 |
172 | int main(int argc, char *argv[])
173 | {
174 | EAContext ea;
175 | const char *infile, *outfile;
176 | FILE *infp, *outfp;
177 | int force = 0;
178 | unsigned int i;
179 |
180 | if (argc == 4 && !strcmp(argv[1], "-f")) {
181 | force = 1;
182 | argv++, argc--;
183 | }
184 |
185 | if (argc != 3) {
186 | printf("Usage: utkdecode-fifa [-f] infile outfile\n");
187 | printf("Decode FIFA 2001/2002 MicroTalk to wav.\n");
188 | return EXIT_FAILURE;
189 | }
190 |
191 | infile = argv[1];
192 | outfile = argv[2];
193 |
194 | infp = fopen(infile, "rb");
195 | if (!infp) {
196 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
197 | return EXIT_FAILURE;
198 | }
199 |
200 | if (!force && fopen(outfile, "rb")) {
201 | fprintf(stderr, "error: '%s' already exists\n", outfile);
202 | return EXIT_FAILURE;
203 | }
204 |
205 | outfp = fopen(outfile, "wb");
206 | if (!outfp) {
207 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
208 | return EXIT_FAILURE;
209 | }
210 |
211 | memset(&ea, 0, sizeof(ea));
212 | ea.infp = infp;
213 | ea.outfp = outfp;
214 |
215 | ea_read_schl(&ea);
216 | ea_read_sccl(&ea);
217 |
218 | for (i = 0; i < ea.num_data_chunks; i++)
219 | ea_read_scdl(&ea);
220 |
221 | ea_read_scel(&ea);
222 |
223 | if (!outfp) {
224 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
225 | return EXIT_FAILURE;
226 | }
227 |
228 | return EXIT_SUCCESS;
229 | }
--------------------------------------------------------------------------------
/utkdecode.c:
--------------------------------------------------------------------------------
1 | /*
2 | ** utkdecode
3 | ** Decode Maxis UTK to wav.
4 | ** Authors: Andrew D'Addesio
5 | ** License: Public domain
6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
7 | ** -fwhole-program -g0 -s -o utkdecode utkdecode.c
8 | */
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include "utk.h"
15 | #include "io.h"
16 |
/* Pack four bytes into a 32-bit value; the first argument is the least
** significant byte. */
#define MAKE_U32(b0,b1,b2,b3) ((b0) | ((b1)<<8) | ((b2)<<16) | ((b3)<<24))
/* Offset v by half a unit away from zero, so that a following truncation
** to an integer rounds to nearest. The result is still floating point. */
#define ROUND(v) ((v) >= 0.0f ? ((v)+0.5f) : ((v)-0.5f))
/* Smaller / larger of two values. Arguments may be evaluated twice. */
#define MIN(p,q) ((p) < (q) ? (p) : (q))
#define MAX(p,q) ((p) > (q) ? (p) : (q))
/* Limit v to the range [lo, hi]. */
#define CLAMP(v,lo,hi) MIN(MAX(v,lo),hi)
22 |
23 | int main(int argc, char *argv[])
24 | {
25 | const char *infile, *outfile;
26 | UTKContext ctx;
27 | uint32_t sID;
28 | uint32_t dwOutSize;
29 | uint32_t dwWfxSize;
30 | uint16_t wFormatTag;
31 | uint16_t nChannels;
32 | uint32_t nSamplesPerSec;
33 | uint32_t nAvgBytesPerSec;
34 | uint16_t nBlockAlign;
35 | uint16_t wBitsPerSample;
36 | uint16_t cbSize;
37 | uint32_t num_samples;
38 | FILE *infp, *outfp;
39 | int force = 0;
40 | int error = 0;
41 | int i;
42 |
43 | /* Parse arguments. */
44 | if (argc == 4 && !strcmp(argv[1], "-f")) {
45 | force = 1;
46 | argv++, argc--;
47 | }
48 |
49 | if (argc != 3) {
50 | printf("Usage: utkdecode [-f] infile outfile\n");
51 | printf("Decode Maxis UTK to wav.\n");
52 | return EXIT_FAILURE;
53 | }
54 |
55 | infile = argv[1];
56 | outfile = argv[2];
57 |
58 | /* Open the input/output files. */
59 | infp = fopen(infile, "rb");
60 | if (!infp) {
61 | fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
62 | return EXIT_FAILURE;
63 | }
64 |
65 | if (!force && fopen(outfile, "rb")) {
66 | fprintf(stderr, "error: '%s' already exists\n", outfile);
67 | return EXIT_FAILURE;
68 | }
69 |
70 | outfp = fopen(outfile, "wb");
71 | if (!outfp) {
72 | fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
73 | return EXIT_FAILURE;
74 | }
75 |
76 | /* Parse the UTK header. */
77 | sID = read_u32(infp);
78 | dwOutSize = read_u32(infp);
79 | dwWfxSize = read_u32(infp);
80 | wFormatTag = read_u16(infp);
81 | nChannels = read_u16(infp);
82 | nSamplesPerSec = read_u32(infp);
83 | nAvgBytesPerSec = read_u32(infp);
84 | nBlockAlign = read_u16(infp);
85 | wBitsPerSample = read_u16(infp);
86 | cbSize = read_u16(infp);
87 | read_u16(infp); /* padding */
88 |
89 | if (sID != MAKE_U32('U','T','M','0')) {
90 | fprintf(stderr, "error: not a valid UTK file (expected UTM0 signature)\n");
91 | return EXIT_FAILURE;
92 | } else if ((dwOutSize & 0x01) != 0 || dwOutSize >= 0x01000000) {
93 | fprintf(stderr, "error: invalid dwOutSize %u\n", (unsigned)dwOutSize);
94 | return EXIT_FAILURE;
95 | } else if (dwWfxSize != 20) {
96 | fprintf(stderr, "error: invalid dwWfxSize %u (expected 20)\n", (unsigned)dwWfxSize);
97 | return EXIT_FAILURE;
98 | } else if (wFormatTag != 1) {
99 | fprintf(stderr, "error: invalid wFormatTag %u (expected 1)\n", (unsigned)wFormatTag);
100 | return EXIT_FAILURE;
101 | }
102 |
103 | if (nChannels != 1) {
104 | fprintf(stderr, "error: invalid nChannels %u (only mono is supported)\n", (unsigned)nChannels);
105 | error = 1;
106 | }
107 | if (nSamplesPerSec < 8000 || nSamplesPerSec > 192000) {
108 | fprintf(stderr, "error: invalid nSamplesPerSec %u\n", (unsigned)nSamplesPerSec);
109 | error = 1;
110 | }
111 | if (nAvgBytesPerSec != nSamplesPerSec * nBlockAlign) {
112 | fprintf(stderr, "error: invalid nAvgBytesPerSec %u (expected nSamplesPerSec * nBlockAlign)\n", (unsigned)nAvgBytesPerSec);
113 | error = 1;
114 | }
115 | if (nBlockAlign != 2) {
116 | fprintf(stderr, "error: invalid nBlockAlign %u (expected 2)\n", (unsigned)nBlockAlign);
117 | error = 1;
118 | }
119 | if (wBitsPerSample != 16) {
120 | fprintf(stderr, "error: invalid wBitsPerSample %u (expected 16)\n", (unsigned)wBitsPerSample);
121 | error = 1;
122 | }
123 | if (cbSize != 0) {
124 | fprintf(stderr, "error: invalid cbSize %u (expected 0)\n", (unsigned)cbSize);
125 | error = 1;
126 | }
127 | if (error)
128 | return EXIT_FAILURE;
129 |
130 | num_samples = dwOutSize/2;
131 |
132 | /* Write the WAV header. */
133 | write_u32(outfp, MAKE_U32('R','I','F','F'));
134 | write_u32(outfp, 36 + num_samples*2);
135 | write_u32(outfp, MAKE_U32('W','A','V','E'));
136 | write_u32(outfp, MAKE_U32('f','m','t',' '));
137 | write_u32(outfp, 16);
138 | write_u16(outfp, wFormatTag);
139 | write_u16(outfp, nChannels);
140 | write_u32(outfp, nSamplesPerSec);
141 | write_u32(outfp, nAvgBytesPerSec);
142 | write_u16(outfp, nBlockAlign);
143 | write_u16(outfp, wBitsPerSample);
144 | write_u32(outfp, MAKE_U32('d','a','t','a'));
145 | write_u32(outfp, num_samples*2);
146 |
147 | /* Decode. */
148 | utk_init(&ctx);
149 | utk_set_fp(&ctx, infp);
150 |
151 | while (num_samples > 0) {
152 | int count = MIN(num_samples, 432);
153 |
154 | utk_decode_frame(&ctx);
155 |
156 | for (i = 0; i < count; i++) {
157 | int x = ROUND(ctx.decompressed_frame[i]);
158 | write_u16(outfp, (int16_t)CLAMP(x, -32768, 32767));
159 | }
160 |
161 | num_samples -= count;
162 | }
163 |
164 | if (fclose(outfp) != 0) {
165 | fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
166 | return EXIT_FAILURE;
167 | }
168 |
169 | fclose(infp);
170 |
171 | return EXIT_SUCCESS;
172 | }
--------------------------------------------------------------------------------
/utkencode.c:
--------------------------------------------------------------------------------
1 | /*
2 | ** utkencode
3 | ** Encode wav to Maxis UTalk.
4 | ** Authors: Fatbag
5 | ** License: Public domain (no warranties)
6 | ** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s
7 | ** -o utkencode utkencode.c
8 | */
9 |
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 |
/* Generic helpers. Arguments may be evaluated more than once, so do not
** pass expressions with side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x))
/* Round to the nearest integer, halves away from zero; yields an int. */
#define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5)))
#define ABS(x) ((x)>=0?(x):-(x))

/* Little-endian loads from a byte pointer. READ32 widens each byte to
** uint32_t before shifting: shifting a promoted int left by 24 is
** undefined when the byte is 0x80 or larger. */
#define READ16(x) ((x)[0]|((x)[1]<<8))
#define READ32(x) ((uint32_t)(x)[0]|((uint32_t)(x)[1]<<8)|\
    ((uint32_t)(x)[2]<<16)|((uint32_t)(x)[3]<<24))

/* Little-endian stores to a byte pointer. The whole expansion is
** parenthesized so the macros behave as a single expression. */
#define WRITE16(d,s) ((d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8))
#define WRITE32(d,s) ((d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\
    (d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24))
29 |
/*
** 64-entry table of float constants. Entries 0 and 32 are zero, entries
** 1..31 are negative and rise towards zero, and entries 33..63 mirror them
** with the opposite sign (entry 32+n equals minus entry 32-n).
** NOTE(review): the name suggests reflection coefficient ("rc") levels,
** presumably the alphabet handed to quantize() -- confirm against the
** caller.
*/
30 | const float utk_rc_table[64] = {
31 | 0,
32 | -.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875,
33 | -.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375,
34 | -.930754005908966064453125, -.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375,
35 | -.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625,
36 | -.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875,
37 | -.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875,
38 | -.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375,
39 | -.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625,
40 | 0,
41 | +.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375,
42 | +.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875,
43 | +.4642859995365142822265625, +.515873014926910400390625, +.567460000514984130859375, +.61904799938201904296875,
44 | +.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625,
45 | +.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375,
46 | +.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375,
47 | +.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875,
48 | +.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375
49 | };
50 |
/* Program name printed as the prefix of usage and error messages; set by
** parse_arguments() from argv[0], or "utkencode" when that is empty. */
51 | static const char *prog_name;
52 |
53 | static void print_help(void)
54 | {
55 | printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name);
56 | printf("Encode wav to Maxis UTalk.\n");
57 | printf("\n");
58 | printf("General options:\n");
59 | printf(" -f, --force overwrite without prompting\n");
60 | printf(" -q, --quiet suppress normal output and do not prompt\n");
61 | printf(" -h, --help display this help and exit\n");
62 | printf(" -V, --version output version information and exit\n");
63 | printf("\n");
64 | printf("Encoding options:\n");
65 | printf(" -b, --bitrate=N target bitrate in bits/sec (default 32000)\n");
66 | printf(" -H, --halved-inn encode innovation using half bandwidth\n");
67 | printf(" (default)\n");
68 | printf(" -F, --full-inn encode innovation using full bandwidth\n");
69 | printf(" -T, --huff-threshold=N use the Huffman codebook with threshold N where\n");
70 | printf(" N is an integer between 16 and 32 (inclusive)\n");
71 | printf(" (default 24)\n");
72 | printf(" -S, --inngain-sig=N use innovation gain significand N where N is\n");
73 | printf(" between 8 and 128 (inclusive) in steps of 8\n");
74 | printf(" (default 64)\n");
75 | printf(" -B, --inngain-base=N use innovation gain base N where N is between\n");
76 | printf(" 1.040 and 1.103 (inclusive) in steps of 0.001\n");
77 | printf(" (default 1.068)\n");
78 | printf("\n");
79 | printf("If infile is \"-\", read from standard input.\n");
80 | printf("If outfile is \"-\", write to standard output.\n");
81 | }
82 |
/* Print the program name and version number to stdout. */
static void print_version(void)
{
    fputs("utkencode 0.0\n", stdout);
}
87 |
88 | static void print_usage_error(void)
89 | {
90 | fprintf(stderr, "Usage: %s [options] infile.wav outfile.utk\n",
91 | prog_name);
92 | fprintf(stderr, "Try '%s --help' for more options.\n", prog_name);
93 | }
94 |
/* Option tables passed to getopt_long() by parse_arguments(). In the short
** option string a letter followed by ':' takes an argument. */
95 | static const char short_options[] = "fqhVb:HFT:S:B:";
/* Each long option maps to the short option letter in its last field; the
** all-zero entry terminates the table. */
96 | static const struct option long_options[] = {
97 | {"force", no_argument, 0, 'f'},
98 | {"quiet", no_argument, 0, 'q'},
99 | {"help", no_argument, 0, 'h'},
100 | {"version", no_argument, 0, 'V'},
101 | {"bitrate", required_argument, 0, 'b'},
102 | {"halved-inn", no_argument, 0, 'H'},
103 | {"full-inn", no_argument, 0, 'F'},
104 | {"huff-threshold", required_argument, 0, 'T'},
105 | {"inngain-sig", required_argument, 0, 'S'},
106 | {"inngain-base", required_argument, 0, 'B'},
107 | {0, 0, 0, 0}
108 | };
109 |
/* Encoder settings. The initializers are the defaults and agree with the
** defaults listed in the help text. */
110 | static int bitrate = 32000;
111 | static int force = 0;
112 | static int quiet = 0;
113 | static int halved_innovation = 1;
114 | static int huffman_threshold = 24;
115 | static int inngain_sig = 64;
116 | static float inngain_base = 1.068f;
/* Input/output file names and streams. */
117 | static const char *infile = "";
118 | static const char *outfile = "";
119 | static FILE *infp = NULL;
120 | static FILE *outfp = NULL;
121 |
/* Working buffers.
** NOTE(review): 432 presumably is the frame length in samples and 108 a
** quarter-frame subframe; the extra 12, 324 and 5+5 elements look like
** history/padding for the filters that index outside [0, n) -- confirm
** against the code that fills these buffers. */
122 | static uint8_t wav_buffer[432*2];
123 | static float input_samples[12+432];
124 | static float adaptive_codebook[324+432];
125 | static uint8_t compressed_buffer[1024];
/* Two scratch bit buffers that encode_innovation() alternates between
** while comparing candidate encodings. */
126 | static uint8_t inn_buffers[2][256];
127 | static float prev_rc[12];
128 | static float innovation[5+108+5];
/* Innovation gain per 6-bit gain index, read as inn_gains[pow] by the
** innovation encoders. */
129 | static float inn_gains[64];
130 |
/* State of a bit writer that fills `buffer` least-significant-bit first. */
131 | struct bit_writer_context {
/* Number of bits already occupied in buffer[pos]. */
132 | uint8_t written_bits_count;
/* Index of the byte currently being filled. */
133 | size_t pos;
/* Caller-provided output storage. */
134 | uint8_t *buffer;
135 | };
136 |
137 | static void read_data(FILE *fp, uint8_t *buffer, size_t size)
138 | {
139 | if (fread(buffer, 1, size, fp) != size) {
140 | fprintf(stderr, "%s: failed to read '%s': %s\n",
141 | prog_name, infile, ferror(fp)
142 | ? strerror(errno) : "reached end of file");
143 | exit(EXIT_FAILURE);
144 | }
145 | }
146 |
147 | static void write_data(FILE *fp, const uint8_t *buffer, size_t size)
148 | {
149 | if (fwrite(buffer, 1, size, fp) != size) {
150 | fprintf(stderr, "%s: failed to write to '%s': %s\n",
151 | prog_name, outfile, ferror(fp)
152 | ? strerror(errno) : "reached end of file");
153 | exit(EXIT_FAILURE);
154 | }
155 | }
156 |
157 | static void flush_data(FILE *fp)
158 | {
159 | if (fflush(fp) != 0) {
160 | fprintf(stderr, "%s: failed to flush '%s': %s\n",
161 | prog_name, outfile, strerror(errno));
162 | exit(EXIT_FAILURE);
163 | }
164 | }
165 |
166 | static void bwc_init(struct bit_writer_context *bwc, uint8_t *buffer)
167 | {
168 | bwc->written_bits_count = 0;
169 | bwc->pos = 0;
170 | bwc->buffer = buffer;
171 | bwc->buffer[0] = 0;
172 | }
173 |
174 | static void bwc_write_bits(struct bit_writer_context *bwc, unsigned value,
175 | uint8_t count)
176 | {
177 | unsigned x = value << bwc->written_bits_count;
178 |
179 | bwc->buffer[bwc->pos] |= (uint8_t)x;
180 | bwc->written_bits_count += count;
181 |
182 | while (bwc->written_bits_count >= 8) {
183 | x >>= 8;
184 | bwc->buffer[++bwc->pos] = (uint8_t)x;
185 | bwc->written_bits_count -= 8;
186 | }
187 | }
188 |
189 | static void bwc_pad(struct bit_writer_context *bwc)
190 | {
191 | if (bwc->written_bits_count != 0) {
192 | bwc->buffer[++bwc->pos] = 0;
193 | bwc->written_bits_count = 0;
194 | }
195 | }
196 |
197 | static void bwc_flush(struct bit_writer_context *bwc, FILE *fp)
198 | {
199 | write_data(fp, bwc->buffer, bwc->pos);
200 | bwc->buffer[0] = bwc->buffer[bwc->pos];
201 | bwc->pos = 0;
202 | }
203 |
/*
** Return the index of the alphabet entry closest to `value`. When several
** entries are equally close, the lowest index wins. `alphabet` must hold
** at least one entry.
*/
static unsigned quantize(float value, const float *alphabet, size_t alphabet_size)
{
    unsigned nearest = 0;
    unsigned idx;
    float nearest_gap = value - alphabet[0];

    if (!(nearest_gap >= 0))
        nearest_gap = -nearest_gap;

    for (idx = 1; idx < alphabet_size; idx++) {
        float gap = value - alphabet[idx];

        if (!(gap >= 0))
            gap = -gap;

        if (gap < nearest_gap) {
            nearest = idx;
            nearest_gap = gap;
        }
    }

    return nearest;
}
221 |
/*
** Parse up to `n` decimal digits as the digits following a decimal point
** and return them scaled to exactly `n` places: with n = 3, "068" gives 68
** and "5" gives 500. Used in the parsing of some arguments.
**
** Returns -1 if the string contains a non-digit or more than `n` digits,
** or if `n` exceeds the size of the power-of-ten table (this used to
** write past the end of that table).
*/
static int read_dec_places(const char *string, int n)
{
    int i;
    int value = 0;
    int pows_10[10];

    if (n > (int)(sizeof(pows_10)/sizeof(pows_10[0])))
        return -1;

    pows_10[0] = 1;
    for (i = 1; i < n; i++)
        pows_10[i] = pows_10[i-1] * 10;

    /* The first digit has the highest weight, 10^(n-1). */
    for (i = 0; i < n && string[i] >= '0' && string[i] <= '9'; i++)
        value += pows_10[n-1-i] * (string[i]-'0');

    /* Anything left over is a bad character or an excess digit. */
    return (string[i] == '\0') ? value : -1;
}
238 |
/* Return 1 if `filename` can be opened for reading, 0 otherwise. */
static int file_exists(const char *filename)
{
    FILE *probe = fopen(filename, "rb");

    if (probe == NULL)
        return 0;

    fclose(probe);
    return 1;
}
251 |
/*
** Compute the autocorrelation of a 432-sample frame at lags 0..12:
** r[lag] is the sum of samples[n]*samples[n+lag] over all n for which
** both indices lie inside the frame.
*/
static void find_autocorrelations(float *r, const float *samples)
{
    int lag;

    for (lag = 0; lag < 13; lag++) {
        const float *shifted = samples + lag;
        int n;

        r[lag] = 0;
        for (n = 0; n < 432 - lag; n++)
            r[lag] += samples[n]*shifted[n];
    }
}
262 |
/*
** Order-12 Levinson-type recursion (per the function name) over the
** values in r, with right-hand side y.
**
** x receives 12 solution coefficients and k receives 12 reflection
** coefficients. r is indexed 0..11 and y is indexed 0..11; find_rc() calls
** this with y = r+1.
**
** If r[0] or the running prediction error e comes within 1/32768 of zero,
** all 12 entries of both x and k are set to zero instead.
*/
263 | static void levinson_durbin_symmetric(float *x, float *k,
264 | const float *r, const float *y)
265 | {
266 | float a[12]; /* the forward vector */
267 | float e; /* prediction error */
268 | int i;
269 |
/* Avoid dividing by a (near-)zero r[0] below. */
270 | if (r[0] <= 1.0f/32768.0f && r[0] >= -1.0f/32768.0f)
271 | goto zero;
272 |
273 | a[0] = 1;
274 | e = r[0];
275 | x[0] = y[0]/r[0];
276 |
/* Each pass extends the forward vector and the solution by one order. */
277 | for (i = 1; i < 12; i++) {
278 | float u, m;
279 | float a_temp[12];
280 | int j;
281 |
282 | u = 0.0f;
283 | for (j = 0; j < i; j++)
284 | u += a[j]*r[i-j];
285 |
286 | k[i-1] = -u/e; /* reflection coefficient i-1 */
287 | e += u*k[i-1]; /* update e to the new value e - u*u/e */
288 |
289 | if (e <= 1.0f/32768.0f && e >= -1.0f/32768.0f)
290 | goto zero;
291 |
/* Update a[] from a copy of its previous values, since each new a[j]
** depends on the old a[i-j]. */
292 | memcpy(a_temp, a, i*sizeof(float));
293 | a[i] = 0.0f;
294 | for (j = 1; j <= i; j++)
295 | a[j] += k[i-1]*a_temp[i-j];
296 |
/* m is what remains of y[i] after the current solution, scaled by e. */
297 | m = y[i];
298 | for (j = 0; j < i; j++)
299 | m -= x[j]*r[i-j];
300 | m /= e;
301 |
302 | x[i] = 0.0f;
303 | for (j = 0; j <= i; j++)
304 | x[j] += m*a[i-j];
305 | }
306 |
/* The loop only produces k[0..10]; the last reflection coefficient is
** taken from the last solution coefficient. */
307 | k[11] = -x[11];
308 |
309 | return;
310 |
311 | zero:
312 | for (i = 0; i < 12; i++)
313 | x[i] = 0.0f;
314 | for (i = 0; i < 12; i++)
315 | k[i] = 0.0f;
316 | }
317 |
/*
** Convert 12 reflection coefficients `k` into 12 coefficients `x` by
** building the forward vector one order at a time; x[n] is the negated
** forward vector entry n+1.
*/
static void rc_to_lpc(float *x, const float *k)
{
    float fwd[13]; /* the forward vector; fwd[0] stays 1 */
    float prev[12]; /* fwd as it was before the current order */
    unsigned order, n;

    fwd[0] = 1;

    for (order = 1; order < 13; order++) {
        for (n = 0; n < order; n++)
            prev[n] = fwd[n];

        fwd[order] = 0.0f;
        for (n = 1; n <= order; n++)
            fwd[n] += k[order-1]*prev[order-n];
    }

    for (n = 0; n < 12; n++)
        x[n] = -fwd[n+1];
}
335 |
/* Compute the 12 reflection coefficients of a 432-sample frame from its
** autocorrelation. The solution coefficients produced alongside them are
** discarded. */
static void find_rc(float *rc, const float *samples)
{
    float autocorr[13];
    float discarded_lpc[12];

    find_autocorrelations(autocorr, samples);
    levinson_durbin_symmetric(discarded_lpc, rc, autocorr, &autocorr[1]);
}
343 |
/*
** Compute the prediction residual of `source`: each output sample is the
** source sample minus a prediction formed from the 12 preceding source
** samples weighted by `lpc` (lpc[0] applies to the most recent one).
** `source` must be preceded by at least 12 readable samples.
*/
static void find_excitation(float *excitation, const float *source,
    int length, const float *lpc)
{
    int n, tap;

    for (n = 0; n < length; n++) {
        float predicted = 0.0f;

        for (tap = 0; tap < 12; tap++)
            predicted += lpc[tap]*source[n-1-tap];

        excitation[n] = source[n] - predicted;
    }
}
356 |
/*
** Find the pitch lag and gain for the 108 samples starting at
** `excitation`. Lags 108..323 are searched for the largest positive
** correlation between the current samples and the samples `lag` positions
** earlier, so `excitation` must be preceded by at least 323 readable
** samples. The gain is that correlation divided by the energy of the
** delayed samples, limited to [0, 1]. If the energy is below 1/32768 the
** result is lag 108 with gain 0.
*/
static void find_pitch(int *pitch_lag, float *pitch_gain,
    const float *excitation)
{
    int best_lag = 108;
    float best_corr = 0.0f;
    float energy = 0.0f;
    float gain;
    int lag, n;

    /* Find the optimal pitch lag. */
    for (lag = 108; lag < 324; lag++) {
        float corr = 0.0f;

        for (n = 0; n < 108; n++)
            corr += excitation[n]*excitation[n-lag];

        if (corr > best_corr) {
            best_corr = corr;
            best_lag = lag;
        }
    }

    /* Energy of the history segment selected by the best lag. */
    for (n = 0; n < 108; n++) {
        float past = excitation[n-best_lag];
        energy += past*past;
    }

    if (!(energy >= 1/32768.0f)) {
        *pitch_lag = 108;
        *pitch_gain = 0.0f;
        return;
    }

    /* Find the optimal pitch gain. */
    gain = best_corr / energy;
    if (gain < 0.0f)
        gain = 0.0f;
    else if (gain > 1.0f)
        gain = 1.0f;

    *pitch_lag = best_lag;
    *pitch_gain = gain;
}
395 |
/*
** Fill in every second sample of the 108-sample signal `x`, starting at
** index !a (the odd samples when a is 0, the even ones when a is 1).
** With z set the samples are zeroed; otherwise each is computed from its
** neighbours at distances 1, 3 and 5, which means up to five samples on
** either side of the 108 are read.
*/
static void interpolate(float *x, int a, int z)
{
    int n;

    for (n = !a; n < 108; n += 2) {
        if (z) {
            x[n] = 0.0f;
        } else {
            const float d1 = x[n-1]+x[n+1];
            const float d3 = x[n-3]+x[n+3];
            const float d5 = x[n-5]+x[n+5];

            x[n] = d1 * .5973859429f
                - d3 * .1145915613f
                + d5 * .0180326793f;
        }
    }
}
411 |
/*
** Return the squared error that interpolate(x, a, z) would introduce on
** the samples it overwrites: the sum of squared differences between each
** such sample and its replacement (zero when z is set, otherwise the
** value computed from the neighbours at distances 1, 3 and 5).
*/
static float interpolation_error(int a, int z, const float *x)
{
    float total = 0.0f;
    int n;

    for (n = !a; n < 108; n += 2) {
        if (z) {
            total += x[n]*x[n];
        } else {
            float estimate
                = (x[n-1]+x[n+1]) * .5973859429f
                - (x[n-3]+x[n+3]) * .1145915613f
                + (x[n-5]+x[n+5]) * .0180326793f;
            total += (estimate - x[n])*(estimate - x[n]);
        }
    }

    return total;
}
432 |
/*
** Choose the a and z flags that introduce the least error in the
** downsampling step. Candidates are tried in the order
** (a=0,z=1), (a=1,z=1), (a=0,z=0), (a=1,z=0) and a later candidate only
** wins when its error is strictly smaller, so ties (e.g. in silence)
** go to the zero flag.
*/
static void find_a_z_flags(int *a, int *z, const float *innovation)
{
    static const int candidates[4][2] = {
        {0, 1}, {1, 1}, {0, 0}, {1, 0}
    };
    float lowest = interpolation_error(candidates[0][0],
        candidates[0][1], innovation);
    int winner = 0;
    int c;

    for (c = 1; c < 4; c++) {
        float err = interpolation_error(candidates[c][0],
            candidates[c][1], innovation);

        if (err < lowest) {
            lowest = err;
            winner = c;
        }
    }

    *a = candidates[winner][0];
    *z = candidates[winner][1];
}
466 |
/* One code word: the bit pattern and its length in bits, in the form
** passed to bwc_write_bits(). */
467 | struct huffman_code {
468 | uint16_t bits_value;
469 | uint16_t bits_count;
470 | };
471 |
/*
** Code words for quantized innovation values -13..+13, indexed as
** huffman_models[model][13+value]. encode_huffman() uses model 1 for the
** value that follows one outside -1..+1, and model 0 otherwise (including
** after a zero run).
*/
472 | static const struct huffman_code huffman_models[2][13+1+13] = {
473 | /* model 0 */
474 | {
475 | /* -13 */ {16255, 16},
476 | /* -12 */ {8063, 15},
477 | /* -11 */ {3967, 14},
478 | /* -10 */ {1919, 13},
479 | /* -9 */ {895, 12},
480 | /* -8 */ {383, 11},
481 | /* -7 */ {127, 10},
482 | /* -6 */ {63, 8},
483 | /* -5 */ {31, 7},
484 | /* -4 */ {15, 6},
485 | /* -3 */ {7, 5},
486 | /* -2 */ {3, 4},
487 | /* -1 */ {2, 2},
488 | /* 0 */ {0, 2},
489 | /* +1 */ {1, 2},
490 | /* +2 */ {11, 4},
491 | /* +3 */ {23, 5},
492 | /* +4 */ {47, 6},
493 | /* +5 */ {95, 7},
494 | /* +6 */ {191, 8},
495 | /* +7 */ {639, 10},
496 | /* +8 */ {1407, 11},
497 | /* +9 */ {2943, 12},
498 | /* +10 */ {6015, 13},
499 | /* +11 */ {12159, 14},
500 | /* +12 */ {24447, 15},
501 | /* +13 */ {49023, 16}
502 | },
503 |
504 | /* model 1 */
505 | {
506 | /* -13 */ {8127, 15},
507 | /* -12 */ {4031, 14},
508 | /* -11 */ {1983, 13},
509 | /* -10 */ {959, 12},
510 | /* -9 */ {447, 11},
511 | /* -8 */ {191, 10},
512 | /* -7 */ {63, 9},
513 | /* -6 */ {31, 7},
514 | /* -5 */ {15, 6},
515 | /* -4 */ {7, 5},
516 | /* -3 */ {3, 4},
517 | /* -2 */ {1, 3},
518 | /* -1 */ {2, 3},
519 | /* 0 */ {0, 2},
520 | /* +1 */ {6, 3},
521 | /* +2 */ {5, 3},
522 | /* +3 */ {11, 4},
523 | /* +4 */ {23, 5},
524 | /* +5 */ {47, 6},
525 | /* +6 */ {95, 7},
526 | /* +7 */ {319, 9},
527 | /* +8 */ {703, 10},
528 | /* +9 */ {1471, 11},
529 | /* +10 */ {3007, 12},
530 | /* +11 */ {6079, 13},
531 | /* +12 */ {12223, 14},
532 | /* +13 */ {24511, 15}
533 | }
534 | };
535 |
536 | static void encode_huffman(struct bit_writer_context *bwc,
537 | float *innovation_out, int *bits_used_out, float *error_out,
538 | const float *innovation_in, int halved_innovation,
539 | int pow, int a, int z)
540 | {
541 | int interval = halved_innovation ? 2 : 1;
542 | float inn_gain;
543 | float total_error = 0.0f;
544 | int counter;
545 | int values[108];
546 | int zero_counts[108];
547 | int model;
548 | int bits_start, bits_end;
549 | int i;
550 |
551 | inn_gain = inn_gains[pow];
552 | if (!z)
553 | inn_gain *= 0.5f;
554 |
555 | bits_start = 8*bwc->pos + bwc->written_bits_count;
556 |
557 | if (halved_innovation)
558 | bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
559 | else
560 | bwc_write_bits(bwc, pow, 6);
561 |
562 | for (i = a; i < 108; i += interval) {
563 | float e;
564 |
565 | values[i] = ROUND(CLAMP(
566 | innovation_in[i]/inn_gain, -13.0f, 13.0f));
567 |
568 | innovation_out[i] = inn_gain*values[i];
569 |
570 | e = innovation_out[i] - innovation_in[i];
571 | total_error += e*e;
572 | }
573 |
574 | *error_out = total_error;
575 |
576 | /* Find the zero runs at each position i (how many zeros
577 | ** in a row there are at position i).
578 | ** When interval=2 and a=1, start the search from i=105 instead
579 | ** of 107 in order to duplicate the off-by-one mistake in the
580 | ** decoder. (Thus, we will subtract a instead of adding.)
581 | ** For details, see: http://wiki.niotso.org/UTK */
582 | counter = 0;
583 | for (i = 108 - interval - a; i >= 0; i -= interval) {
584 | if (values[i] == 0)
585 | counter++;
586 | else
587 | counter = 0;
588 | zero_counts[i] = counter;
589 | }
590 |
591 | i = a;
592 | model = 0;
593 | while (i < 108) {
594 | if (zero_counts[i] >= 7) {
595 | int length = MIN(zero_counts[i], 70);
596 |
597 | if (model == 0)
598 | bwc_write_bits(bwc, 255 | ((length-7)<<8), 14);
599 | else
600 | bwc_write_bits(bwc, 127 | ((length-7)<<7), 13);
601 |
602 | model = 0;
603 | i += length * interval;
604 | } else {
605 | int value = values[i];
606 |
607 | bwc_write_bits(bwc,
608 | huffman_models[model][13+value].bits_value,
609 | huffman_models[model][13+value].bits_count);
610 |
611 | model = (value < -1 || value > 1);
612 | i += interval;
613 | }
614 | }
615 |
616 | bits_end = 8*bwc->pos + bwc->written_bits_count;
617 | *bits_used_out = bits_end - bits_start;
618 | }
619 |
620 | static void encode_triangular(struct bit_writer_context *bwc,
621 | float *innovation_out, int *bits_used_out, float *error_out,
622 | const float *innovation_in, int halved_innovation,
623 | int pow, int a, int z)
624 | {
625 | int interval = halved_innovation ? 2 : 1;
626 | float inn_gain;
627 | float total_error = 0.0f;
628 | int bits_start, bits_end;
629 | int i;
630 |
631 | inn_gain = 2.0f*inn_gains[pow];
632 | if (!z)
633 | inn_gain *= 0.5f;
634 |
635 | bits_start = 8*bwc->pos + bwc->written_bits_count;
636 |
637 | if (halved_innovation)
638 | bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
639 | else
640 | bwc_write_bits(bwc, pow, 6);
641 |
642 | for (i = a; i < 108; i += interval) {
643 | float e;
644 | int value = ROUND(CLAMP(
645 | innovation_in[i]/inn_gain, -1.0f, 1.0f));
646 |
647 | if (value > 0)
648 | bwc_write_bits(bwc, 3, 2);
649 | else if (value < 0)
650 | bwc_write_bits(bwc, 1, 2);
651 | else
652 | bwc_write_bits(bwc, 0, 1);
653 |
654 | innovation_out[i] = inn_gain*value;
655 |
656 | e = innovation_out[i] - innovation_in[i];
657 | total_error += e*e;
658 | }
659 |
660 | bits_end = 8*bwc->pos + bwc->written_bits_count;
661 | *bits_used_out = bits_end - bits_start;
662 |
663 | *error_out = total_error;
664 | }
665 |
/*
** Apply a weak low-pass filter to the innovation signal before it is
** downsampled by 1/2. Only the samples that survive downsampling
** (x[a], x[a+2], ...) are filtered; each is combined with its neighbours
** at distances 1, 3 and 5, which are never modified here, so up to five
** samples on either side of the 108 are read. The result is halved when
** z is clear.
*/
static void low_pass_innovation(float *x, int a, int z)
{
    const float scale = z ? 1.0f : 0.5f;
    int n;

    /* filter coeffs: (GNU Octave)
    ** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))' */
    for (n = a; n < 108; n += 2) {
        const float d1 = x[n-1]+x[n+1];
        const float d3 = x[n-3]+x[n+3];
        const float d5 = x[n-5]+x[n+5];

        x[n] = scale*(x[n]
            + d1 * 0.6189590521549956f
            + d3 * -0.1633990749076792f
            + d5 * 0.05858453198856907f);
    }
}
682 |
/* One candidate encoding of an innovation signal, as produced by
** encode_huffman() or encode_triangular(). */
683 | struct innovation_encoding {
/* Bit writer holding the candidate's bit stream. */
684 | struct bit_writer_context bwc;
/* Quantized innovation samples. */
685 | float innovation[108];
/* Number of bits the candidate appended. */
686 | int bits_used;
/* Summed squared quantization error. */
687 | float error;
688 | };
689 |
/*
** Encode one 108-sample innovation signal and append it to `bwc`.
**
** In halved mode the a/z flags are chosen first and the signal is
** low-pass filtered. Candidate encodings are then produced for a range of
** gain indices into two alternating scratch buffers: with use_huffman the
** candidate whose bit count is closest to target_bit_count is kept,
** otherwise (triangular model) the candidate with the smallest error.
**
** On return `innovation` holds the quantized (and, in halved mode,
** interpolated) signal and *bits_used the bit count of the kept candidate.
*/
690 | static void encode_innovation(struct bit_writer_context *bwc,
691 | float *innovation, int halved_innovation, int use_huffman,
692 | int *bits_used, int target_bit_count)
693 | {
694 | int a = 0, z = 1;
695 | struct innovation_encoding encodings[2];
/* Index of the scratch slot the next candidate is written to. */
696 | int m = 0;
697 |
698 | if (halved_innovation) {
699 | find_a_z_flags(&a, &z, innovation);
700 | low_pass_innovation(innovation, a, z);
701 | }
702 |
703 | if (use_huffman) {
704 | /* Encode using the Huffman model. */
705 | int interval = halved_innovation ? 2 : 1;
706 | float max_value = 0.0f;
707 | int min_pow;
708 | int best_distance = 0;
709 | int pow;
710 | int i;
711 |
712 | /* Find the minimum innovation power such that the innovation
713 | ** signal doesn't clip anywhere in time. (We consider clipping
714 | ** a sample by <=0.5 of a quantization level to be okay since
715 | ** the sample already rounds down [towards zero].) */
716 | for (i = a; i < 108; i += interval) {
717 | float value = ABS(innovation[i]);
718 | if (value > max_value)
719 | max_value = value;
720 | }
721 | for (i = 62; i >= 0; i--) {
722 | if (inn_gains[i]*(!z ? 0.5f : 1.0f)*13.5f
723 | < max_value)
724 | break;
725 | }
726 | min_pow = i+1;
727 |
728 | /* Find the innovation gain that results in the closest
729 | ** to the target bitrate without clipping occurring. */
730 | for (pow = min_pow; pow <= 63; pow++) {
731 | int distance;
732 |
/* Seed the scratch writer with the partial byte pending in bwc so
** the candidate continues at the same bit offset. */
733 | bwc_init(&encodings[m].bwc, inn_buffers[m]);
734 | bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
735 | bwc->written_bits_count);
736 |
737 | encode_huffman(&encodings[m].bwc,
738 | encodings[m].innovation,
739 | &encodings[m].bits_used,
740 | &encodings[m].error,
741 | innovation, halved_innovation,
742 | pow, a, z);
743 |
744 | distance = ABS(encodings[m].bits_used
745 | - target_bit_count);
/* A new best stays in slot m; switching m makes the next candidate
** overwrite the other slot instead. */
746 | if (pow == min_pow || distance < best_distance) {
747 | best_distance = distance;
748 | m = !m; /* swap the buffers */
749 | }
750 | }
751 | } else {
752 | /* Encode using the triangular noise model. */
753 | float best_error = 0.0f;
754 | int pow;
755 |
756 | /* Find the innovation gain that results in
757 | ** the highest quality. */
758 | for (pow = 0; pow <= 63; pow++) {
759 | bwc_init(&encodings[m].bwc, inn_buffers[m]);
760 | bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
761 | bwc->written_bits_count);
762 |
763 | encode_triangular(&encodings[m].bwc,
764 | encodings[m].innovation,
765 | &encodings[m].bits_used,
766 | &encodings[m].error,
767 | innovation, halved_innovation,
768 | pow, a, z);
769 |
770 | if (pow == 0 || encodings[m].error < best_error) {
771 | best_error = encodings[m].error;
772 | m = !m; /* swap the buffers */
773 | }
774 | }
775 | }
776 |
777 | /* Swap the buffers again to return back to our best encoding. */
778 | m = !m;
779 |
780 | /* Write this encoding out to the UTK bitstream. */
/* The copy starts at the partially filled byte of bwc (which the
** candidate reproduced) and includes the candidate's own last byte. */
781 | memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer,
782 | encodings[m].bwc.pos+1);
783 | bwc->pos += encodings[m].bwc.pos;
784 | bwc->written_bits_count = encodings[m].bwc.written_bits_count;
785 |
786 | /* Update the innovation signal with the quantized version. */
787 | memcpy(innovation, encodings[m].innovation, 108*sizeof(float));
/* In halved mode only every second sample was encoded; rebuild the
** others the way interpolate() defines them. */
788 | if (halved_innovation)
789 | interpolate(innovation, a, z);
790 |
791 | *bits_used = encodings[m].bits_used;
792 | }
793 |
794 | static int parse_arguments(int argc, char *argv[])
795 | {
796 | int c;
797 | int value;
798 | char *endptr;
799 |
800 | prog_name = (argc >= 1 && argv[0][0] != '\0') ? argv[0] : "utkencode";
801 |
802 | while ((c = getopt_long(argc, argv, short_options,
803 | long_options, NULL)) != -1) {
804 | switch (c) {
805 | case 'b':
806 | bitrate = (int)strtol(optarg, &endptr, 10);
807 | if (*endptr != '\0'
808 | || bitrate < 1000
809 | || bitrate > 1000000) {
810 | fprintf(stderr, "%s: invalid bitrate -- %s\n",
811 | prog_name, optarg);
812 | print_usage_error();
813 | return -1;
814 | }
815 | break;
816 | case 'f':
817 | force = 1;
818 | break;
819 | case 'q':
820 | quiet = 1;
821 | break;
822 | case 'h':
823 | print_help();
824 | return 1;
825 | case 'V':
826 | print_version();
827 | return 1;
828 | case 'H':
829 | halved_innovation = 1;
830 | break;
831 | case 'F':
832 | halved_innovation = 0;
833 | break;
834 | case 'T':
835 | huffman_threshold = (int)strtol(optarg, &endptr, 10);
836 | if (*endptr != '\0'
837 | || huffman_threshold < 16
838 | || huffman_threshold > 32) {
839 | fprintf(stderr, "%s: invalid Huffman "
840 | "threshold -- %s\n", prog_name, optarg);
841 | print_usage_error();
842 | return -1;
843 | }
844 | break;
845 | case 'S':
846 | inngain_sig = (int)strtol(optarg, &endptr, 10);
847 | if (*endptr != '\0'
848 | || inngain_sig < 8
849 | || inngain_sig > 128
850 | || (inngain_sig & 7) != 0) {
851 | fprintf(stderr, "%s: invalid innovation gain"
852 | " significand -- %s\n", prog_name,
853 | optarg);
854 | print_usage_error();
855 | return -1;
856 | }
857 | break;
858 | case 'B':
859 | if (optarg[0] != '1' || optarg[1] != '.'
860 | || (value = read_dec_places(optarg+2, 3)) < 0
861 | || value < 40
862 | || value > 103) {
863 | fprintf(stderr, "%s: invalid innovation gain"
864 | " base -- %s\n", prog_name, optarg);
865 | print_usage_error();
866 | return -1;
867 | }
868 | inngain_base = 1.0f + (float)value/1000.0f;
869 | break;
870 | default:
871 | print_usage_error();
872 | return -1;
873 | }
874 | }
875 |
876 | if (argc - optind == 0) {
877 | fprintf(stderr, "%s: missing infile\n", prog_name);
878 | print_usage_error();
879 | return -1;
880 | } else if (argc - optind == 1) {
881 | fprintf(stderr, "%s: missing outfile\n", prog_name);
882 | print_usage_error();
883 | return -1;
884 | } else if (argc - optind >= 3) {
885 | fprintf(stderr, "%s: too many arguments passed\n", prog_name);
886 | print_usage_error();
887 | return -1;
888 | }
889 |
890 | infile = argv[optind];
891 | outfile = argv[optind+1];
892 |
893 | return 0;
894 | }
895 |
896 | int main(int argc, char *argv[])
897 | {
898 | int ret;
899 | uint8_t wav_header[44];
900 | uint8_t utk_header[32];
901 | unsigned bytes_remaining;
902 | int sampling_rate;
903 | struct bit_writer_context bwc;
904 | int i, j;
905 |
906 | ret = parse_arguments(argc, argv);
907 | if (ret < 0)
908 | return EXIT_FAILURE;
909 | else if (ret > 0)
910 | return EXIT_SUCCESS;
911 |
912 | if (!strcmp(infile, "-")) {
913 | infp = stdin;
914 | } else {
915 | infp = fopen(infile, "rb");
916 | if (!infp) {
917 | fprintf(stderr, "%s: failed to open '%s' for"
918 | " reading: %s\n", prog_name, infile,
919 | strerror(errno));
920 | return EXIT_FAILURE;
921 | }
922 | }
923 | setvbuf(infp, NULL, _IOFBF, BUFSIZ);
924 |
925 | if (!strcmp(outfile, "-")) {
926 | outfp = stdout;
927 | } else {
928 | if (!force && file_exists(outfile)) {
929 | if (quiet) {
930 | fprintf(stderr, "%s: failed to open '%s' for"
931 | " writing: file already exists\n",
932 | prog_name, outfile);
933 | return EXIT_FAILURE;
934 | } else {
935 | fprintf(stderr, "%s: overwrite '%s'? ",
936 | prog_name, outfile);
937 | if (getchar() != 'y')
938 | return EXIT_FAILURE;
939 | }
940 | }
941 |
942 | outfp = fopen(outfile, "wb");
943 | if (!outfp) {
944 | fprintf(stderr, "%s: failed to open '%s' for"
945 | " writing: %s\n", prog_name, outfile,
946 | strerror(errno));
947 | return EXIT_FAILURE;
948 | }
949 | }
950 | setvbuf(outfp, NULL, _IOFBF, BUFSIZ);
951 |
952 | if (fread(wav_header, 1, 44, infp) != 44) {
953 | if (ferror(infp))
954 | fprintf(stderr, "%s: failed to read '%s': %s\n",
955 | prog_name, infile, strerror(errno));
956 | else
957 | fprintf(stderr, "%s: '%s' is not a valid wav file\n",
958 | prog_name, infile); /* (reached end of file) */
959 | return EXIT_FAILURE;
960 | }
961 |
962 | if (memcmp(wav_header, "RIFF", 4) != 0
963 | || memcmp(wav_header+8, "WAVEfmt ", 8) != 0) {
964 | fprintf(stderr, "%s: '%s' is not a valid wav file\n",
965 | prog_name, infile);
966 | return EXIT_FAILURE;
967 | }
968 |
969 | if (READ16(wav_header+20) != 1 /* wFormatTag */
970 | || READ16(wav_header+22) != 1 /* nChannels */
971 | || READ16(wav_header+32) != 2 /* nBlockAlign */
972 | || READ16(wav_header+34) != 16 /* wBitsPerSample */) {
973 | fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n",
974 | prog_name);
975 | return EXIT_FAILURE;
976 | }
977 |
978 | sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */
979 | if (sampling_rate < 1000 || sampling_rate > 1000000) {
980 | fprintf(stderr, "%s: unsupported sampling rate %d\n",
981 | prog_name, sampling_rate);
982 | return EXIT_FAILURE;
983 | }
984 |
985 | memcpy(utk_header, "UTM0", 4); /* sID */
986 |
987 | /* Drop the last byte from the wav file if there are an odd
988 | ** number of sample bytes. */
989 | bytes_remaining = READ32(wav_header+40) & (~1);
990 | WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */
991 |
992 | WRITE32(utk_header+8, 20); /* dwWfxSize */
993 | memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */
994 | WRITE32(utk_header+28, 0); /* cbSize */
995 |
996 | write_data(outfp, utk_header, 32);
997 |
998 | bwc_init(&bwc, compressed_buffer);
999 |
1000 | bwc_write_bits(&bwc, halved_innovation, 1);
1001 | bwc_write_bits(&bwc, 32 - huffman_threshold, 4);
1002 | bwc_write_bits(&bwc, inngain_sig/8 - 1, 4);
1003 | bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6);
1004 | bwc_flush(&bwc, outfp);
1005 |
1006 | for (i = 0; i < 12; i++)
1007 | input_samples[i] = 0.0f;
1008 | for (i = 0; i < 324; i++)
1009 | adaptive_codebook[i] = 0.0f;
1010 | for (i = 0; i < 12; i++)
1011 | prev_rc[i] = 0.0f;
1012 | for (i = 0; i < 5; i++)
1013 | innovation[i] = 0.0f;
1014 | for (i = 5+108; i < 5+108+5; i++)
1015 | innovation[i] = 0.0f;
1016 |
1017 | inn_gains[0] = inngain_sig;
1018 | for (i = 1; i < 64; i++)
1019 | inn_gains[i] = inn_gains[i-1]*inngain_base;
1020 |
1021 | while (bytes_remaining != 0) {
1022 | /* Encode the next frame of 432 samples. */
1023 | int bytes_to_read;
1024 | int samples_to_read;
1025 | float rc[12];
1026 | float rc_delta[12];
1027 | int use_huffman = 0;
1028 |
1029 | bytes_to_read = (int)MIN(bytes_remaining, 432*2);
1030 | samples_to_read = bytes_to_read >> 1;
1031 |
1032 | read_data(infp, wav_buffer, bytes_to_read);
1033 | bytes_remaining -= bytes_to_read;
1034 |
1035 | for (i = 0; i < samples_to_read; i++) {
1036 | int16_t x = READ16(wav_buffer+2*i);
1037 | input_samples[12+i] = (float)x;
1038 | }
1039 | for (i = samples_to_read; i < 432; i++)
1040 | input_samples[12+i] = 0.0f;
1041 |
1042 | find_rc(rc, input_samples+12);
1043 |
1044 | /* Quantize the reflection coefficients.
1045 | ** In our encoder, we will not make use of utk_rc_table[0]. */
1046 | for (i = 0; i < 4; i++) {
1047 | int idx = 1+quantize(rc[i], utk_rc_table+1, 63);
1048 | bwc_write_bits(&bwc, idx, 6);
1049 | rc[i] = utk_rc_table[idx];
1050 | if (i == 0 && idx < huffman_threshold)
1051 | use_huffman = 1;
1052 | }
1053 | for (i = 4; i < 12; i++) {
1054 | int idx = quantize(rc[i], utk_rc_table+16, 32);
1055 | bwc_write_bits(&bwc, idx, 5);
1056 | rc[i] = utk_rc_table[16+idx];
1057 | }
1058 |
1059 | for (i = 0; i < 12; i++)
1060 | rc_delta[i] = (rc[i] - prev_rc[i])/4.0f;
1061 |
1062 | memcpy(rc, prev_rc, 12*sizeof(float));
1063 |
1064 | for (i = 0; i < 4; i++) {
1065 | /* Linearly interpolate the reflection coefficients over
1066 | ** the four subframes and find the excitation signal. */
1067 | float lpc[12];
1068 |
1069 | for (j = 0; j < 12; j++)
1070 | rc[j] += rc_delta[j];
1071 |
1072 | rc_to_lpc(lpc, rc);
1073 |
1074 | find_excitation(adaptive_codebook+324+12*i,
1075 | input_samples+12+12*i,
1076 | i < 3 ? 12 : 396, lpc);
1077 | }
1078 |
1079 | memcpy(input_samples, &input_samples[432], 12*sizeof(float));
1080 | memcpy(prev_rc, rc, 12*sizeof(float));
1081 |
1082 | for (i = 0; i < 4; i++) {
1083 | /* Encode the i'th subframe. */
1084 | float *excitation = adaptive_codebook+324+108*i;
1085 | int pitch_lag;
1086 | float pitch_gain;
1087 | int idx;
1088 | int bits_used;
1089 |
1090 | find_pitch(&pitch_lag, &pitch_gain, excitation);
1091 |
1092 | bwc_write_bits(&bwc, pitch_lag - 108, 8);
1093 |
1094 | idx = ROUND(pitch_gain*15.0f);
1095 | bwc_write_bits(&bwc, idx, 4);
1096 | pitch_gain = (float)idx/15.0f;
1097 |
1098 | for (j = 0; j < 108; j++)
1099 | innovation[5+j] = excitation[j]
1100 | - pitch_gain*excitation[j-pitch_lag];
1101 |
1102 | encode_innovation(&bwc, &innovation[5],
1103 | halved_innovation, use_huffman, &bits_used,
1104 | ROUND(bitrate * 432 / sampling_rate / 4) - 18);
1105 |
1106 | /* Update the adaptive codebook using the quantized
1107 | ** innovation signal. */
1108 | for (j = 0; j < 108; j++)
1109 | excitation[j] = innovation[5+j]
1110 | + pitch_gain*excitation[j-pitch_lag];
1111 | }
1112 |
1113 | /* Copy the last 3 subframes to the beginning of the
1114 | ** adaptive codebook. */
1115 | memcpy(adaptive_codebook, &adaptive_codebook[432],
1116 | 324*sizeof(float));
1117 |
1118 | bwc_flush(&bwc, outfp);
1119 | }
1120 |
1121 | bwc_pad(&bwc);
1122 | bwc_flush(&bwc, outfp);
1123 |
1124 | flush_data(outfp);
1125 |
1126 | fclose(outfp);
1127 | fclose(infp);
1128 |
1129 | return EXIT_SUCCESS;
1130 | }
--------------------------------------------------------------------------------