├── README.md
├── UNLICENSE
├── eachunk.h
├── io.h
├── samples
    ├── DS1.M10
    ├── fifa2001-mt5.dat
    ├── fifa2001.dat
    └── male.utk
├── utk.h
├── utkdecode-bnb.c
├── utkdecode-fifa.c
├── utkdecode.c
└── utkencode.c


/README.md:
--------------------------------------------------------------------------------
 1 | ## Overview
 2 | 
 3 | EA MicroTalk (also UTalk or UTK) is a linear-predictive speech codec used in
 4 | various games by Electronic Arts. The earliest known game to use it is
 5 | Beasts & Bumpkins (1997). The codec has a bandwidth of 11.025kHz (sampling rate
 6 | 22.05kHz) and frame size of 20ms (432 samples) and only supports mono. It is
 7 | typically encoded at 32 kbit/s.
 8 | 
 9 | Docs: http://wiki.niotso.org/UTK
10 | 
11 | In this repository, I have created a set of open source (public domain
12 | via the UNLICENSE) MicroTalk decoders/encoders.
13 | 
14 | * Use utkdecode to decode Maxis UTK (The Sims Online, SimCity 4).
15 | * Use utkdecode-bnb to decode PT/M10 (Beasts & Bumpkins).
16 | * Use utkdecode-fifa to decode FIFA 2001/2002 (PS2) speech samples. This tool
17 |   supports regular MicroTalk and MicroTalk Revision 3
18 |   [SCxl files](https://wiki.multimedia.cx/index.php/Electronic_Arts_SCxl).(*)
19 | * Use utkencode to encode Maxis UTK. (This is the simplest container format and
20 |   is currently the only one supported for encoding.)
21 | 
22 | (*) I wasn't able to find any real-world MicroTalk Rev. 3 samples in any games.
23 | However, you can transcode a FIFA MicroTalk Rev. 2 file to Rev. 3 using
24 | [EA's Sound eXchange tool](https://wiki.multimedia.cx/index.php/Electronic_Arts_Sound_eXchange)
25 | (`sx -mt_blk input.dat -=output.dat`).
26 | 
27 | ## Compiling
28 | 
29 | ```
30 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode utkdecode.c
31 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-fifa utkdecode-fifa.c
32 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkdecode-bnb utkdecode-bnb.c
33 | gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math -fwhole-program -g0 -s -static-libgcc -o utkencode utkencode.c
34 | ```
35 | 
36 | ## How the encoder works
37 | 
38 | The encoder for now is very simple. It does LPC analysis using the Levinson
39 | algorithm and transmits the entire excitation signal explicitly. Compression is
40 | achieved by choosing a large fixed codebook gain, such that each excitation
41 | sample has a large (coarse) quantization step size. Error is minimized in the
42 | excitation domain, and the quality is somewhat poor for bitrates below about
43 | 48 kbit/s.
44 | 
45 | However, MicroTalk is a multi-pulse codec (it is cheap to code long runs of
46 | zeros in the excitation signal). Hence, a much better design (and indeed the
47 | standard practice for multi-pulse speech codecs) is to search for the positions
48 | and amplitudes of n pulses such that error is minimized in the output domain
49 | (or the perceptually weighted domain). This new encoder is still in the works.


--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <http://unlicense.org/>
25 | 


--------------------------------------------------------------------------------
/eachunk.h:
--------------------------------------------------------------------------------
/* Read cursor over the payload of one chunk, as filled in by read_chunk(). */
typedef struct EAChunk {
    uint32_t type;      /* chunk type tag (first 32-bit field read from the file) */
    uint8_t *start;     /* first byte of the payload */
    uint8_t *ptr;       /* current read position; advanced by chunk_read_bytes() */
    uint8_t *end;       /* one past the last payload byte */
} EAChunk;
 7 | 
 8 | static void chunk_read_bytes(EAChunk *chunk, uint8_t *dest, size_t size)
 9 | {
10 |     size_t bytes_remaining = chunk->end - chunk->ptr;
11 | 
12 |     if (bytes_remaining < size) {
13 |         fprintf(stderr, "error: unexpected end of chunk\n");
14 |         exit(EXIT_FAILURE);
15 |     }
16 | 
17 |     memcpy(dest, chunk->ptr, size);
18 |     chunk->ptr += size;
19 | }
20 | 
21 | static uint32_t chunk_read_u32(EAChunk *chunk)
22 | {
23 |     uint8_t dest[4];
24 |     chunk_read_bytes(chunk, dest, sizeof(dest));
25 |     return dest[0] | (dest[1] << 8) | (dest[2] << 16) | (dest[3] << 24);
26 | }
27 | 
28 | static uint32_t chunk_read_u8(EAChunk *chunk)
29 | {
30 |     uint8_t dest;
31 |     chunk_read_bytes(chunk, &dest, sizeof(dest));
32 |     return dest;
33 | }
34 | 
35 | static uint32_t chunk_read_var_int(EAChunk *chunk)
36 | {
37 |     uint8_t dest[4];
38 |     uint8_t size = chunk_read_u8(chunk);
39 | 
40 |     if (size > 4) {
41 |         fprintf(stderr, "error: invalid varint size %u\n", (unsigned)size);
42 |         exit(EXIT_FAILURE);
43 |     }
44 | 
45 |     chunk_read_bytes(chunk, dest, size);
46 | 
47 |     /* read a big-endian integer of variable length */
48 |     switch (size) {
49 |     case 1: return dest[0];
50 |     case 2: return (dest[0]<<8) | dest[1];
51 |     case 3: return (dest[0]<<16) | (dest[1] << 8) | dest[2];
52 |     case 4: return (dest[0]<<24) | (dest[1] << 16) | (dest[2] << 8) | dest[3];
53 |     default: return 0;
54 |     }
55 | }
56 | 
57 | static EAChunk *read_chunk(FILE *fp)
58 | {
59 |     uint32_t size;
60 |     static EAChunk chunk;
61 |     static uint8_t buffer[4096];
62 | 
63 |     chunk.type = read_u32(fp);
64 | 
65 |     size = read_u32(fp);
66 |     if (size < 8 || size-8 > sizeof(buffer)) {
67 |         fprintf(stderr, "error: invalid chunk size %u\n", (unsigned)size);
68 |         exit(EXIT_FAILURE);
69 |     }
70 | 
71 |     size -= 8;
72 |     read_bytes(fp, buffer, size);
73 |     chunk.start = chunk.ptr = buffer;
74 |     chunk.end = buffer+size;
75 | 
76 |     return &chunk;
77 | }


--------------------------------------------------------------------------------
/io.h:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <string.h>
 5 | #include <errno.h>
 6 | 
 7 | static void read_bytes(FILE *fp, uint8_t *dest, size_t size)
 8 | {
 9 |     size_t bytes_copied;
10 | 
11 |     if (!size)
12 |         return;
13 | 
14 |     bytes_copied = fread(dest, 1, size, fp);
15 |     if (bytes_copied < size) {
16 |         if (ferror(fp))
17 |             fprintf(stderr, "error: fread failed: %s\n", strerror(errno));
18 |         else
19 |             fprintf(stderr, "error: unexpected end of file\n");
20 | 
21 |         exit(EXIT_FAILURE);
22 |     }
23 | }
24 | 
/*
** Read a little-endian 32-bit unsigned integer from `fp`.
** Exits (via read_bytes) on a read error or end of file.
*/
static uint32_t read_u32(FILE *fp)
{
    uint8_t dest[4];
    read_bytes(fp, dest, sizeof(dest));

    /* Widen each byte to uint32_t before shifting: a uint8_t promotes to a
    ** signed int, so dest[3] << 24 would overflow int whenever bit 7 is set. */
    return  (uint32_t)dest[0]
         | ((uint32_t)dest[1] << 8)
         | ((uint32_t)dest[2] << 16)
         | ((uint32_t)dest[3] << 24);
}
31 | 
/*
** Read a little-endian 16-bit unsigned integer from `fp`.
** Exits (via read_bytes) on a read error or end of file.
*/
static uint16_t read_u16(FILE *fp)
{
    uint8_t raw[2];
    int low, high;

    read_bytes(fp, raw, 2);

    low = raw[0];
    high = raw[1];

    return (high << 8) | low;
}
38 | 
/*
** Read a single byte from `fp` and return it widened to uint16_t.
** Exits (via read_bytes) on a read error or end of file.
*/
static uint16_t read_u8(FILE *fp)
{
    uint8_t byte;

    read_bytes(fp, &byte, 1);

    return (uint16_t)byte;
}
45 | 
/*
** Write exactly `size` bytes from `dest` to `fp`. A size of zero is a no-op.
** On a short write, prints the error and exits the process.
*/
static void write_bytes(FILE *fp, const uint8_t *dest, size_t size)
{
    size_t written;

    if (size == 0)
        return;

    written = fwrite(dest, 1, size, fp);
    if (written == size)
        return;

    fprintf(stderr, "error: fwrite failed: %s\n", strerror(errno));
    exit(EXIT_FAILURE);
}
56 | 
/*
** Write `x` to `fp` as a little-endian 32-bit integer.
** Exits (via write_bytes) on a write error.
*/
static void write_u32(FILE *fp, uint32_t x)
{
    uint8_t bytes[4];
    int i;

    /* least significant byte first */
    for (i = 0; i < 4; i++)
        bytes[i] = (uint8_t)(x >> (8*i));

    write_bytes(fp, bytes, 4);
}
66 | 
/*
** Write `x` to `fp` as a little-endian 16-bit integer.
** Exits (via write_bytes) on a write error.
*/
static void write_u16(FILE *fp, uint16_t x)
{
    uint8_t bytes[2];

    bytes[0] = (uint8_t)(x & 0xff);         /* low byte first */
    bytes[1] = (uint8_t)((x >> 8) & 0xff);

    write_bytes(fp, bytes, 2);
}
74 | 
/*
** Write the single byte `x` to `fp`.
** Exits (via write_bytes) on a write error.
*/
static void write_u8(FILE *fp, uint8_t x)
{
    uint8_t byte = x;

    write_bytes(fp, &byte, 1);
}


--------------------------------------------------------------------------------
/samples/DS1.M10:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/DS1.M10


--------------------------------------------------------------------------------
/samples/fifa2001-mt5.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001-mt5.dat


--------------------------------------------------------------------------------
/samples/fifa2001.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/fifa2001.dat


--------------------------------------------------------------------------------
/samples/male.utk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daddesio/utkencode/e0b13fac7a75fc5ef16851c165d3f31f0fe220b1/samples/male.utk


--------------------------------------------------------------------------------
/utk.h:
--------------------------------------------------------------------------------
  1 | #include <stdint.h>
  2 | #include <string.h>
  3 | 
/* Decoder state for one MicroTalk stream.
**
** Note: This struct assumes a member alignment of 4 bytes.
** This matters when pitch_lag > 216 on the first subframe of any given frame.
**
** NOTE(review): utk_decode_frame() indexes adapt_cb with
** 108*i+216-pitch_lag+j, which can fall below 0 or above 323, so the order
** of the trailing float arrays appears to be relied upon; do not reorder
** members without confirming. */
typedef struct UTKContext {
    FILE *fp;                       /* optional stream that utk_read_byte() refills from */
    const uint8_t *ptr, *end;       /* current position / end of the buffered input */
    int parsed_header;              /* nonzero once utk_parse_header() has run */
    unsigned int bits_value;        /* bit reader: buffered bits, consumed from the low end */
    int bits_count;                 /* bit reader: number of valid bits in bits_value */
    int reduced_bw;                 /* header flag: excitation is coded with a stride of 2 */
    int multipulse_thresh;          /* first RC index below this selects the multi-pulse model */
    float fixed_gains[64];          /* gain table built by utk_parse_header() */
    float rc[12];                   /* current reflection coefficients */
    float synth_history[12];        /* synthesis filter history (see utk_lp_synthesis_filter) */
    float adapt_cb[324];            /* copy of decompressed_frame[108..431] taken before synthesis */
    float decompressed_frame[432];  /* output samples of the most recently decoded frame */
} UTKContext;
 20 | 
/* Excitation codebook models: the first index into utk_codebooks and the
** values stored in utk_commands[].next_model. */
enum {
    MDL_NORMAL = 0,
    MDL_LARGEPULSE = 1
};
 25 | 
/* Reflection coefficient values, indexed by the code read from the bitstream
** in utk_decode_frame(): a 6-bit index (0..63) for the first four
** coefficients and 16 plus a 5-bit index (16..47) for the rest.
** Entries 0 and 32 are zero; entry 32+k is the negation of entry 32-k. */
static const float utk_rc_table[64] = {
    +0.0f,
    -.99677598476409912109375f, -.99032700061798095703125f, -.983879029750823974609375f, -.977430999279022216796875f,
    -.970982015132904052734375f, -.964533984661102294921875f, -.958085000514984130859375f, -.9516370296478271484375f,
    -.930754005908966064453125f, -.904959976673126220703125f, -.879167020320892333984375f, -.853372991085052490234375f,
    -.827579021453857421875f, -.801786005496978759765625f, -.775991976261138916015625f, -.75019800662994384765625f,
    -.724404990673065185546875f, -.6986110210418701171875f, -.6706349849700927734375f, -.61904799938201904296875f,
    -.567460000514984130859375f, -.515873014926910400390625f, -.4642859995365142822265625f, -.4126980006694793701171875f,
    -.361110985279083251953125f, -.309523999691009521484375f, -.257937014102935791015625f, -.20634900033473968505859375f,
    -.1547619998455047607421875f, -.10317499935626983642578125f, -.05158700048923492431640625f,
    +0.0f,
    +.05158700048923492431640625f, +.10317499935626983642578125f, +.1547619998455047607421875f, +.20634900033473968505859375f,
    +.257937014102935791015625f, +.309523999691009521484375f, +.361110985279083251953125f, +.4126980006694793701171875f,
    +.4642859995365142822265625f, +.515873014926910400390625f, +.567460000514984130859375f, +.61904799938201904296875f,
    +.6706349849700927734375f, +.6986110210418701171875f, +.724404990673065185546875f, +.75019800662994384765625f,
    +.775991976261138916015625f, +.801786005496978759765625f, +.827579021453857421875f, +.853372991085052490234375f,
    +.879167020320892333984375f, +.904959976673126220703125f, +.930754005908966064453125f, +.9516370296478271484375f,
    +.958085000514984130859375f, +.964533984661102294921875f, +.970982015132904052734375f, +.977430999279022216796875f,
    +.983879029750823974609375f, +.99032700061798095703125f, +.99677598476409912109375f
};
 46 | 
/* Excitation decoding lookup tables, one per model (MDL_NORMAL,
** MDL_LARGEPULSE). utk_decode_excitation() indexes a table with the low 8
** bits of the bit buffer; the entry is an index into utk_commands, which
** gives the number of bits actually consumed and the model to use next. */
static const uint8_t utk_codebooks[2][256] = {
    { /* normal model */
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 25,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  0,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 21,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 26,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 17,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5, 22,
        4,  6,  5,  9,  4,  6,  5, 13,  4,  6,  5, 10,  4,  6,  5, 18,
        4,  6,  5,  9,  4,  6,  5, 14,  4,  6,  5, 10,  4,  6,  5,  2
    }, { /* large-pulse model */
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 27,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  1,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 23,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8, 28,
        4, 11,  7, 15,  4, 12,  8, 19,  4, 11,  7, 16,  4, 12,  8, 24,
        4, 11,  7, 15,  4, 12,  8, 20,  4, 11,  7, 16,  4, 12,  8,  3
    }
};
 84 | 
/* Excitation commands, indexed by the values stored in utk_codebooks.
** As used by utk_decode_excitation():
**   - entries 0-1 introduce an explicitly coded pulse of magnitude >= 7,
**   - entries 2-3 introduce a run of zeros,
**   - entries 4-28 emit pulse_value directly (entry 4 emits a single zero). */
static const struct {
    int next_model;     /* codebook model to use for the following command */
    int code_size;      /* number of bits this command's code occupies */
    float pulse_value;  /* sample value emitted by entries 4 and above */
} utk_commands[29] = {
    {MDL_LARGEPULSE, 8,  0.0f},
    {MDL_LARGEPULSE, 7,  0.0f},
    {MDL_NORMAL,     8,  0.0f},
    {MDL_NORMAL,     7,  0.0f},
    {MDL_NORMAL,     2,  0.0f},
    {MDL_NORMAL,     2, -1.0f},
    {MDL_NORMAL,     2, +1.0f},
    {MDL_NORMAL,     3, -1.0f},
    {MDL_NORMAL,     3, +1.0f},
    {MDL_LARGEPULSE, 4, -2.0f},
    {MDL_LARGEPULSE, 4, +2.0f},
    {MDL_LARGEPULSE, 3, -2.0f},
    {MDL_LARGEPULSE, 3, +2.0f},
    {MDL_LARGEPULSE, 5, -3.0f},
    {MDL_LARGEPULSE, 5, +3.0f},
    {MDL_LARGEPULSE, 4, -3.0f},
    {MDL_LARGEPULSE, 4, +3.0f},
    {MDL_LARGEPULSE, 6, -4.0f},
    {MDL_LARGEPULSE, 6, +4.0f},
    {MDL_LARGEPULSE, 5, -4.0f},
    {MDL_LARGEPULSE, 5, +4.0f},
    {MDL_LARGEPULSE, 7, -5.0f},
    {MDL_LARGEPULSE, 7, +5.0f},
    {MDL_LARGEPULSE, 6, -5.0f},
    {MDL_LARGEPULSE, 6, +5.0f},
    {MDL_LARGEPULSE, 8, -6.0f},
    {MDL_LARGEPULSE, 8, +6.0f},
    {MDL_LARGEPULSE, 7, -6.0f},
    {MDL_LARGEPULSE, 7, +6.0f}
};
120 | 
121 | static int utk_read_byte(UTKContext *ctx)
122 | {
123 |     if (ctx->ptr < ctx->end)
124 |         return *ctx->ptr++;
125 | 
126 |     if (ctx->fp) {
127 |         static uint8_t buffer[4096];
128 |         size_t bytes_copied = fread(buffer, 1, sizeof(buffer), ctx->fp);
129 |         if (bytes_copied > 0 && bytes_copied <= sizeof(buffer)) {
130 |             ctx->ptr = buffer;
131 |             ctx->end = buffer + bytes_copied;
132 |             return *ctx->ptr++;
133 |         }
134 |     }
135 | 
136 |     return 0;
137 | }
138 | 
139 | static int16_t utk_read_i16(UTKContext *ctx)
140 | {
141 |     int x = utk_read_byte(ctx);
142 |     x = (x << 8) | utk_read_byte(ctx);
143 |     return x;
144 | }
145 | 
146 | static int utk_read_bits(UTKContext *ctx, int count)
147 | {
148 |     int ret = ctx->bits_value & ((1 << count) - 1);
149 |     ctx->bits_value >>= count;
150 |     ctx->bits_count -= count;
151 | 
152 |     if (ctx->bits_count < 8) {
153 |         /* read another byte */
154 |         ctx->bits_value |= utk_read_byte(ctx) << ctx->bits_count;
155 |         ctx->bits_count += 8;
156 |     }
157 | 
158 |     return ret;
159 | }
160 | 
161 | static void utk_parse_header(UTKContext *ctx)
162 | {
163 |     int i;
164 |     float multiplier;
165 | 
166 |     ctx->reduced_bw = utk_read_bits(ctx, 1);
167 |     ctx->multipulse_thresh = 32 - utk_read_bits(ctx, 4);
168 |     ctx->fixed_gains[0] = 8.0f * (1 + utk_read_bits(ctx, 4));
169 |     multiplier = 1.04f + utk_read_bits(ctx, 6)*0.001f;
170 | 
171 |     for (i = 1; i < 64; i++)
172 |         ctx->fixed_gains[i] = ctx->fixed_gains[i-1] * multiplier;
173 | }
174 | 
/*
** Decode one subframe of excitation from the bitstream.
**
** Samples are written to out[0], out[stride], out[2*stride], ... for all
** positions below 108; positions in between are left untouched.
**
** ctx            - decoder state (supplies the bit reader)
** use_multipulse - nonzero: codebook-driven multi-pulse model;
**                  zero: every sample is coded as 0, -2 or +2
** out            - destination for the decoded samples
** stride         - distance between written samples (callers pass 1 or 2)
*/
static void utk_decode_excitation(UTKContext *ctx, int use_multipulse, float *out, int stride)
{
    int i;

    if (use_multipulse) {
        /* multi-pulse model: n pulses are coded explicitly; the rest are zero */
        int model, cmd;
        model = 0;
        i = 0;
        while (i < 108) {
            /* peek at the next 8 bits to pick the command, then consume
            ** only as many bits as that command's code occupies */
            cmd = utk_codebooks[model][ctx->bits_value & 0xff];
            model = utk_commands[cmd].next_model;
            utk_read_bits(ctx, utk_commands[cmd].code_size);

            if (cmd > 3) {
                /* insert a pulse with magnitude <= 6.0f */
                out[i] = utk_commands[cmd].pulse_value;
                i += stride;
            } else if (cmd > 1) {
                /* insert between 7 and 70 zeros */
                int count = 7 + utk_read_bits(ctx, 6);
                /* clip the run so it does not extend past the subframe */
                if (i + count * stride > 108)
                    count = (108 - i)/stride;

                while (count > 0) {
                    out[i] = 0.0f;
                    i += stride;
                    count--;
                }
            } else {
                /* insert a pulse with magnitude >= 7.0f */
                int x = 7;

                /* unary-coded magnitude: each 1 bit adds one */
                while (utk_read_bits(ctx, 1))
                    x++;

                /* sign bit: 0 means negative */
                if (!utk_read_bits(ctx, 1))
                    x *= -1;

                out[i] = (float)x;
                i += stride;
            }
        }
    } else {
        /* RELP model: entire residual (excitation) signal is coded explicitly */
        i = 0;
        while (i < 108) {
            /* "0" -> 0.0, "10" -> -2.0, "11" -> +2.0 (in read order) */
            if (!utk_read_bits(ctx, 1))
                out[i] = 0.0f;
            else if (!utk_read_bits(ctx, 1))
                out[i] = -2.0f;
            else
                out[i] = 2.0f;

            i += stride;
        }
    }
}
233 | 
/*
** Convert 12 reflection coefficients (rc) into 12 linear prediction
** coefficients (lpc).
**
** rc  - input, 12 values (read only)
** lpc - output, 12 values; entries written earlier in the call are read
**       back while computing later ones
*/
static void rc_to_lpc(const float *rc, float *lpc)
{
    float state[12];
    float response[12];
    int n, k;

    /* state = { 1, rc[0], ..., rc[10] } */
    state[0] = 1.0f;
    memcpy(&state[1], rc, 11 * sizeof(float));

    for (n = 0; n < 12; n++) {
        float acc = -state[11] * rc[11];

        /* walk the remaining stages from index 10 down to 0 */
        k = 11;
        while (k-- > 0) {
            acc -= state[k] * rc[k];
            state[k+1] = acc * rc[k] + state[k];
        }

        state[0] = acc;
        response[n] = acc;

        for (k = 0; k < n; k++)
            acc -= response[n-1-k] * lpc[k];

        lpc[n] = acc;
    }
}
261 | 
262 | static void utk_lp_synthesis_filter(UTKContext *ctx, int offset, int num_blocks)
263 | {
264 |     int i, j, k;
265 |     float lpc[12];
266 |     float *ptr = &ctx->decompressed_frame[offset];
267 | 
268 |     rc_to_lpc(ctx->rc, lpc);
269 | 
270 |     for (i = 0; i < num_blocks; i++) {
271 |         for (j = 0; j < 12; j++) {
272 |             float x = *ptr;
273 | 
274 |             for (k = 0; k < j; k++)
275 |                 x += lpc[k] * ctx->synth_history[k-j+12];
276 |             for (; k < 12; k++)
277 |                 x += lpc[k] * ctx->synth_history[k-j];
278 | 
279 |             ctx->synth_history[11-j] = x;
280 |             *ptr++ = x;
281 |         }
282 |     }
283 | }
284 | 
285 | /*
286 | ** Public functions.
287 | */
288 | 
/*
** Decode one frame (432 samples, as four subframes of 108) into
** ctx->decompressed_frame.
**
** On the first call the stream header is parsed as well. If the bit reader
** has been reset (bits_count == 0), it is primed with one byte first.
*/
static void utk_decode_frame(UTKContext *ctx)
{
    int i, j;
    int use_multipulse = 0;
    /* 108 samples plus 5 zero samples on each side, so that the 6-tap
    ** interpolation below can read ptr[j-5] .. ptr[j+5] */
    float excitation[5+108+5];
    float rc_delta[12];

    if (!ctx->bits_count) {
        ctx->bits_value = utk_read_byte(ctx);
        ctx->bits_count = 8;
    }

    if (!ctx->parsed_header) {
        utk_parse_header(ctx);
        ctx->parsed_header = 1;
    }

    memset(&excitation[0], 0, 5*sizeof(float));
    memset(&excitation[5+108], 0, 5*sizeof(float));

    /* read the reflection coefficients */
    for (i = 0; i < 12; i++) {
        int idx;
        if (i == 0) {
            /* the first coefficient's index also selects the excitation model */
            idx = utk_read_bits(ctx, 6);
            if (idx < ctx->multipulse_thresh)
                use_multipulse = 1;
        } else if (i < 4) {
            idx = utk_read_bits(ctx, 6);
        } else {
            /* remaining coefficients use 5 bits, covering table entries 16..47 */
            idx = 16 + utk_read_bits(ctx, 5);
        }

        /* a quarter of the distance to the target; applied four times below */
        rc_delta[i] = (utk_rc_table[idx] - ctx->rc[i])*0.25f;
    }

    /* decode four subframes */
    for (i = 0; i < 4; i++) {
        int pitch_lag = utk_read_bits(ctx, 8);
        float pitch_gain = (float)utk_read_bits(ctx, 4)/15.0f;
        float fixed_gain = ctx->fixed_gains[utk_read_bits(ctx, 6)];

        if (!ctx->reduced_bw) {
            utk_decode_excitation(ctx, use_multipulse, &excitation[5], 1);
        } else {
            /* residual (excitation) signal is encoded at reduced bandwidth */
            int align = utk_read_bits(ctx, 1);
            int zero = utk_read_bits(ctx, 1);

            /* only every second sample is coded, starting at offset `align` */
            utk_decode_excitation(ctx, use_multipulse, &excitation[5+align], 2);

            if (zero) {
                /* fill the remaining samples with zero
                ** (spectrum is duplicated into high frequencies) */
                for (j = 0; j < 54; j++)
                    excitation[5+(1-align)+2*j] = 0.0f;
            } else {
                /* interpolate the remaining samples
                ** (spectrum is low-pass filtered) */
                float *ptr = &excitation[5+(1-align)];
                for (j = 0; j < 108; j += 2)
                    ptr[j] =   ptr[j-5] * 0.01803267933428287506103515625f
                             - ptr[j-3] * 0.114591561257839202880859375f
                             + ptr[j-1] * 0.597385942935943603515625f
                             + ptr[j+1] * 0.597385942935943603515625f
                             - ptr[j+3] * 0.114591561257839202880859375f
                             + ptr[j+5] * 0.01803267933428287506103515625f;

                /* scale by 0.5f to give the sinc impulse response unit energy */
                fixed_gain *= 0.5f;
            }
        }

        /* NOTE(review): the adapt_cb index below ranges from -39 to 647
        ** depending on i, j and pitch_lag, i.e. outside adapt_cb[324]. This
        ** appears to rely on the UTKContext member layout (see the note on
        ** the struct) -- confirm before changing either. */
        for (j = 0; j < 108; j++)
            ctx->decompressed_frame[108*i+j] =   fixed_gain * excitation[5+j]
                                               + pitch_gain * ctx->adapt_cb[108*i+216-pitch_lag+j];
    }

    /* keep the last 324 pre-synthesis samples for the next frame */
    for (i = 0; i < 324; i++)
        ctx->adapt_cb[i] = ctx->decompressed_frame[108+i];

    /* Step the reflection coefficients towards their targets in four steps:
    ** the first three steps each filter one 12-sample block, and the final
    ** step filters the remaining 33 blocks (3*12 + 33*12 = 432 samples). */
    for (i = 0; i < 4; i++) {
        for (j = 0; j < 12; j++)
            ctx->rc[j] += rc_delta[j];

        utk_lp_synthesis_filter(ctx, 12*i, i < 3 ? 1 : 33);
    }
}
377 | 
378 | static void utk_init(UTKContext *ctx)
379 | {
380 |     memset(ctx, 0, sizeof(*ctx));
381 | }
382 | 
383 | static void utk_set_fp(UTKContext *ctx, FILE *fp)
384 | {
385 |     ctx->fp = fp;
386 | 
387 |     /* reset the bit reader */
388 |     ctx->bits_count = 0;
389 | }
390 | 
391 | static void utk_set_ptr(UTKContext *ctx, const uint8_t *ptr, const uint8_t *end)
392 | {
393 |     ctx->ptr = ptr;
394 |     ctx->end = end;
395 | 
396 |     /* reset the bit reader */
397 |     ctx->bits_count = 0;
398 | }
399 | 
400 | /*
401 | ** MicroTalk Revision 3 decoding function.
402 | */
403 | 
404 | static void utk_rev3_decode_frame(UTKContext *ctx)
405 | {
406 |     int pcm_data_present = (utk_read_byte(ctx) == 0xee);
407 |     int i;
408 | 
409 |     utk_decode_frame(ctx);
410 | 
411 |     /* unread the last 8 bits and reset the bit reader */
412 |     ctx->ptr--;
413 |     ctx->bits_count = 0;
414 | 
415 |     if (pcm_data_present) {
416 |         /* Overwrite n samples at a given offset in the decoded frame with
417 |         ** raw PCM data. */
418 |         int offset = utk_read_i16(ctx);
419 |         int count = utk_read_i16(ctx);
420 | 
421 |         /* sx.exe does not do any bounds checking or clamping of these two
422 |         ** fields (see 004274D1 in sx.exe v3.01.01), which means a specially
423 |         ** crafted MT5:1 file can crash sx.exe.
424 |         ** We will throw an error instead. */
425 |         if (offset < 0 || offset > 432) {
426 |             fprintf(stderr, "error: invalid PCM offset %d\n", offset);
427 |             exit(EXIT_FAILURE);
428 |         }
429 |         if (count < 0 || count > 432 - offset) {
430 |             fprintf(stderr, "error: invalid PCM count %d\n", count);
431 |             exit(EXIT_FAILURE);
432 |         }
433 | 
434 |         for (i = 0; i < count; i++)
435 |             ctx->decompressed_frame[offset+i] = (float)utk_read_i16(ctx);
436 |     }
437 | }
438 | 


--------------------------------------------------------------------------------
/utkdecode-bnb.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | ** utkdecode-bnb
  3 | ** Decode Beasts & Bumpkins M10 to wav.
  4 | ** Authors: Andrew D'Addesio
  5 | ** License: Public domain
  6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
  7 | **          -fwhole-program -g0 -s -o utkdecode-bnb utkdecode-bnb.c
  8 | */
  9 | #include <stdint.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <errno.h>
 14 | #include "utk.h"
 15 | #include "io.h"
 16 | #include "eachunk.h"
 17 | 
/* Pack four byte values into one 32-bit value, with `a` in the lowest byte
** (matches the byte order used by read_u32/write_u32). */
#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
/* Offset x by 0.5 away from zero, so that truncating the result to an
** integer rounds x to the nearest integer. */
#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
/* Note: MIN, MAX and CLAMP evaluate their arguments more than once. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#define CLAMP(x,min,max) MIN(MAX(x,min),max)
 23 | 
/* State of one M10 -> WAV conversion. */
typedef struct PTContext {
    FILE *infp, *outfp;         /* input M10 stream and output WAV stream */
    uint32_t num_samples;       /* header key 0x85: number of samples to decode */
    uint32_t compression_type;  /* header key 0x83: must be 9 (MicroTalk 10:1) */
    UTKContext utk;             /* MicroTalk decoder state */
} PTContext;
 30 | 
 31 | static void pt_read_header(PTContext *pt)
 32 | {
 33 |     EAChunk *chunk = read_chunk(pt->infp);
 34 | 
 35 |     if ((chunk->type & 0xffff) != MAKE_U32('P','T','\x00','\x00')) {
 36 |         fprintf(stderr, "error: expected PT chunk\n");
 37 |         exit(EXIT_FAILURE);
 38 |     }
 39 | 
 40 |     while (1) {
 41 |         uint8_t cmd = chunk_read_u8(chunk);
 42 |         if (cmd == 0xFD) {
 43 |             while (1) {
 44 |                 uint8_t key = chunk_read_u8(chunk);
 45 |                 uint32_t value = chunk_read_var_int(chunk);
 46 | 
 47 |                 if (key == 0xFF)
 48 |                     break;
 49 |                 else if (key == 0x85)
 50 |                     pt->num_samples = value;
 51 |                 else if (key == 0x83)
 52 |                     pt->compression_type = value;
 53 |             }
 54 |             break;
 55 |         } else {
 56 |             chunk_read_var_int(chunk);
 57 |         }
 58 |     }
 59 | 
 60 |     if (pt->compression_type != 9) {
 61 |         fprintf(stderr, "error: invalid compression type %u (expected 9 for MicroTalk 10:1)\n",
 62 |                 (unsigned)pt->compression_type);
 63 |         exit(EXIT_FAILURE);
 64 |     }
 65 | 
 66 |     if (pt->num_samples >= 0x01000000) {
 67 |         fprintf(stderr, "error: invalid num_samples %u\n", pt->num_samples);
 68 |         exit(EXIT_FAILURE);
 69 |     }
 70 | 
 71 |     /* Initialize the decoder. */
 72 |     utk_init(&pt->utk);
 73 | 
 74 |     /* Write the WAV header. */
 75 |     write_u32(pt->outfp, MAKE_U32('R','I','F','F'));
 76 |     write_u32(pt->outfp, 36 + pt->num_samples*2);
 77 |     write_u32(pt->outfp, MAKE_U32('W','A','V','E'));
 78 |     write_u32(pt->outfp, MAKE_U32('f','m','t',' '));
 79 |     write_u32(pt->outfp, 16);
 80 |     write_u16(pt->outfp, 1);
 81 |     write_u16(pt->outfp, 1);
 82 |     write_u32(pt->outfp, 22050);
 83 |     write_u32(pt->outfp, 22050*2);
 84 |     write_u16(pt->outfp, 2);
 85 |     write_u16(pt->outfp, 16);
 86 |     write_u32(pt->outfp, MAKE_U32('d','a','t','a'));
 87 |     write_u32(pt->outfp, pt->num_samples*2);
 88 | }
 89 | 
 90 | static void pt_decode(PTContext *pt)
 91 | {
 92 |     UTKContext *utk = &pt->utk;
 93 |     uint32_t num_samples = pt->num_samples;
 94 | 
 95 |     utk_set_fp(utk, pt->infp);
 96 | 
 97 |     while (num_samples > 0) {
 98 |         int count = MIN(num_samples, 432);
 99 |         int i;
100 | 
101 |         utk_decode_frame(utk);
102 | 
103 |         for (i = 0; i < count; i++) {
104 |             int x = ROUND(pt->utk.decompressed_frame[i]);
105 |             write_u16(pt->outfp, (int16_t)CLAMP(x, -32768, 32767));
106 |         }
107 | 
108 |         num_samples -= count;
109 |     }
110 | }
111 | 
112 | int main(int argc, char *argv[])
113 | {
114 |     PTContext pt;
115 |     const char *infile, *outfile;
116 |     FILE *infp, *outfp;
117 |     int force = 0;
118 | 
119 |     /* Parse arguments. */
120 |     if (argc == 4 && !strcmp(argv[1], "-f")) {
121 |         force = 1;
122 |         argv++, argc--;
123 |     }
124 | 
125 |     if (argc != 3) {
126 |         printf("Usage: utkdecode-bnb [-f] infile outfile\n");
127 |         printf("Decode Beasts & Bumpkins M10 to wav.\n");
128 |         return EXIT_FAILURE;
129 |     }
130 | 
131 |     infile = argv[1];
132 |     outfile = argv[2];
133 | 
134 |     /* Open the input/output files. */
135 |     infp = fopen(infile, "rb");
136 |     if (!infp) {
137 |         fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
138 |         return EXIT_FAILURE;
139 |     }
140 | 
141 |     if (!force && fopen(outfile, "rb")) {
142 |         fprintf(stderr, "error: '%s' already exists\n", outfile);
143 |         return EXIT_FAILURE;
144 |     }
145 | 
146 |     outfp = fopen(outfile, "wb");
147 |     if (!outfp) {
148 |         fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
149 |         return EXIT_FAILURE;
150 |     }
151 | 
152 |     memset(&pt, 0, sizeof(pt));
153 |     pt.infp = infp;
154 |     pt.outfp = outfp;
155 | 
156 |     pt_read_header(&pt);
157 |     pt_decode(&pt);
158 | 
159 |     if (fclose(outfp) != 0) {
160 |         fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
161 |         return EXIT_FAILURE;
162 |     }
163 | 
164 |     fclose(infp);
165 | 
166 |     return EXIT_SUCCESS;
167 | }


--------------------------------------------------------------------------------
/utkdecode-fifa.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | ** utkdecode-fifa
  3 | ** Decode FIFA 2001/2002 MicroTalk to wav.
  4 | ** Authors: Andrew D'Addesio
  5 | ** License: Public domain
  6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
  7 | **          -fwhole-program -g0 -s -o utkdecode-fifa utkdecode-fifa.c
  8 | */
  9 | #include <stdint.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <errno.h>
 14 | #include "utk.h"
 15 | #include "io.h"
 16 | #include "eachunk.h"
 17 | 
/* Pack four byte values into one integer, 'a' in the least significant byte. */
#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
/* Move x by 0.5 away from zero so that a truncating conversion to an integer
** rounds to nearest. The macro itself still yields a floating-point value. */
#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
/* NOTE: MIN, MAX and CLAMP evaluate their arguments more than once; only
** pass expressions without side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
/* Limit x to the inclusive range [min, max]. */
#define CLAMP(x,min,max) MIN(MAX(x,min),max)
 23 | 
/* State shared by the SCHl/SCCl/SCDl/SCEl chunk readers below. */
typedef struct EAContext {
    FILE *infp, *outfp;         /* input (chunk stream) and output (WAV) files */
    uint32_t audio_pos;         /* samples written so far; advanced by ea_read_scdl */
    uint32_t num_samples;       /* total sample count, SCHl header key 0x85 */
    uint32_t num_data_chunks;   /* number of SCDl chunks, read from the SCCl chunk */
    uint32_t compression_type;  /* SCHl header key 0xA0; only 4 and 22 are accepted */
    uint32_t codec_revision;    /* SCHl header key 0x80; >= 3 selects utk_rev3_decode_frame */
    UTKContext utk;             /* decoder state */
} EAContext;
 33 | 
 34 | static void ea_read_schl(EAContext *ea)
 35 | {
 36 |     uint32_t id;
 37 |     EAChunk *chunk = read_chunk(ea->infp);
 38 | 
 39 |     if (chunk->type != MAKE_U32('S','C','H','l')) {
 40 |         fprintf(stderr, "error: expected SCHl chunk\n");
 41 |         exit(EXIT_FAILURE);
 42 |     }
 43 | 
 44 |     id = chunk_read_u32(chunk);
 45 |     if ((id & 0xffff) != MAKE_U32('P','T','\x00','\x00')) {
 46 |         fprintf(stderr, "error: expected PT chunk in SCHl header\n");
 47 |         exit(EXIT_FAILURE);
 48 |     }
 49 | 
 50 |     while (1) {
 51 |         uint8_t cmd = chunk_read_u8(chunk);
 52 |         if (cmd == 0xFD) {
 53 |             while (1) {
 54 |                 uint8_t key = chunk_read_u8(chunk);
 55 |                 uint32_t value = chunk_read_var_int(chunk);
 56 | 
 57 |                 if (key == 0xFF)
 58 |                     break;
 59 |                 else if (key == 0x80)
 60 |                     ea->codec_revision = value;
 61 |                 else if (key == 0x85)
 62 |                     ea->num_samples = value;
 63 |                 else if (key == 0xA0)
 64 |                     ea->compression_type = value;
 65 |             }
 66 |             break;
 67 |         } else {
 68 |             chunk_read_var_int(chunk);
 69 |         }
 70 |     }
 71 | 
 72 |     if (ea->compression_type != 4 && ea->compression_type != 22) {
 73 |         fprintf(stderr, "error: invalid compression type %u (expected 4 for MicroTalk 10:1 or 22 for MicroTalk 5:1)\n",
 74 |                 (unsigned)ea->compression_type);
 75 |         exit(EXIT_FAILURE);
 76 |     }
 77 | 
 78 |     if (ea->num_samples >= 0x01000000) {
 79 |         fprintf(stderr, "error: invalid num_samples %u\n", ea->num_samples);
 80 |         exit(EXIT_FAILURE);
 81 |     }
 82 | 
 83 |     /* Initialize the decoder. */
 84 |     utk_init(&ea->utk);
 85 | 
 86 |     /* Write the WAV header. */
 87 |     write_u32(ea->outfp, MAKE_U32('R','I','F','F'));
 88 |     write_u32(ea->outfp, 36 + ea->num_samples*2);
 89 |     write_u32(ea->outfp, MAKE_U32('W','A','V','E'));
 90 |     write_u32(ea->outfp, MAKE_U32('f','m','t',' '));
 91 |     write_u32(ea->outfp, 16);
 92 |     write_u16(ea->outfp, 1);
 93 |     write_u16(ea->outfp, 1);
 94 |     write_u32(ea->outfp, 22050);
 95 |     write_u32(ea->outfp, 22050*2);
 96 |     write_u16(ea->outfp, 2);
 97 |     write_u16(ea->outfp, 16);
 98 |     write_u32(ea->outfp, MAKE_U32('d','a','t','a'));
 99 |     write_u32(ea->outfp, ea->num_samples*2);
100 | }
101 | 
102 | static void ea_read_sccl(EAContext *ea)
103 | {
104 |     EAChunk *chunk = read_chunk(ea->infp);
105 | 
106 |     if (chunk->type != MAKE_U32('S','C','C','l')) {
107 |         fprintf(stderr, "error: expected SCCl chunk\n");
108 |         exit(EXIT_FAILURE);
109 |     }
110 | 
111 |     ea->num_data_chunks = chunk_read_u32(chunk);
112 |     if (ea->num_data_chunks >= 0x01000000) {
113 |         fprintf(stderr, "error: invalid num_data_chunks %u\n", (unsigned)ea->num_data_chunks);
114 |         exit(EXIT_FAILURE);
115 |     }
116 | }
117 | 
118 | static void ea_read_scdl(EAContext *ea)
119 | {
120 |     EAChunk *chunk = read_chunk(ea->infp);
121 |     UTKContext *utk = &ea->utk;
122 |     uint32_t num_samples;
123 | 
124 |     if (chunk->type != MAKE_U32('S','C','D','l')) {
125 |         fprintf(stderr, "error: expected SCDl chunk\n");
126 |         exit(EXIT_FAILURE);
127 |     }
128 | 
129 |     num_samples = chunk_read_u32(chunk);
130 |     chunk_read_u32(chunk); /* unknown */
131 |     chunk_read_u8(chunk);  /* unknown */
132 | 
133 |     if (num_samples > ea->num_samples - ea->audio_pos)
134 |         num_samples = ea->num_samples - ea->audio_pos;
135 | 
136 |     utk_set_ptr(utk, chunk->ptr, chunk->end);
137 | 
138 |     while (num_samples > 0) {
139 |         int count = MIN(num_samples, 432);
140 |         int i;
141 | 
142 |         if (ea->codec_revision >= 3)
143 |             utk_rev3_decode_frame(utk);
144 |         else
145 |             utk_decode_frame(utk);
146 | 
147 |         for (i = 0; i < count; i++) {
148 |             int x = ROUND(ea->utk.decompressed_frame[i]);
149 |             write_u16(ea->outfp, (int16_t)CLAMP(x, -32768, 32767));
150 |         }
151 | 
152 |         ea->audio_pos += count;
153 |         num_samples -= count;
154 |     }
155 | }
156 | 
157 | static void ea_read_scel(const EAContext *ea)
158 | {
159 |     EAChunk *chunk = read_chunk(ea->infp);
160 | 
161 |     if (chunk->type != MAKE_U32('S','C','E','l')) {
162 |         fprintf(stderr, "error: expected SCEl chunk\n");
163 |         exit(EXIT_FAILURE);
164 |     }
165 | 
166 |     if (ea->audio_pos != ea->num_samples) {
167 |         fprintf(stderr, "error: failed to decode the correct number of samples\n");
168 |         exit(EXIT_FAILURE);
169 |     }
170 | }
171 | 
172 | int main(int argc, char *argv[])
173 | {
174 |     EAContext ea;
175 |     const char *infile, *outfile;
176 |     FILE *infp, *outfp;
177 |     int force = 0;
178 |     unsigned int i;
179 | 
180 |     if (argc == 4 && !strcmp(argv[1], "-f")) {
181 |         force = 1;
182 |         argv++, argc--;
183 |     }
184 | 
185 |     if (argc != 3) {
186 |         printf("Usage: utkdecode-fifa [-f] infile outfile\n");
187 |         printf("Decode FIFA 2001/2002 MicroTalk to wav.\n");
188 |         return EXIT_FAILURE;
189 |     }
190 | 
191 |     infile = argv[1];
192 |     outfile = argv[2];
193 | 
194 |     infp = fopen(infile, "rb");
195 |     if (!infp) {
196 |         fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
197 |         return EXIT_FAILURE;
198 |     }
199 | 
200 |     if (!force && fopen(outfile, "rb")) {
201 |         fprintf(stderr, "error: '%s' already exists\n", outfile);
202 |         return EXIT_FAILURE;
203 |     }
204 | 
205 |     outfp = fopen(outfile, "wb");
206 |     if (!outfp) {
207 |         fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
208 |         return EXIT_FAILURE;
209 |     }
210 | 
211 |     memset(&ea, 0, sizeof(ea));
212 |     ea.infp = infp;
213 |     ea.outfp = outfp;
214 | 
215 |     ea_read_schl(&ea);
216 |     ea_read_sccl(&ea);
217 | 
218 |     for (i = 0; i < ea.num_data_chunks; i++)
219 |         ea_read_scdl(&ea);
220 | 
221 |     ea_read_scel(&ea);
222 | 
223 |     if (!outfp) {
224 |         fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
225 |         return EXIT_FAILURE;
226 |     }
227 | 
228 |     return EXIT_SUCCESS;
229 | }


--------------------------------------------------------------------------------
/utkdecode.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | ** utkdecode
  3 | ** Decode Maxis UTK to wav.
  4 | ** Authors: Andrew D'Addesio
  5 | ** License: Public domain
  6 | ** Compile: gcc -Wall -Wextra -Wno-unused-function -ansi -pedantic -O2 -ffast-math
  7 | **          -fwhole-program -g0 -s -o utkdecode utkdecode.c
  8 | */
  9 | #include <stdint.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <errno.h>
 14 | #include "utk.h"
 15 | #include "io.h"
 16 | 
/* Pack four byte values into one integer, 'a' in the least significant byte. */
#define MAKE_U32(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24))
/* Move x by 0.5 away from zero so that a truncating conversion to an integer
** rounds to nearest. The macro itself still yields a floating-point value. */
#define ROUND(x) ((x) >= 0.0f ? ((x)+0.5f) : ((x)-0.5f))
/* NOTE: MIN, MAX and CLAMP evaluate their arguments more than once; only
** pass expressions without side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
/* Limit x to the inclusive range [min, max]. */
#define CLAMP(x,min,max) MIN(MAX(x,min),max)
 22 | 
 23 | int main(int argc, char *argv[])
 24 | {
 25 |     const char *infile, *outfile;
 26 |     UTKContext ctx;
 27 |     uint32_t sID;
 28 |     uint32_t dwOutSize;
 29 |     uint32_t dwWfxSize;
 30 |     uint16_t wFormatTag;
 31 |     uint16_t nChannels;
 32 |     uint32_t nSamplesPerSec;
 33 |     uint32_t nAvgBytesPerSec;
 34 |     uint16_t nBlockAlign;
 35 |     uint16_t wBitsPerSample;
 36 |     uint16_t cbSize;
 37 |     uint32_t num_samples;
 38 |     FILE *infp, *outfp;
 39 |     int force = 0;
 40 |     int error = 0;
 41 |     int i;
 42 | 
 43 |     /* Parse arguments. */
 44 |     if (argc == 4 && !strcmp(argv[1], "-f")) {
 45 |         force = 1;
 46 |         argv++, argc--;
 47 |     }
 48 | 
 49 |     if (argc != 3) {
 50 |         printf("Usage: utkdecode [-f] infile outfile\n");
 51 |         printf("Decode Maxis UTK to wav.\n");
 52 |         return EXIT_FAILURE;
 53 |     }
 54 | 
 55 |     infile = argv[1];
 56 |     outfile = argv[2];
 57 | 
 58 |     /* Open the input/output files. */
 59 |     infp = fopen(infile, "rb");
 60 |     if (!infp) {
 61 |         fprintf(stderr, "error: failed to open '%s' for reading: %s\n", infile, strerror(errno));
 62 |         return EXIT_FAILURE;
 63 |     }
 64 | 
 65 |     if (!force && fopen(outfile, "rb")) {
 66 |         fprintf(stderr, "error: '%s' already exists\n", outfile);
 67 |         return EXIT_FAILURE;
 68 |     }
 69 | 
 70 |     outfp = fopen(outfile, "wb");
 71 |     if (!outfp) {
 72 |         fprintf(stderr, "error: failed to create '%s': %s\n", outfile, strerror(errno));
 73 |         return EXIT_FAILURE;
 74 |     }
 75 | 
 76 |     /* Parse the UTK header. */
 77 |     sID = read_u32(infp);
 78 |     dwOutSize = read_u32(infp);
 79 |     dwWfxSize = read_u32(infp);
 80 |     wFormatTag = read_u16(infp);
 81 |     nChannels = read_u16(infp);
 82 |     nSamplesPerSec = read_u32(infp);
 83 |     nAvgBytesPerSec = read_u32(infp);
 84 |     nBlockAlign = read_u16(infp);
 85 |     wBitsPerSample = read_u16(infp);
 86 |     cbSize = read_u16(infp);
 87 |     read_u16(infp); /* padding */
 88 | 
 89 |     if (sID != MAKE_U32('U','T','M','0')) {
 90 |         fprintf(stderr, "error: not a valid UTK file (expected UTM0 signature)\n");
 91 |         return EXIT_FAILURE;
 92 |     } else if ((dwOutSize & 0x01) != 0 || dwOutSize >= 0x01000000) {
 93 |         fprintf(stderr, "error: invalid dwOutSize %u\n", (unsigned)dwOutSize);
 94 |         return EXIT_FAILURE;
 95 |     } else if (dwWfxSize != 20) {
 96 |         fprintf(stderr, "error: invalid dwWfxSize %u (expected 20)\n", (unsigned)dwWfxSize);
 97 |         return EXIT_FAILURE;
 98 |     } else if (wFormatTag != 1) {
 99 |         fprintf(stderr, "error: invalid wFormatTag %u (expected 1)\n", (unsigned)wFormatTag);
100 |         return EXIT_FAILURE;
101 |     }
102 | 
103 |     if (nChannels != 1) {
104 |         fprintf(stderr, "error: invalid nChannels %u (only mono is supported)\n", (unsigned)nChannels);
105 |         error = 1;
106 |     }
107 |     if (nSamplesPerSec < 8000 || nSamplesPerSec > 192000) {
108 |         fprintf(stderr, "error: invalid nSamplesPerSec %u\n", (unsigned)nSamplesPerSec);
109 |         error = 1;
110 |     }
111 |     if (nAvgBytesPerSec != nSamplesPerSec * nBlockAlign) {
112 |         fprintf(stderr, "error: invalid nAvgBytesPerSec %u (expected nSamplesPerSec * nBlockAlign)\n", (unsigned)nAvgBytesPerSec);
113 |         error = 1;
114 |     }
115 |     if (nBlockAlign != 2) {
116 |         fprintf(stderr, "error: invalid nBlockAlign %u (expected 2)\n", (unsigned)nBlockAlign);
117 |         error = 1;
118 |     }
119 |     if (wBitsPerSample != 16) {
120 |         fprintf(stderr, "error: invalid wBitsPerSample %u (expected 16)\n", (unsigned)wBitsPerSample);
121 |         error = 1;
122 |     }
123 |     if (cbSize != 0) {
124 |         fprintf(stderr, "error: invalid cbSize %u (expected 0)\n", (unsigned)cbSize);
125 |         error = 1;
126 |     }
127 |     if (error)
128 |         return EXIT_FAILURE;
129 | 
130 |     num_samples = dwOutSize/2;
131 | 
132 |     /* Write the WAV header. */
133 |     write_u32(outfp, MAKE_U32('R','I','F','F'));
134 |     write_u32(outfp, 36 + num_samples*2);
135 |     write_u32(outfp, MAKE_U32('W','A','V','E'));
136 |     write_u32(outfp, MAKE_U32('f','m','t',' '));
137 |     write_u32(outfp, 16);
138 |     write_u16(outfp, wFormatTag);
139 |     write_u16(outfp, nChannels);
140 |     write_u32(outfp, nSamplesPerSec);
141 |     write_u32(outfp, nAvgBytesPerSec);
142 |     write_u16(outfp, nBlockAlign);
143 |     write_u16(outfp, wBitsPerSample);
144 |     write_u32(outfp, MAKE_U32('d','a','t','a'));
145 |     write_u32(outfp, num_samples*2);
146 | 
147 |     /* Decode. */
148 |     utk_init(&ctx);
149 |     utk_set_fp(&ctx, infp);
150 | 
151 |     while (num_samples > 0) {
152 |         int count = MIN(num_samples, 432);
153 | 
154 |         utk_decode_frame(&ctx);
155 | 
156 |         for (i = 0; i < count; i++) {
157 |             int x = ROUND(ctx.decompressed_frame[i]);
158 |             write_u16(outfp, (int16_t)CLAMP(x, -32768, 32767));
159 |         }
160 | 
161 |         num_samples -= count;
162 |     }
163 | 
164 |     if (fclose(outfp) != 0) {
165 |         fprintf(stderr, "error: failed to close '%s': %s\n", outfile, strerror(errno));
166 |         return EXIT_FAILURE;
167 |     }
168 | 
169 |     fclose(infp);
170 | 
171 |     return EXIT_SUCCESS;
172 | }


--------------------------------------------------------------------------------
/utkencode.c:
--------------------------------------------------------------------------------
   1 | /*
   2 | ** utkencode
   3 | ** Encode wav to Maxis UTalk.
   4 | ** Authors: Fatbag
   5 | ** License: Public domain (no warranties)
   6 | ** Compile: gcc -Wall -Wextra -ansi -pedantic -O2 -ffast-math -g0 -s
   7 | **	-o utkencode utkencode.c
   8 | */
   9 | 
  10 | #include <errno.h>
  11 | #include <stdint.h>
  12 | #include <stdio.h>
  13 | #include <stdlib.h>
  14 | #include <string.h>
  15 | #include <getopt.h>
  16 | 
/* NOTE: the arithmetic macros below evaluate their arguments more than
** once; only pass expressions without side effects. */
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
/* Limit x to the inclusive range [min, max]. */
#define CLAMP(x,min,max) ((x)<(min)?(min):(x)>(max)?(max):(x))
/* Round to the nearest integer, halves away from zero; yields an int. */
#define ROUND(x) ((int)((x)>=0?((x)+0.5):((x)-0.5)))
#define ABS(x) ((x)>=0?(x):-(x))

/* Assemble a little-endian 16/32-bit value from consecutive elements of x.
** NOTE(review): if x is a uint8_t array, (x)[3]<<24 is evaluated as int, so
** a top byte >= 0x80 shifts into the sign bit - confirm callers never
** depend on such values. */
#define READ16(x) ((x)[0]|((x)[1]<<8))
#define READ32(x) ((x)[0]|((x)[1]<<8)|((x)[2]<<16)|((x)[3]<<24))

/* Store s into d[0..1] / d[0..3] in little-endian byte order. These expand
** to bare comma expressions, so use them as complete statements. */
#define WRITE16(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8)
#define WRITE32(d,s) (d)[0]=(uint8_t)(s),(d)[1]=(uint8_t)((s)>>8),\
	(d)[2]=(uint8_t)((s)>>16),(d)[3]=(uint8_t)((s)>>24)
  29 | 
/*
** 64-entry table of values in (-1, +1). Entries 0 and 32 are zero, entries
** 1..31 are negative and rise toward zero, and entries 33..63 hold the same
** magnitudes, positive, in reverse order.
** NOTE(review): judging by the name these are the quantization levels for
** reflection coefficients - confirm against the code that indexes the table.
*/
const float utk_rc_table[64] = {
	0,
	-.99677598476409912109375, -.99032700061798095703125, -.983879029750823974609375, -.977430999279022216796875,
	-.970982015132904052734375, -.964533984661102294921875, -.958085000514984130859375, -.9516370296478271484375,
	-.930754005908966064453125, -.904959976673126220703125, -.879167020320892333984375, -.853372991085052490234375,
	-.827579021453857421875, -.801786005496978759765625, -.775991976261138916015625, -.75019800662994384765625,
	-.724404990673065185546875, -.6986110210418701171875, -.6706349849700927734375, -.61904799938201904296875,
	-.567460000514984130859375, -.515873014926910400390625, -.4642859995365142822265625, -.4126980006694793701171875,
	-.361110985279083251953125, -.309523999691009521484375, -.257937014102935791015625, -.20634900033473968505859375,
	-.1547619998455047607421875, -.10317499935626983642578125, -.05158700048923492431640625,
	0,
	+.05158700048923492431640625, +.10317499935626983642578125, +.1547619998455047607421875, +.20634900033473968505859375,
	+.257937014102935791015625, +.309523999691009521484375, +.361110985279083251953125, +.4126980006694793701171875,
	+.4642859995365142822265625, +.515873014926910400390625, +.567460000514984130859375, +.61904799938201904296875,
	+.6706349849700927734375, +.6986110210418701171875, +.724404990673065185546875, +.75019800662994384765625,
	+.775991976261138916015625, +.801786005496978759765625, +.827579021453857421875, +.853372991085052490234375,
	+.879167020320892333984375, +.904959976673126220703125, +.930754005908966064453125, +.9516370296478271484375,
	+.958085000514984130859375, +.964533984661102294921875, +.970982015132904052734375, +.977430999279022216796875,
	+.983879029750823974609375, +.99032700061798095703125, +.99677598476409912109375
};
  50 | 
/* Program name printed in usage and error messages. It is assigned outside
** the functions that use it (presumably from argv[0] - confirm in main). */
static const char *prog_name;
  52 | 
  53 | static void print_help(void)
  54 | {
  55 | 	printf("Usage: %s [options] infile.wav outfile.utk\n", prog_name);
  56 | 	printf("Encode wav to Maxis UTalk.\n");
  57 | 	printf("\n");
  58 | 	printf("General options:\n");
  59 | 	printf("  -f, --force               overwrite without prompting\n");
  60 | 	printf("  -q, --quiet               suppress normal output and do not prompt\n");
  61 | 	printf("  -h, --help                display this help and exit\n");
  62 | 	printf("  -V, --version             output version information and exit\n");
  63 | 	printf("\n");
  64 | 	printf("Encoding options:\n");
  65 | 	printf("  -b, --bitrate=N           target bitrate in bits/sec (default 32000)\n");
  66 | 	printf("  -H, --halved-inn          encode innovation using half bandwidth\n");
  67 | 	printf("                            (default)\n");
  68 | 	printf("  -F, --full-inn            encode innovation using full bandwidth\n");
  69 | 	printf("  -T, --huff-threshold=N    use the Huffman codebook with threshold N where\n");
  70 | 	printf("                            N is an integer between 16 and 32 (inclusive)\n");
  71 | 	printf("                            (default 24)\n");
  72 | 	printf("  -S, --inngain-sig=N       use innovation gain significand N where N is\n");
  73 | 	printf("                            between 8 and 128 (inclusive) in steps of 8\n");
  74 | 	printf("                            (default 64)\n");
  75 | 	printf("  -B, --inngain-base=N      use innovation gain base N where N is between\n");
  76 | 	printf("                            1.040 and 1.103 (inclusive) in steps of 0.001\n");
  77 | 	printf("                            (default 1.068)\n");
  78 | 	printf("\n");
  79 | 	printf("If infile is \"-\", read from standard input.\n");
  80 | 	printf("If outfile is \"-\", write to standard output.\n");
  81 | }
  82 | 
  83 | static void print_version(void)
  84 | {
  85 | 	printf("utkencode 0.0\n");
  86 | }
  87 | 
  88 | static void print_usage_error(void)
  89 | {
  90 | 	fprintf(stderr, "Usage: %s [options] infile.wav outfile.utk\n",
  91 | 		prog_name);
  92 | 	fprintf(stderr, "Try '%s --help' for more options.\n", prog_name);
  93 | }
  94 | 
/* getopt option tables. In short_options a letter followed by ':' takes an
** argument; every long option maps to the short letter in its last field,
** so both spellings can share one handler. */
static const char short_options[] = "fqhVb:HFT:S:B:";
static const struct option long_options[] = {
	{"force",          no_argument,       0, 'f'},
	{"quiet",          no_argument,       0, 'q'},
	{"help",           no_argument,       0, 'h'},
	{"version",        no_argument,       0, 'V'},
	{"bitrate",        required_argument, 0, 'b'},
	{"halved-inn",     no_argument,       0, 'H'},
	{"full-inn",       no_argument,       0, 'F'},
	{"huff-threshold", required_argument, 0, 'T'},
	{"inngain-sig",    required_argument, 0, 'S'},
	{"inngain-base",   required_argument, 0, 'B'},
	{0, 0, 0, 0} /* terminator */
};
 109 | 
/* Settings and file handles. The initial values are the defaults that
** print_help() advertises for the corresponding options. */
static int bitrate = 32000;           /* -b: target bitrate in bits/sec */
static int force = 0;                 /* -f */
static int quiet = 0;                 /* -q */
static int halved_innovation = 1;     /* -H (1, default) or -F (presumably 0) */
static int huffman_threshold = 24;    /* -T */
static int inngain_sig = 64;          /* -S */
static float inngain_base = 1.068f;   /* -B */
static const char *infile = "";       /* input name, used in read error messages */
static const char *outfile = "";      /* output name, used in write error messages */
static FILE *infp = NULL;
static FILE *outfp = NULL;

/* Working buffers.
** NOTE(review): the sizes suggest one 432-sample frame of 16-bit input
** (wav_buffer), a frame preceded by 12 samples of history for the 12-tap
** predictor (input_samples), a frame preceded by 324 samples of history for
** the pitch search (adaptive_codebook), and 108 innovation samples with 5
** guard samples on each side for the +-5 taps read by interpolate()
** (innovation) - confirm against the encoding loop. */
static uint8_t wav_buffer[432*2];
static float input_samples[12+432];
static float adaptive_codebook[324+432];
static uint8_t compressed_buffer[1024];
static uint8_t inn_buffers[2][256];
static float prev_rc[12];
static float innovation[5+108+5];
static float inn_gains[64];
 130 | 
/* State of the bit writer implemented by the bwc_* functions. */
struct bit_writer_context {
	uint8_t written_bits_count; /* bits already used in buffer[pos]; kept below 8 by bwc_write_bits */
	size_t pos;                 /* index of the byte currently being filled */
	uint8_t *buffer;            /* caller-provided output buffer (set by bwc_init) */
};
 136 | 
 137 | static void read_data(FILE *fp, uint8_t *buffer, size_t size)
 138 | {
 139 | 	if (fread(buffer, 1, size, fp) != size) {
 140 | 		fprintf(stderr, "%s: failed to read '%s': %s\n",
 141 | 			prog_name, infile, ferror(fp)
 142 | 			? strerror(errno) : "reached end of file");
 143 | 		exit(EXIT_FAILURE);
 144 | 	}
 145 | }
 146 | 
 147 | static void write_data(FILE *fp, const uint8_t *buffer, size_t size)
 148 | {
 149 | 	if (fwrite(buffer, 1, size, fp) != size) {
 150 | 		fprintf(stderr, "%s: failed to write to '%s': %s\n",
 151 | 			prog_name, outfile, ferror(fp)
 152 | 			? strerror(errno) : "reached end of file");
 153 | 		exit(EXIT_FAILURE);
 154 | 	}
 155 | }
 156 | 
 157 | static void flush_data(FILE *fp)
 158 | {
 159 | 	if (fflush(fp) != 0) {
 160 | 		fprintf(stderr, "%s: failed to flush '%s': %s\n",
 161 | 			prog_name, outfile, strerror(errno));
 162 | 		exit(EXIT_FAILURE);
 163 | 	}
 164 | }
 165 | 
 166 | static void bwc_init(struct bit_writer_context *bwc, uint8_t *buffer)
 167 | {
 168 | 	bwc->written_bits_count = 0;
 169 | 	bwc->pos = 0;
 170 | 	bwc->buffer = buffer;
 171 | 	bwc->buffer[0] = 0;
 172 | }
 173 | 
 174 | static void bwc_write_bits(struct bit_writer_context *bwc, unsigned value,
 175 | 	uint8_t count)
 176 | {
 177 | 	unsigned x = value << bwc->written_bits_count;
 178 | 
 179 | 	bwc->buffer[bwc->pos] |= (uint8_t)x;
 180 | 	bwc->written_bits_count += count;
 181 | 
 182 | 	while (bwc->written_bits_count >= 8) {
 183 | 		x >>= 8;
 184 | 		bwc->buffer[++bwc->pos] = (uint8_t)x;
 185 | 		bwc->written_bits_count -= 8;
 186 | 	}
 187 | }
 188 | 
 189 | static void bwc_pad(struct bit_writer_context *bwc)
 190 | {
 191 | 	if (bwc->written_bits_count != 0) {
 192 | 		bwc->buffer[++bwc->pos] = 0;
 193 | 		bwc->written_bits_count = 0;
 194 | 	}
 195 | }
 196 | 
 197 | static void bwc_flush(struct bit_writer_context *bwc, FILE *fp)
 198 | {
 199 | 	write_data(fp, bwc->buffer, bwc->pos);
 200 | 	bwc->buffer[0] = bwc->buffer[bwc->pos];
 201 | 	bwc->pos = 0;
 202 | }
 203 | 
 204 | static unsigned quantize(float value, const float *alphabet, size_t alphabet_size)
 205 | {
 206 | 	unsigned i;
 207 | 	unsigned min_idx = 0;
 208 | 	float min_distance = ABS(value - alphabet[0]);
 209 | 
 210 | 	for (i = 1; i < alphabet_size; i++) {
 211 | 		float distance = ABS(value - alphabet[i]);
 212 | 
 213 | 		if (distance < min_distance) {
 214 | 			min_distance = distance;
 215 | 			min_idx = i;
 216 | 		}
 217 | 	}
 218 | 
 219 | 	return min_idx;
 220 | }
 221 | 
 222 | /* used in the parsing of some arguments */
 223 | static int read_dec_places(const char *string, int n)
 224 | {
 225 | 	int i;
 226 | 	int value = 0;
 227 | 	int pows_10[10];
 228 | 
 229 | 	pows_10[0] = 1;
 230 | 	for (i = 1; i < n; i++)
 231 | 		pows_10[i] = pows_10[i-1] * 10;
 232 | 
 233 | 	for (i = 0; i < n && string[i] >= '0' && string[i] <= '9'; i++)
 234 | 		value += pows_10[n-1-i] * (string[i]-'0');
 235 | 
 236 | 	return (string[i] == '\0') ? value : -1;
 237 | }
 238 | 
 239 | static int file_exists(const char *filename)
 240 | {
 241 | 	FILE *fp;
 242 | 
 243 | 	fp = fopen(filename, "rb");
 244 | 	if (fp) {
 245 | 		fclose(fp);
 246 | 		return 1;
 247 | 	}
 248 | 
 249 | 	return 0;
 250 | }
 251 | 
 252 | static void find_autocorrelations(float *r, const float *samples)
 253 | {
 254 | 	int i, j;
 255 | 
 256 | 	for (i = 0; i < 13; i++) {
 257 | 		r[i] = 0;
 258 | 		for (j = 0; j < 432 - i; j++)
 259 | 			r[i] += samples[j]*samples[j+i];
 260 | 	}
 261 | }
 262 | 
/*
** Order-12 Levinson-Durbin recursion (as the name indicates, for a symmetric
** system built from the values r[0..11]) with right-hand side y[0..11].
**
**   x  receives the 12 solution values,
**   k  receives the 12 reflection coefficients produced along the way
**      (k[11] is taken as -x[11] after the loop).
**
** If r[0], or the prediction error e at any step, lies within
** +-1/32768 of zero, the recursion is abandoned and both x and k are
** filled with zeros.
*/
static void levinson_durbin_symmetric(float *x, float *k,
    const float *r, const float *y)
{
	float a[12]; /* the forward vector */
	float e; /* prediction error */
	int i;

	/* Nothing to solve for a (near-)silent input. */
	if (r[0] <= 1.0f/32768.0f && r[0] >= -1.0f/32768.0f)
		goto zero;

	a[0] = 1;
	e = r[0];
	x[0] = y[0]/r[0];

	for (i = 1; i < 12; i++) {
		float u, m;
		float a_temp[12];
		int j;

		u = 0.0f;
		for (j = 0; j < i; j++)
			u += a[j]*r[i-j];

		k[i-1] = -u/e; /* reflection coefficient i-1 */
		e += u*k[i-1]; /* update e to the new value e - u*u/e */

		/* Avoid dividing by a vanishing error below. */
		if (e <= 1.0f/32768.0f && e >= -1.0f/32768.0f)
			goto zero;

		/* Extend the forward vector to order i; the copy is needed
		** because the update reads a[] in reverse while writing it. */
		memcpy(a_temp, a, i*sizeof(float));
		a[i] = 0.0f;
		for (j = 1; j <= i; j++)
			a[j] += k[i-1]*a_temp[i-j];

		/* Residual of row i for the current solution, scaled by e. */
		m = y[i];
		for (j = 0; j < i; j++)
			m -= x[j]*r[i-j];
		m /= e;

		/* Extend the solution to order i+1 using the forward vector. */
		x[i] = 0.0f;
		for (j = 0; j <= i; j++)
			x[j] += m*a[i-j];
	}

	k[11] = -x[11];

	return;

zero:
	for (i = 0; i < 12; i++)
		x[i] = 0.0f;
	for (i = 0; i < 12; i++)
		k[i] = 0.0f;
}
 317 | 
 318 | static void rc_to_lpc(float *x, const float *k)
 319 | {
 320 | 	float a[13]; /* the forward vector */
 321 | 	unsigned i, j;
 322 | 	a[0] = 1;
 323 | 
 324 | 	for (i = 1; i < 13; i++) {
 325 | 		float a_temp[12];
 326 | 		memcpy(a_temp, a, i*sizeof(float));
 327 | 		a[i] = 0.0f;
 328 | 		for (j = 1; j <= i; j++)
 329 | 			a[j] += k[i-1]*a_temp[i-j];
 330 | 	}
 331 | 
 332 | 	for (i = 1; i < 13; i++)
 333 | 		x[i-1] = -a[i];
 334 | }
 335 | 
 336 | static void find_rc(float *rc, const float *samples)
 337 | {
 338 | 	float r[13];
 339 | 	float lpc[12];
 340 | 	find_autocorrelations(r, samples);
 341 | 	levinson_durbin_symmetric(lpc, rc, r, r+1);
 342 | }
 343 | 
 344 | static void find_excitation(float *excitation, const float *source,
 345 | 	int length, const float *lpc)
 346 | {
 347 | 	int i, j;
 348 | 
 349 | 	for (i = 0; i < length; i++) {
 350 | 		float prediction = 0.0f;
 351 | 		for (j = 0; j < 12; j++)
 352 | 			prediction += lpc[j]*source[i-1-j];
 353 | 		excitation[i] = source[i] - prediction;
 354 | 	}
 355 | }
 356 | 
 357 | static void find_pitch(int *pitch_lag, float *pitch_gain,
 358 | 	const float *excitation)
 359 | {
 360 | 	int max_corr_offset = 108;
 361 | 	float max_corr_value = 0.0f;
 362 | 	float history_energy;
 363 | 	float gain;
 364 | 	int i, j;
 365 | 
 366 | 	/* Find the optimal pitch lag. */
 367 | 	for (i = 108; i < 324; i++) {
 368 | 		float corr = 0.0f;
 369 | 		for (j = 0; j < 108; j++)
 370 | 			corr += excitation[j]*excitation[j-i];
 371 | 		if (corr > max_corr_value) {
 372 | 			max_corr_offset = i;
 373 | 			max_corr_value = corr;
 374 | 		}
 375 | 	}
 376 | 
 377 | 	/* Find the optimal pitch gain. */
 378 | 	history_energy = 0.0f;
 379 | 	for (i = 0; i < 108; i++) {
 380 | 		float value = excitation[i-max_corr_offset];
 381 | 		history_energy += value*value;
 382 | 	}
 383 | 
 384 | 	if (history_energy >= 1/32768.0f) {
 385 | 		gain = max_corr_value / history_energy;
 386 | 		gain = CLAMP(gain, 0.0f, 1.0f);
 387 | 
 388 | 		*pitch_lag = max_corr_offset;
 389 | 		*pitch_gain = gain;
 390 | 	} else {
 391 | 		*pitch_lag = 108;
 392 | 		*pitch_gain = 0.0f;
 393 | 	}
 394 | }
 395 | 
 396 | static void interpolate(float *x, int a, int z)
 397 | {
 398 | 	int i;
 399 | 
 400 | 	if (z) {
 401 | 		for (i = !a; i < 108; i+=2)
 402 | 			x[i] = 0.0f;
 403 | 	} else {
 404 | 		for (i = !a; i < 108; i+=2)
 405 | 			x[i]
 406 | 				= (x[i-1]+x[i+1]) * .5973859429f
 407 | 				- (x[i-3]+x[i+3]) * .1145915613f
 408 | 				+ (x[i-5]+x[i+5]) * .0180326793f;
 409 | 	}
 410 | }
 411 | 
 412 | static float interpolation_error(int a, int z, const float *x)
 413 | {
 414 | 	float error = 0.0f;
 415 | 	int i;
 416 | 
 417 | 	if (z) {
 418 | 		for (i = !a; i < 108; i+=2)
 419 | 			error += x[i]*x[i];
 420 | 	} else {
 421 | 		for (i = !a; i < 108; i+=2) {
 422 | 			float prediction
 423 | 				= (x[i-1]+x[i+1]) * .5973859429f
 424 | 				- (x[i-3]+x[i+3]) * .1145915613f
 425 | 				+ (x[i-5]+x[i+5]) * .0180326793f;
 426 | 			error += (prediction - x[i])*(prediction - x[i]);
 427 | 		}
 428 | 	}
 429 | 
 430 | 	return error;
 431 | }
 432 | 
 433 | static void find_a_z_flags(int *a, int *z, const float *innovation)
 434 | {
 435 | 	/* Find the a and z flags such that the least error is introduced
 436 | 	** in the downsampling step. In case of a tie (e.g. in silence),
 437 | 	** prefer using the zero flag. Thus, we will test in the order:
 438 | 	** (a=0,z=1), (a=1,z=1), (a=0,z=0), (a=1,z=1). */
 439 | 	float error;
 440 | 	float best_error;
 441 | 	int best_a = 0, best_z = 1;
 442 | 
 443 | 	best_error = interpolation_error(0, 1, innovation);
 444 | 
 445 | 	error = interpolation_error(1, 1, innovation);
 446 | 	if (error < best_error) {
 447 | 		best_error = error;
 448 | 		best_a = 1, best_z = 1;
 449 | 	}
 450 | 
 451 | 	error = interpolation_error(0, 0, innovation);
 452 | 	if (error < best_error) {
 453 | 		best_error = error;
 454 | 		best_a = 0, best_z = 0;
 455 | 	}
 456 | 
 457 | 	error = interpolation_error(1, 0, innovation);
 458 | 	if (error < best_error) {
 459 | 		best_error = error;
 460 | 		best_a = 1, best_z = 0;
 461 | 	}
 462 | 
 463 | 	*a = best_a;
 464 | 	*z = best_z;
 465 | }
 466 | 
/* One entry of a Huffman codebook: a code word and its length. */
struct huffman_code {
	uint16_t bits_value; /* the code word */
	uint16_t bits_count; /* number of bits in the code word (2..16 in the tables below) */
};
 471 | 
/* Huffman code tables for quantized innovation values in -13..+13,
** indexed as huffman_models[model][13+value].
** encode_huffman() uses model 0 at the start of a subframe, after a
** zero run and after any value in -1..+1; it uses model 1 after a
** value whose magnitude exceeds 1. */
static const struct huffman_code huffman_models[2][13+1+13] = {
	/* model 0 */
	{
		/* -13 */ {16255, 16},
		/* -12 */ {8063, 15},
		/* -11 */ {3967, 14},
		/* -10 */ {1919, 13},
		/*  -9 */ {895, 12},
		/*  -8 */ {383, 11},
		/*  -7 */ {127, 10},
		/*  -6 */ {63, 8},
		/*  -5 */ {31, 7},
		/*  -4 */ {15, 6},
		/*  -3 */ {7, 5},
		/*  -2 */ {3, 4},
		/*  -1 */ {2, 2},
		/*   0 */ {0, 2},
		/*  +1 */ {1, 2},
		/*  +2 */ {11, 4},
		/*  +3 */ {23, 5},
		/*  +4 */ {47, 6},
		/*  +5 */ {95, 7},
		/*  +6 */ {191, 8},
		/*  +7 */ {639, 10},
		/*  +8 */ {1407, 11},
		/*  +9 */ {2943, 12},
		/* +10 */ {6015, 13},
		/* +11 */ {12159, 14},
		/* +12 */ {24447, 15},
		/* +13 */ {49023, 16}
	},

	/* model 1 */
	{
		/* -13 */ {8127, 15},
		/* -12 */ {4031, 14},
		/* -11 */ {1983, 13},
		/* -10 */ {959, 12},
		/*  -9 */ {447, 11},
		/*  -8 */ {191, 10},
		/*  -7 */ {63, 9},
		/*  -6 */ {31, 7},
		/*  -5 */ {15, 6},
		/*  -4 */ {7, 5},
		/*  -3 */ {3, 4},
		/*  -2 */ {1, 3},
		/*  -1 */ {2, 3},
		/*   0 */ {0, 2},
		/*  +1 */ {6, 3},
		/*  +2 */ {5, 3},
		/*  +3 */ {11, 4},
		/*  +4 */ {23, 5},
		/*  +5 */ {47, 6},
		/*  +6 */ {95, 7},
		/*  +7 */ {319, 9},
		/*  +8 */ {703, 10},
		/*  +9 */ {1471, 11},
		/* +10 */ {3007, 12},
		/* +11 */ {6079, 13},
		/* +12 */ {12223, 14},
		/* +13 */ {24511, 15}
	}
};
 535 | 
 536 | static void encode_huffman(struct bit_writer_context *bwc,
 537 | 	float *innovation_out, int *bits_used_out, float *error_out,
 538 | 	const float *innovation_in, int halved_innovation,
 539 | 	int pow, int a, int z)
 540 | {
 541 | 	int interval = halved_innovation ? 2 : 1;
 542 | 	float inn_gain;
 543 | 	float total_error = 0.0f;
 544 | 	int counter;
 545 | 	int values[108];
 546 | 	int zero_counts[108];
 547 | 	int model;
 548 | 	int bits_start, bits_end;
 549 | 	int i;
 550 | 
 551 | 	inn_gain = inn_gains[pow];
 552 | 	if (!z)
 553 | 		inn_gain *= 0.5f;
 554 | 
 555 | 	bits_start = 8*bwc->pos + bwc->written_bits_count;
 556 | 
 557 | 	if (halved_innovation)
 558 | 		bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
 559 | 	else
 560 | 		bwc_write_bits(bwc, pow, 6);
 561 | 
 562 | 	for (i = a; i < 108; i += interval) {
 563 | 		float e;
 564 | 
 565 | 		values[i] = ROUND(CLAMP(
 566 | 			innovation_in[i]/inn_gain, -13.0f, 13.0f));
 567 | 
 568 | 		innovation_out[i] = inn_gain*values[i];
 569 | 
 570 | 		e = innovation_out[i] - innovation_in[i];
 571 | 		total_error += e*e;
 572 | 	}
 573 | 
 574 | 	*error_out = total_error;
 575 | 
 576 | 	/* Find the zero runs at each position i (how many zeros
 577 | 	** in a row there are at position i).
 578 | 	** When interval=2 and a=1, start the search from i=105 instead
 579 | 	** of 107 in order to duplicate the off-by-one mistake in the
 580 | 	** decoder. (Thus, we will subtract a instead of adding.)
 581 | 	** For details, see: http://wiki.niotso.org/UTK */
 582 | 	counter = 0;
 583 | 	for (i = 108 - interval - a; i >= 0; i -= interval) {
 584 | 		if (values[i] == 0)
 585 | 			counter++;
 586 | 		else
 587 | 			counter = 0;
 588 | 		zero_counts[i] = counter;
 589 | 	}
 590 | 
 591 | 	i = a;
 592 | 	model = 0;
 593 | 	while (i < 108) {
 594 | 		if (zero_counts[i] >= 7) {
 595 | 			int length = MIN(zero_counts[i], 70);
 596 | 
 597 | 			if (model == 0)
 598 | 				bwc_write_bits(bwc, 255 | ((length-7)<<8), 14);
 599 | 			else
 600 | 				bwc_write_bits(bwc, 127 | ((length-7)<<7), 13);
 601 | 
 602 | 			model = 0;
 603 | 			i += length * interval;
 604 | 		} else {
 605 | 			int value = values[i];
 606 | 
 607 | 			bwc_write_bits(bwc,
 608 | 				huffman_models[model][13+value].bits_value,
 609 | 				huffman_models[model][13+value].bits_count);
 610 | 
 611 | 			model = (value < -1 || value > 1);
 612 | 			i += interval;
 613 | 		}
 614 | 	}
 615 | 
 616 | 	bits_end = 8*bwc->pos + bwc->written_bits_count;
 617 | 	*bits_used_out = bits_end - bits_start;
 618 | }
 619 | 
 620 | static void encode_triangular(struct bit_writer_context *bwc,
 621 | 	float *innovation_out, int *bits_used_out, float *error_out,
 622 | 	const float *innovation_in, int halved_innovation,
 623 | 	int pow, int a, int z)
 624 | {
 625 | 	int interval = halved_innovation ? 2 : 1;
 626 | 	float inn_gain;
 627 | 	float total_error = 0.0f;
 628 | 	int bits_start, bits_end;
 629 | 	int i;
 630 | 
 631 | 	inn_gain = 2.0f*inn_gains[pow];
 632 | 	if (!z)
 633 | 		inn_gain *= 0.5f;
 634 | 
 635 | 	bits_start = 8*bwc->pos + bwc->written_bits_count;
 636 | 
 637 | 	if (halved_innovation)
 638 | 		bwc_write_bits(bwc, pow | (a<<6) | (z<<7), 8);
 639 | 	else
 640 | 		bwc_write_bits(bwc, pow, 6);
 641 | 
 642 | 	for (i = a; i < 108; i += interval) {
 643 | 		float e;
 644 | 		int value = ROUND(CLAMP(
 645 | 			innovation_in[i]/inn_gain, -1.0f, 1.0f));
 646 | 
 647 | 		if (value > 0)
 648 | 			bwc_write_bits(bwc, 3, 2);
 649 | 		else if (value < 0)
 650 | 			bwc_write_bits(bwc, 1, 2);
 651 | 		else
 652 | 			bwc_write_bits(bwc, 0, 1);
 653 | 
 654 | 		innovation_out[i] = inn_gain*value;
 655 | 
 656 | 		e = innovation_out[i] - innovation_in[i];
 657 | 		total_error += e*e;
 658 | 	}
 659 | 
 660 | 	bits_end = 8*bwc->pos + bwc->written_bits_count;
 661 | 	*bits_used_out = bits_end - bits_start;
 662 | 
 663 | 	*error_out = total_error;
 664 | }
 665 | 
 666 | static void low_pass_innovation(float *x, int a, int z)
 667 | {
 668 | 	/* Apply a weak low-pass filter to the innovation signal suitable for
 669 | 	** downsampling it by 1/2. Note that, since we are throwing out all
 670 | 	** x[m] samples where m != a+2*k for integer k, we only have to filter
 671 | 	** the x[n] samples where n = a+2*k. */
 672 | 	int i;
 673 | 
 674 | 	/* filter coeffs: (GNU Octave)
 675 | 	** n = 10; b = sinc((-n/4):.5:(n/4)).*hamming(n+9)(5:(n+5))' */
 676 | 	for (i = a; i < 108; i+=2)
 677 | 		x[i] = (z ? 1.0f : 0.5f)*(x[i]
 678 | 			+ (x[i-1]+x[i+1]) * 0.6189590521549956f
 679 | 			+ (x[i-3]+x[i+3]) * -0.1633990749076792f
 680 | 			+ (x[i-5]+x[i+5]) * 0.05858453198856907f);
 681 | }
 682 | 
/* One candidate encoding of an innovation subframe, as produced by a
** trial run of encode_huffman() or encode_triangular() inside
** encode_innovation(). */
struct innovation_encoding {
	struct bit_writer_context bwc; /* scratch bit writer holding the trial bits */
	float innovation[108]; /* quantized innovation (coded indices only) */
	int bits_used; /* bits written by the trial encoder */
	float error; /* sum of squared quantization errors */
};
 689 | 
 690 | static void encode_innovation(struct bit_writer_context *bwc,
 691 | 	float *innovation, int halved_innovation, int use_huffman,
 692 | 	int *bits_used, int target_bit_count)
 693 | {
 694 | 	int a = 0, z = 1;
 695 | 	struct innovation_encoding encodings[2];
 696 | 	int m = 0;
 697 | 
 698 | 	if (halved_innovation) {
 699 | 		find_a_z_flags(&a, &z, innovation);
 700 | 		low_pass_innovation(innovation, a, z);
 701 | 	}
 702 | 
 703 | 	if (use_huffman) {
 704 | 		/* Encode using the Huffman model. */
 705 | 		int interval = halved_innovation ? 2 : 1;
 706 | 		float max_value = 0.0f;
 707 | 		int min_pow;
 708 | 		int best_distance = 0;
 709 | 		int pow;
 710 | 		int i;
 711 | 
 712 | 		/* Find the minimum innovation power such that the innovation
 713 | 		** signal doesn't clip anywhere in time. (We consider clipping
 714 | 		** a sample by <=0.5 of a quantization level to be okay since
 715 | 		** the sample already rounds down [towards zero].) */
 716 | 		for (i = a; i < 108; i += interval) {
 717 | 			float value = ABS(innovation[i]);
 718 | 			if (value > max_value)
 719 | 				max_value = value;
 720 | 		}
 721 | 		for (i = 62; i >= 0; i--) {
 722 | 			if (inn_gains[i]*(!z ? 0.5f : 1.0f)*13.5f
 723 | 				< max_value)
 724 | 				break;
 725 | 		}
 726 | 		min_pow = i+1;
 727 | 
 728 | 		/* Find the innovation gain that results in the closest
 729 | 		** to the target bitrate without clipping occurring. */
 730 | 		for (pow = min_pow; pow <= 63; pow++) {
 731 | 			int distance;
 732 | 
 733 | 			bwc_init(&encodings[m].bwc, inn_buffers[m]);
 734 | 			bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
 735 | 				bwc->written_bits_count);
 736 | 
 737 | 			encode_huffman(&encodings[m].bwc,
 738 | 				encodings[m].innovation,
 739 | 				&encodings[m].bits_used,
 740 | 				&encodings[m].error,
 741 | 				innovation, halved_innovation,
 742 | 				pow, a, z);
 743 | 
 744 | 			distance = ABS(encodings[m].bits_used
 745 | 				- target_bit_count);
 746 | 			if (pow == min_pow || distance < best_distance) {
 747 | 				best_distance = distance;
 748 | 				m = !m; /* swap the buffers */
 749 | 			}
 750 | 		}
 751 | 	} else {
 752 | 		/* Encode using the triangular noise model. */
 753 | 		float best_error = 0.0f;
 754 | 		int pow;
 755 | 
 756 | 		/* Find the innovation gain that results in
 757 | 		** the highest quality. */
 758 | 		for (pow = 0; pow <= 63; pow++) {
 759 | 			bwc_init(&encodings[m].bwc, inn_buffers[m]);
 760 | 			bwc_write_bits(&encodings[m].bwc, bwc->buffer[bwc->pos],
 761 | 				bwc->written_bits_count);
 762 | 
 763 | 			encode_triangular(&encodings[m].bwc,
 764 | 				encodings[m].innovation,
 765 | 				&encodings[m].bits_used,
 766 | 				&encodings[m].error,
 767 | 				innovation, halved_innovation,
 768 | 				pow, a, z);
 769 | 
 770 | 			if (pow == 0 || encodings[m].error < best_error) {
 771 | 				best_error = encodings[m].error;
 772 | 				m = !m; /* swap the buffers */
 773 | 			}
 774 | 		}
 775 | 	}
 776 | 
 777 | 	/* Swap the buffers again to return back to our best encoding. */
 778 | 	m = !m;
 779 | 
 780 | 	/* Write this encoding out to the UTK bitstream. */
 781 | 	memcpy(&bwc->buffer[bwc->pos], encodings[m].bwc.buffer,
 782 | 		encodings[m].bwc.pos+1);
 783 | 	bwc->pos += encodings[m].bwc.pos;
 784 | 	bwc->written_bits_count = encodings[m].bwc.written_bits_count;
 785 | 
 786 | 	/* Update the innovation signal with the quantized version. */
 787 | 	memcpy(innovation, encodings[m].innovation, 108*sizeof(float));
 788 | 	if (halved_innovation)
 789 | 		interpolate(innovation, a, z);
 790 | 
 791 | 	*bits_used = encodings[m].bits_used;
 792 | }
 793 | 
 794 | static int parse_arguments(int argc, char *argv[])
 795 | {
 796 | 	int c;
 797 | 	int value;
 798 | 	char *endptr;
 799 | 
 800 | 	prog_name = (argc >= 1 && argv[0][0] != '\0') ? argv[0] : "utkencode";
 801 | 
 802 | 	while ((c = getopt_long(argc, argv, short_options,
 803 | 		long_options, NULL)) != -1) {
 804 | 		switch (c) {
 805 | 		case 'b':
 806 | 			bitrate = (int)strtol(optarg, &endptr, 10);
 807 | 			if (*endptr != '\0'
 808 | 				|| bitrate < 1000
 809 | 				|| bitrate > 1000000) {
 810 | 				fprintf(stderr, "%s: invalid bitrate -- %s\n",
 811 | 					prog_name, optarg);
 812 | 				print_usage_error();
 813 | 				return -1;
 814 | 			}
 815 | 			break;
 816 | 		case 'f':
 817 | 			force = 1;
 818 | 			break;
 819 | 		case 'q':
 820 | 			quiet = 1;
 821 | 			break;
 822 | 		case 'h':
 823 | 			print_help();
 824 | 			return 1;
 825 | 		case 'V':
 826 | 			print_version();
 827 | 			return 1;
 828 | 		case 'H':
 829 | 			halved_innovation = 1;
 830 | 			break;
 831 | 		case 'F':
 832 | 			halved_innovation = 0;
 833 | 			break;
 834 | 		case 'T':
 835 | 			huffman_threshold = (int)strtol(optarg, &endptr, 10);
 836 | 			if (*endptr != '\0'
 837 | 				|| huffman_threshold < 16
 838 | 				|| huffman_threshold > 32) {
 839 | 				fprintf(stderr, "%s: invalid Huffman "
 840 | 					"threshold -- %s\n", prog_name, optarg);
 841 | 				print_usage_error();
 842 | 				return -1;
 843 | 			}
 844 | 			break;
 845 | 		case 'S':
 846 | 			inngain_sig = (int)strtol(optarg, &endptr, 10);
 847 | 			if (*endptr != '\0'
 848 | 				|| inngain_sig < 8
 849 | 				|| inngain_sig > 128
 850 | 				|| (inngain_sig & 7) != 0) {
 851 | 				fprintf(stderr, "%s: invalid innovation gain"
 852 | 					" significand -- %s\n", prog_name,
 853 | 					optarg);
 854 | 				print_usage_error();
 855 | 				return -1;
 856 | 			}
 857 | 			break;
 858 | 		case 'B':
 859 | 			if (optarg[0] != '1' || optarg[1] != '.'
 860 | 				|| (value = read_dec_places(optarg+2, 3)) < 0
 861 | 				|| value < 40
 862 | 				|| value > 103) {
 863 | 				fprintf(stderr, "%s: invalid innovation gain"
 864 | 					" base -- %s\n", prog_name, optarg);
 865 | 				print_usage_error();
 866 | 				return -1;
 867 | 			}
 868 | 			inngain_base = 1.0f + (float)value/1000.0f;
 869 | 			break;
 870 | 		default:
 871 | 			print_usage_error();
 872 | 			return -1;
 873 | 		}
 874 | 	}
 875 | 
 876 | 	if (argc - optind == 0) {
 877 | 		fprintf(stderr, "%s: missing infile\n", prog_name);
 878 | 		print_usage_error();
 879 | 		return -1;
 880 | 	} else if (argc - optind == 1) {
 881 | 		fprintf(stderr, "%s: missing outfile\n", prog_name);
 882 | 		print_usage_error();
 883 | 		return -1;
 884 | 	} else if (argc - optind >= 3) {
 885 | 		fprintf(stderr, "%s: too many arguments passed\n", prog_name);
 886 | 		print_usage_error();
 887 | 		return -1;
 888 | 	}
 889 | 
 890 | 	infile = argv[optind];
 891 | 	outfile = argv[optind+1];
 892 | 
 893 | 	return 0;
 894 | }
 895 | 
/* Program entry point: read a mono 16-bit PCM wav file from infile,
** encode it frame by frame (432 samples per frame, four 108-sample
** subframes each) and write a "UTM0" UTK file to outfile.
** "-" selects stdin or stdout for the respective file.
** Returns EXIT_SUCCESS on success (or after printing help/version) and
** EXIT_FAILURE on any error detected here. */
int main(int argc, char *argv[])
{
	int ret;
	uint8_t wav_header[44];
	uint8_t utk_header[32];
	unsigned bytes_remaining;
	int sampling_rate;
	struct bit_writer_context bwc;
	int i, j;

	ret = parse_arguments(argc, argv);
	if (ret < 0)
		return EXIT_FAILURE;
	else if (ret > 0)
		return EXIT_SUCCESS;

	if (!strcmp(infile, "-")) {
		infp = stdin;
	} else {
		infp = fopen(infile, "rb");
		if (!infp) {
			fprintf(stderr, "%s: failed to open '%s' for"
				" reading: %s\n", prog_name, infile,
				strerror(errno));
			return EXIT_FAILURE;
		}
	}
	setvbuf(infp, NULL, _IOFBF, BUFSIZ);

	if (!strcmp(outfile, "-")) {
		outfp = stdout;
	} else {
		/* Without --force, never silently clobber an existing file:
		** fail outright in quiet mode, otherwise ask on stderr and
		** read the answer from stdin. */
		if (!force && file_exists(outfile)) {
			if (quiet) {
				fprintf(stderr, "%s: failed to open '%s' for"
					" writing: file already exists\n",
					prog_name, outfile);
				return EXIT_FAILURE;
			} else {
				fprintf(stderr, "%s: overwrite '%s'? ",
					prog_name, outfile);
				if (getchar() != 'y')
					return EXIT_FAILURE;
			}
		}

		outfp = fopen(outfile, "wb");
		if (!outfp) {
			fprintf(stderr, "%s: failed to open '%s' for"
				" writing: %s\n", prog_name, outfile,
				strerror(errno));
			return EXIT_FAILURE;
		}
	}
	setvbuf(outfp, NULL, _IOFBF, BUFSIZ);

	/* Read a fixed 44-byte wav header. The field offsets used below
	** (format at 20, data size at 40) only hold when the fmt chunk is
	** immediately followed by the sample data chunk; the chunk ID at
	** offset 36 is not checked. */
	if (fread(wav_header, 1, 44, infp) != 44) {
		if (ferror(infp))
			fprintf(stderr, "%s: failed to read '%s': %s\n",
				prog_name, infile, strerror(errno));
		else
			fprintf(stderr, "%s: '%s' is not a valid wav file\n",
				prog_name, infile); /* (reached end of file) */
		return EXIT_FAILURE;
	}

	if (memcmp(wav_header, "RIFF", 4) != 0
		|| memcmp(wav_header+8, "WAVEfmt ", 8) != 0) {
		fprintf(stderr, "%s: '%s' is not a valid wav file\n",
			prog_name, infile);
		return EXIT_FAILURE;
	}

	if (READ16(wav_header+20) != 1 /* wFormatTag */
		|| READ16(wav_header+22) != 1 /* nChannels */
		|| READ16(wav_header+32) != 2 /* nBlockAlign */
		|| READ16(wav_header+34) != 16 /* wBitsPerSample */) {
		fprintf(stderr, "%s: wav file must be 1-channel 16-bit LPCM\n",
			prog_name);
		return EXIT_FAILURE;
	}

	sampling_rate = READ32(wav_header+24); /* nSamplesPerSec */
	if (sampling_rate < 1000 || sampling_rate > 1000000) {
		fprintf(stderr, "%s: unsupported sampling rate %d\n",
			prog_name, sampling_rate);
		return EXIT_FAILURE;
	}

	/* Build the 32-byte UTK file header. */
	memcpy(utk_header, "UTM0", 4); /* sID */

	/* Drop the last byte from the wav file if there are an odd
	** number of sample bytes. */
	bytes_remaining = READ32(wav_header+40) & (~1);
	WRITE32(utk_header+4, bytes_remaining); /* dwOutSize */

	WRITE32(utk_header+8, 20); /* dwWfxSize */
	memcpy(utk_header+12, wav_header+20, 16); /* WAVEFORMATEX */
	WRITE32(utk_header+28, 0); /* cbSize */

	write_data(outfp, utk_header, 32);

	bwc_init(&bwc, compressed_buffer);

	/* Bitstream header (15 bits): halved-innovation flag, Huffman
	** threshold, innovation gain significand and innovation gain
	** base, each stored relative to its minimum/maximum. */
	bwc_write_bits(&bwc, halved_innovation, 1);
	bwc_write_bits(&bwc, 32 - huffman_threshold, 4);
	bwc_write_bits(&bwc, inngain_sig/8 - 1, 4);
	bwc_write_bits(&bwc, ROUND((inngain_base - 1.04f)*1000.0f), 6);
	bwc_flush(&bwc, outfp);

	/* Clear the encoder state: the 12 samples of input history, the
	** 324 samples of adaptive codebook history, the previous frame's
	** reflection coefficients and the 5 samples of padding on either
	** side of the 108-sample innovation buffer. */
	for (i = 0; i < 12; i++)
		input_samples[i] = 0.0f;
	for (i = 0; i < 324; i++)
		adaptive_codebook[i] = 0.0f;
	for (i = 0; i < 12; i++)
		prev_rc[i] = 0.0f;
	for (i = 0; i < 5; i++)
		innovation[i] = 0.0f;
	for (i = 5+108; i < 5+108+5; i++)
		innovation[i] = 0.0f;

	/* Innovation gain table: geometric progression
	** inn_gains[i] = inngain_sig * inngain_base^i. */
	inn_gains[0] = inngain_sig;
	for (i = 1; i < 64; i++)
		inn_gains[i] = inn_gains[i-1]*inngain_base;

	while (bytes_remaining != 0) {
		/* Encode the next frame of 432 samples. */
		int bytes_to_read;
		int samples_to_read;
		float rc[12];
		float rc_delta[12];
		int use_huffman = 0;

		bytes_to_read = (int)MIN(bytes_remaining, 432*2);
		samples_to_read = bytes_to_read >> 1;

		read_data(infp, wav_buffer, bytes_to_read);
		bytes_remaining -= bytes_to_read;

		/* Convert to float and zero-pad a short final frame. */
		for (i = 0; i < samples_to_read; i++) {
			int16_t x = READ16(wav_buffer+2*i);
			input_samples[12+i] = (float)x;
		}
		for (i = samples_to_read; i < 432; i++)
			input_samples[12+i] = 0.0f;

		/* NOTE(review): presumably computes the frame's 12
		** reflection coefficients; find_rc is defined elsewhere. */
		find_rc(rc, input_samples+12);

		/* Quantize the reflection coefficients.
		** In our encoder, we will not make use of utk_rc_table[0]. */
		for (i = 0; i < 4; i++) {
			/* First four coefficients: 6-bit table index. */
			int idx = 1+quantize(rc[i], utk_rc_table+1, 63);
			bwc_write_bits(&bwc, idx, 6);
			rc[i] = utk_rc_table[idx];
			/* The index of the first coefficient selects the
			** innovation model for the whole frame. */
			if (i == 0 && idx < huffman_threshold)
				use_huffman = 1;
		}
		for (i = 4; i < 12; i++) {
			/* Remaining eight coefficients: 5-bit index into
			** the table starting at entry 16. */
			int idx = quantize(rc[i], utk_rc_table+16, 32);
			bwc_write_bits(&bwc, idx, 5);
			rc[i] = utk_rc_table[16+idx];
		}

		/* Per-step change when moving from the previous frame's
		** coefficients to this frame's in four equal steps. */
		for (i = 0; i < 12; i++)
			rc_delta[i] = (rc[i] - prev_rc[i])/4.0f;

		/* Restart from the previous frame's coefficients; the loop
		** below adds rc_delta four times. */
		memcpy(rc, prev_rc, 12*sizeof(float));

		for (i = 0; i < 4; i++) {
			/* Linearly interpolate the reflection coefficients over
			** the four subframes and find the excitation signal. */
			float lpc[12];

			for (j = 0; j < 12; j++)
				rc[j] += rc_delta[j];

			rc_to_lpc(lpc, rc);

			/* The first three steps each cover 12 samples; the
			** last one covers the remaining 396 samples of the
			** frame. */
			find_excitation(adaptive_codebook+324+12*i,
				input_samples+12+12*i,
				i < 3 ? 12 : 396, lpc);
		}

		/* Carry the last 12 input samples and the final
		** coefficients over to the next frame. */
		memcpy(input_samples, &input_samples[432], 12*sizeof(float));
		memcpy(prev_rc, rc, 12*sizeof(float));

		for (i = 0; i < 4; i++) {
			/* Encode the i'th subframe. */
			float *excitation = adaptive_codebook+324+108*i;
			int pitch_lag;
			float pitch_gain;
			int idx;
			int bits_used;

			find_pitch(&pitch_lag, &pitch_gain, excitation);

			/* The lag is stored as an 8-bit offset from 108. */
			bwc_write_bits(&bwc, pitch_lag - 108, 8);

			/* Quantize the pitch gain to 4 bits in steps of
			** 1/15 and continue with the quantized value. */
			idx = ROUND(pitch_gain*15.0f);
			bwc_write_bits(&bwc, idx, 4);
			pitch_gain = (float)idx/15.0f;

			/* Innovation = excitation minus the pitch
			** prediction; it is stored after the 5 samples of
			** leading padding. */
			for (j = 0; j < 108; j++)
				innovation[5+j] = excitation[j]
					- pitch_gain*excitation[j-pitch_lag];

			/* The last argument is the bit target for this
			** subframe's innovation: a quarter of the frame's
			** budget at the requested bitrate, minus 18. */
			encode_innovation(&bwc, &innovation[5],
				halved_innovation, use_huffman, &bits_used,
				ROUND(bitrate * 432 / sampling_rate / 4) - 18);

			/* Update the adaptive codebook using the quantized
			** innovation signal. */
			for (j = 0; j < 108; j++)
				excitation[j] = innovation[5+j]
					+ pitch_gain*excitation[j-pitch_lag];
		}

		/* Copy the last 3 subframes to the beginning of the
		** adaptive codebook. */
		memcpy(adaptive_codebook, &adaptive_codebook[432],
			324*sizeof(float));

		bwc_flush(&bwc, outfp);
	}

	/* NOTE(review): presumably pads the final partial byte so that
	** the last bits get written out; bwc_pad is defined elsewhere. */
	bwc_pad(&bwc);
	bwc_flush(&bwc, outfp);

	flush_data(outfp);

	fclose(outfp);
	fclose(infp);

	return EXIT_SUCCESS;
}


--------------------------------------------------------------------------------