// Repository layout recorded alongside this source:
//   .gitignore   (ignores *.obj and *.exe)
//   main.cpp     (this file)

// Simple byte-aligned binary arithmetic coder (Ilya Muravyov's variant) - public domain - Fabian 'ryg' Giesen 2015
//
// Written for clarity not speed!

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>
#include <vector>

// Probabilities are expressed in fixed point, with kProbBits bits of
// resolution. No need to go overboard with this.
static int const kProbBits = 12;
static uint32_t const kProbMax = 1u << kProbBits;

// Type used for buffers.
typedef std::vector<uint8_t> ByteVec;

// Binary arithmetic encoder (Ilya Muravyov's variant)
// Encodes/decodes a string of binary (0/1) events with
// probabilities that are not 1/2.
//
// The coder tracks an inclusive 32-bit interval [lo, hi]. Every symbol
// narrows that interval; whenever lo and hi agree in their top byte, that
// byte is final and gets appended to the output buffer.
//
// This code is written for clarity, not performance.
class BinArithEncoder
{
    uint32_t lo, hi;  // inclusive bounds of the current coding interval
    ByteVec &bytes;   // output buffer; owned by the caller and must outlive the encoder

    // noncopyable
    BinArithEncoder(BinArithEncoder const &);
    BinArithEncoder &operator =(BinArithEncoder const &);

public:
    // Initialize: start with the full 32-bit interval, appending output to "target".
    explicit BinArithEncoder(ByteVec &target) : lo(0), hi(~0u), bytes(target) { }

    // Finish encoding - flushes remaining codeword.
    // Always appends the four bytes of "lo", most significant byte first,
    // so the stream is only complete once the encoder has been destroyed.
    ~BinArithEncoder()
    {
        for (int i = 0; i < 4; ++i)
        {
            bytes.push_back(static_cast<uint8_t>(lo >> 24));
            lo <<= 8;
        }
    }

    // Encode a binary symbol "bit" with the probability of a 1 being "prob".
    // "prob" is in fixed point with kProbBits fractional bits.
    // Note that prob=0 (or prob=1<<kProbBits) really mean that a 1 (or 0,
    // respectively) cannot occur!
    void encode(int bit, uint32_t prob)
    {
        // Midpoint of active probability interval subdivided via prob.
        // The product needs 64 bits: (hi - lo) can use all 32 bits by itself.
        uint32_t x = lo + static_cast<uint32_t>((uint64_t(hi - lo) * prob) >> kProbBits);

        // A 1 takes the lower part [lo, x], a 0 the upper part [x + 1, hi].
        if (bit)
            hi = x;
        else
            lo = x + 1;

        // Renormalize: when top byte of lo/hi is same, shift it out.
        while ((lo ^ hi) < (1u << 24))
        {
            bytes.push_back(static_cast<uint8_t>(lo >> 24));
            lo <<= 8;
            hi = (hi << 8) | 0xff;
        }
    }
};

// Corresponding decoder.
//
// Mirrors the encoder's interval updates step by step, so it must be driven
// with the same sequence of probabilities that was used while encoding.
class BinArithDecoder
{
    uint32_t code, lo, hi;  // current 32-bit code window and inclusive interval bounds
    ByteVec const &bytes;   // coded input; owned by the caller and must outlive the decoder
    size_t read_pos;        // index of the next unread byte in "bytes"

    // noncopyable
    BinArithDecoder(BinArithDecoder const &);
    BinArithDecoder &operator =(BinArithDecoder const &);

    // Fetch the next coded byte. Reading past the end of the buffer yields 0
    // instead of indexing out of bounds, so an empty or truncated stream
    // decodes to garbage rather than invoking undefined behavior.
    uint8_t next_byte()
    {
        if (read_pos < bytes.size())
            return bytes[read_pos++];
        return 0;
    }

public:
    // Start decoding: prime the code window with the first four bytes.
    explicit BinArithDecoder(ByteVec const &source)
        : code(0), lo(0), hi(~0u), bytes(source), read_pos(0)
    {
        for (int i = 0; i < 4; ++i)
            code = (code << 8) | next_byte();
    }

    // Decode a binary symbol with the probability of a 1 being "prob".
    // Returns the decoded bit (0 or 1).
    int decode(uint32_t prob)
    {
        int bit;

        // Midpoint of active probability interval subdivided via prob
        uint32_t x = lo + static_cast<uint32_t>((uint64_t(hi - lo) * prob) >> kProbBits);

        // The code value falls in exactly one of the two sub-intervals.
        if (code <= x)
        {
            hi = x;
            bit = 1;
        }
        else
        {
            lo = x + 1;
            bit = 0;
        }

        // Renormalize: same condition as the encoder, pulling in one input
        // byte for every byte the encoder emitted.
        while ((lo ^ hi) < (1u << 24))
        {
            code = (code << 8) | next_byte();
            lo <<= 8;
            hi = (hi << 8) | 0xff;
        }

        return bit;
    }
};

// ---- A few basic models

// NOTE: Again, this is written for clarity and ease of tinkering.
// In practice, you will write more direct code for these once you've
// figured out your coding structure.

// Adaptive binary model. These are pretty good!
// Lower Inertia = faster.
//
// You typically build more sophisticated models out of these
// by having lots of them and choosing the active model based on
// context.
136 | template 137 | struct BinShiftModel 138 | { 139 | uint16_t prob; 140 | 141 | BinShiftModel() : prob(kProbMax / 2) {} 142 | 143 | void encode(BinArithEncoder &enc, int bit) 144 | { 145 | enc.encode(bit, prob); 146 | adapt(bit); 147 | } 148 | 149 | int decode(BinArithDecoder &dec) 150 | { 151 | int bit = dec.decode(prob); 152 | adapt(bit); 153 | return bit; 154 | } 155 | 156 | void adapt(int bit) 157 | { 158 | // Note prob never his 0 or kProbMax with this update rule! 159 | if (bit) 160 | prob += (kProbMax - prob) >> Inertia; 161 | else 162 | prob -= prob >> Inertia; 163 | } 164 | }; 165 | 166 | // BitTree model. A tree-shaped cascade of BinShiftModels. 167 | // This is the de-facto standard way to build a multi-symbol coder 168 | // (values with NumBits bits) out of binary models. 169 | // 170 | // LZMA (as in 7zip/xz) uses this type of model (backed by a BinShiftModel 171 | // as above) for its literals. 172 | template 173 | struct BitTreeModel 174 | { 175 | static size_t const kNumSyms = 1 << NumBits; 176 | static size_t const kMSB = kNumSyms / 2; 177 | 178 | BitModel model[kNumSyms - 1]; 179 | 180 | void encode(BinArithEncoder &enc, size_t value) 181 | { 182 | assert(value < kNumSyms); 183 | 184 | // The first bit sent is the MSB of the value and coded without context 185 | // Second bit is the bit below the MSB, using the value of the MSB as context 186 | // and so forth. 187 | // 188 | // 1 + 2 + 4 + ... = 2^NumBits - 1 contexts. 189 | // Numbering the MSB context 1 and then shifting in the coded bits from the 190 | // bottom is a convenient way to index them. 
(So ctx is 1-based) 191 | size_t ctx = 1; 192 | while (ctx < kNumSyms) 193 | { 194 | int bit = (value & kMSB) != 0; 195 | value += value; // shift value by 1 for next iter 196 | model[ctx - 1].encode(enc, bit); 197 | ctx += ctx + bit; // shift in "bit" into context 198 | } 199 | } 200 | 201 | size_t decode(BinArithDecoder &dec) 202 | { 203 | // Corresponding decoder is nice and easy: 204 | size_t ctx = 1; 205 | while (ctx < kNumSyms) 206 | ctx += ctx + model[ctx - 1].decode(dec); 207 | 208 | return ctx - kNumSyms; 209 | } 210 | }; 211 | 212 | // ---- Random utility code 213 | 214 | static double log_2(double x) 215 | { 216 | return log(x) / log(2.0); 217 | } 218 | 219 | // ---- Some examples 220 | 221 | static void example_static() 222 | { 223 | // A static binary source with known probability of 1 being 1/5. 224 | ByteVec source; 225 | uint32_t const kProbOne = kProbMax / 5; 226 | 227 | srand(1234); 228 | for (size_t i = 0; i < 10000; ++i) 229 | source.push_back(rand() < (RAND_MAX/5)); 230 | 231 | // Encode it 232 | ByteVec coded; 233 | { 234 | BinArithEncoder coder(coded); 235 | for (size_t i = 0; i < source.size(); ++i) 236 | coder.encode(source[i], kProbOne); 237 | } 238 | 239 | // Print actual and expected size (based on order-0 entropy) 240 | { 241 | double p = kProbOne / (double)kProbMax; 242 | double entropy_bits_per_sym = -p * log_2(p) - (1.0 - p) * log_2(1.0 - p); 243 | printf("static size: %d bytes - entropy: %.2f bytes\n", coded.size(), source.size() * entropy_bits_per_sym / 8.0); 244 | } 245 | 246 | // Decode it 247 | ByteVec decoded; 248 | { 249 | BinArithDecoder coder(coded); 250 | for (size_t i = 0; i < source.size(); ++i) 251 | decoded.push_back((uint8_t) coder.decode(kProbOne)); 252 | } 253 | 254 | if (decoded != source) 255 | printf("error decoding!\n"); 256 | else 257 | printf("decodes ok!\n"); 258 | } 259 | 260 | static void example_dynamic() 261 | { 262 | // A binary source that keeps changing its probability of 1 regularly 263 | // in a way 
opaque to the coder. 264 | // Use this as example for an adaptive model. 265 | static int const kInertia = 4; 266 | ByteVec source; 267 | 268 | srand(2345); 269 | for (size_t chunk = 0; chunk < 50; ++chunk) 270 | { 271 | int threshold = rand(); 272 | for (size_t i = 0; i < 200; ++i) 273 | source.push_back(rand() < threshold); 274 | } 275 | 276 | // Encode it 277 | ByteVec coded; 278 | { 279 | BinArithEncoder coder(coded); 280 | BinShiftModel model; 281 | for (size_t i = 0; i < source.size(); ++i) 282 | model.encode(coder, source[i]); 283 | } 284 | 285 | printf("dynamic size: %d bytes\n", coded.size()); 286 | 287 | // Decode it 288 | ByteVec decoded; 289 | { 290 | BinArithDecoder coder(coded); 291 | BinShiftModel model; 292 | for (size_t i = 0; i < source.size(); ++i) 293 | decoded.push_back((uint8_t) model.decode(coder)); 294 | } 295 | 296 | if (decoded != source) 297 | printf("error decoding!\n"); 298 | else 299 | printf("decodes ok!\n"); 300 | } 301 | 302 | static void example_multisymbol() 303 | { 304 | // Example for a multi-symbol alphabet - bytes in this case. 305 | // Let's get meta and use this source code as our source! 
306 | typedef BitTreeModel, 8> ByteModel; 307 | ByteVec source; 308 | 309 | { 310 | FILE *f = fopen("main.cpp", "rb"); 311 | if (!f) 312 | return; 313 | 314 | fseek(f, 0, SEEK_END); 315 | source.resize(ftell(f)); 316 | fseek(f, 0, SEEK_SET); 317 | fread(&source[0], 1, source.size(), f); 318 | fclose(f); 319 | } 320 | 321 | // Encode it 322 | ByteVec coded; 323 | { 324 | BinArithEncoder coder(coded); 325 | ByteModel model; 326 | for (size_t i = 0; i < source.size(); ++i) 327 | model.encode(coder, source[i]); 328 | } 329 | 330 | printf("multisymbol size: %d bytes\n", coded.size()); 331 | 332 | // Decode it 333 | ByteVec decoded; 334 | { 335 | BinArithDecoder coder(coded); 336 | ByteModel model; 337 | for (size_t i = 0; i < source.size(); ++i) 338 | decoded.push_back((uint8_t) model.decode(coder)); 339 | } 340 | 341 | if (decoded != source) 342 | printf("error decoding!\n"); 343 | else 344 | printf("decodes ok!\n"); 345 | } 346 | 347 | int main() 348 | { 349 | example_static(); 350 | example_dynamic(); 351 | example_multisymbol(); 352 | return 0; 353 | } 354 | 355 | // vim:et:sts=4:sw=4 356 | --------------------------------------------------------------------------------