├── README.md └── zmolly.cpp /README.md: -------------------------------------------------------------------------------- 1 | zmolly 2 | ====== 3 | 4 | zmolly is a general-purpose data compressor with a high compression ratio. It is based on the LZP and PPM algorithms. 5 | 6 | Simple benchmark with **enwik8** (100,000,000 bytes): 7 | 8 | Tool | Compressed Size | 9 | ------------------|-----------------| 10 | gzip | 36518 KB | 11 | gzip -9 | 36445 KB | 12 | bzip2 | 29009 KB | 13 | xz | 26376 KB | 14 | xz --extreme | 26366 KB | 15 | uharc -mx md32768 | 23919 KB | 16 | zmolly | 22525 KB | 17 | -------------------------------------------------------------------------------- /zmolly.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2016 by Zhang Li 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 3. Neither the name of the project nor the names of its contributors 14 | * may be used to endorse or promote products derived from this software 15 | * without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | * ARE DISCLAIMED. 
/* IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <cstdint>
#include <cstdio>
#include <cstring>

#include <algorithm>
#include <array>
#include <bitset>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>

/*******************************************************************************
 * Arithmetic coder
 ******************************************************************************/
static const auto RC_TOP = 1u << 24;
static const auto RC_BOT = 1u << 16;

// Range encoder writing its output byte-by-byte to an std::ostream.
// Every encode() call narrows [m_low, m_low + m_range) to the sub-interval
// [cum, cum + frq) out of `sum`, then emits the top bytes that are settled.
struct rc_encoder_t {
    std::ostream& m_ostream;
    uint32_t m_low;
    uint32_t m_range;

    rc_encoder_t(std::ostream& ostream):
        m_ostream(ostream),
        m_low(0),
        m_range(-1) {}

    // cum: cumulative frequency below the symbol, frq: frequency of the symbol,
    // sum: total frequency. Callers must pass frq > 0 and cum + frq <= sum.
    void encode(uint16_t cum, uint16_t frq, uint16_t sum) {
        m_range /= sum;
        m_low += cum * m_range;
        m_range *= frq;
        // Emit a byte while the top byte of low and low+range agree, or, when the
        // range has become too small, after clamping the range so it cannot straddle
        // a RC_BOT boundary. rc_decoder_t::decode() runs the identical loop.
        while ((m_low ^ (m_low + m_range)) < RC_TOP || (m_range < RC_BOT && ((m_range = -m_low & (RC_BOT - 1)), 1))) {
            m_ostream.put(m_low >> 24);
            m_low <<= 8;
            m_range <<= 8;
        }
    }

    // Write the four remaining bytes of m_low; must be called once after the last encode().
    void flush() {
        for (auto i = 0; i < 4; i++) {
            m_ostream.put(m_low >> 24);
            m_low <<= 8;
        }
    }
};

// Range decoder, mirror image of rc_encoder_t. Usage per symbol:
// decode_cum(sum) to obtain the cumulative value, then decode(cum, frq) with the
// interval of the symbol that was identified.
struct rc_decoder_t {
    std::istream& m_istream;
    uint32_t m_low;
    uint32_t m_range;
    uint32_t m_code;

    rc_decoder_t(std::istream& istream): m_istream(istream), m_low(0), m_range(-1), m_code(0) {
        // Prime the code register with the first four bytes. The unsigned char cast
        // matches decode() and keeps an EOF (-1) from smearing ones over the whole register.
        for (auto i = 0; i < 4; i++) {
            m_code = m_code << 8 | (unsigned char) istream.get();
        }
    }

    void decode(uint16_t cum, uint16_t frq) {
        m_low += cum * m_range;
        m_range *= frq;
        while ((m_low ^ (m_low + m_range)) < RC_TOP || (m_range < RC_BOT && ((m_range = -m_low & (RC_BOT - 1)), 1))) {
            m_code = m_code << 8 | (unsigned char) m_istream.get();
            m_range <<= 8;
            m_low <<= 8;
        }
    }

    // Scales the range by `sum` (as encode() does) and returns the cumulative
    // frequency the current code value points at.
    uint16_t decode_cum(uint16_t sum) {
        m_range /= sum;
        return (m_code - m_low) / m_range;
    }
};

/*******************************************************************************
 * PPM Model
 ******************************************************************************/
static const auto PPM_O4_BUCKET_SIZE = 262144;
static const auto PPM_SEE_SIZE = 131072;

// One (symbol, frequency) pair stored inside a context model.
struct symbol_counter_t {
    uint8_t m_sym;
    uint8_t m_frq;
    symbol_counter_t():
        m_sym(0),
        m_frq(0) {}
};

// Adaptive binary model holding one counter per bit value.
struct bit_model_t {
    uint16_t m_c[2];

    int encode(rc_encoder_t* coder, int c) {
        if (c == 0) {
            coder->encode(0, m_c[0], m_c[0] + m_c[1]);
        } else {
            coder->encode(m_c[0], m_c[1], m_c[0] + m_c[1]);
        }
        return c;
    }

    int decode(rc_decoder_t* coder) {
        if (m_c[0] > coder->decode_cum(m_c[0] + m_c[1])) {
            coder->decode(0, m_c[0]);
            return 0;
        } else {
            coder->decode(m_c[0], m_c[1]);
            return 1;
        }
    }

    // Bump the counter of the observed bit; once it exceeds 9000 both counters are
    // scaled by 0.9 (the +1 keeps a non-zero counter from decaying to zero).
    void update(int c) {
        if ((m_c[c] += 15) > 9000) {
            m_c[0] = (m_c[0] + 1) * 0.9;
            m_c[1] = (m_c[1] + 1) * 0.9;
        }
    }
};

// Dense model: a table with room for all 256 symbols, used for the short contexts
// (order 2, 1 and 0). m_symbols[0] is always the most recently coded symbol and gets
// its frequency counted twice ("recent_frq") when probabilities are computed.
// NOTE: the array size is reconstructed from the `i + n < 256` rescale loop of the
// original source, whose template arguments were lost.
struct dense_model_t {
    uint16_t m_sum;  // sum of all m_frq
    uint16_t m_cnt;  // number of used entries in m_symbols
    uint16_t m_esc;  // number of symbols with m_frq == 1, used as escape frequency
    std::array<symbol_counter_t, 256> m_symbols;

    dense_model_t():
        m_sum(0),
        m_cnt(0),
        m_esc(0) {}

    // Encode symbol c, skipping symbols marked in `exclude`.
    // Returns 1 when c was found and coded. Returns 0 when an escape was coded; in that
    // case all symbols of this context are added to `exclude` and an empty slot for c
    // is placed at m_symbols[0]. update(c) must be called afterwards in both cases.
    int encode(rc_encoder_t* coder, std::bitset<256>& exclude, int c) {
        auto found = 0;
        auto found_pos = 0;
        auto cum = 0;
        auto frq = 0;
        auto sum = 0;
        auto esc = 0;
        auto recent_frq = m_symbols[0].m_frq & -!exclude[m_symbols[0].m_sym];

        if (!exclude.any()) {
            for (auto i = 0; i < m_cnt; i++) { // no exclusion
                if (m_symbols[i].m_sym == c) {
                    found_pos = i;
                    found = 1;
                    break;
                }
                cum += m_symbols[i].m_frq;
            }
            sum = m_sum;
        } else {
            for (auto i = 0; i < m_cnt; i++) {
                if (m_symbols[i].m_sym == c) {
                    found_pos = i;
                    found = 1;
                }
                // `x & -flag` keeps x when flag is true and yields 0 otherwise
                cum += m_symbols[i].m_frq & -(!exclude[m_symbols[i].m_sym] && !found);
                sum += m_symbols[i].m_frq & -(!exclude[m_symbols[i].m_sym]);
            }
        }

        esc = m_esc + !m_esc; // escape frequency is never zero
        sum += recent_frq + esc;
        frq = m_symbols[found_pos].m_frq;
        if (found_pos == 0) {
            frq += recent_frq;
        } else {
            std::swap(m_symbols[found_pos], m_symbols[0]); // move to front
            cum += recent_frq;
        }

        if (!found) {
            for (auto i = 0; i < m_cnt; i++) { // do exclude
                exclude[m_symbols[i].m_sym] = 1;
            }
            // move the old front entry to the end and open an empty front slot for c
            m_symbols[m_cnt].m_frq = m_symbols[0].m_frq;
            m_symbols[m_cnt].m_sym = m_symbols[0].m_sym;
            m_symbols[0].m_sym = c;
            m_symbols[0].m_frq = 0;
            m_cnt += 1;
            cum = sum - esc;
            frq = esc;
        }
        coder->encode(cum, frq, sum);
        return found;
    }

    // Decoder counterpart of encode(). Returns the decoded symbol, or -1 on escape.
    int decode(rc_decoder_t* coder, std::bitset<256>& exclude) {
        auto cum = 0;
        auto frq = 0;
        auto sum = 0;
        auto esc = 0;
        auto recent_frq = m_symbols[0].m_frq & -!exclude[m_symbols[0].m_sym];
        auto sym = -1;

        for (auto i = 0; i < m_cnt; i++) {
            sum += m_symbols[i].m_frq & -!exclude[m_symbols[i].m_sym];
        }
        esc = m_esc + !m_esc;
        sum += recent_frq + esc;

        auto decode_cum = coder->decode_cum(sum);
        if (sum - esc <= decode_cum) {
            for (auto i = 0; i < m_cnt; i++) { // do exclude
                exclude[m_symbols[i].m_sym] = 1;
            }
            // m_symbols[0].m_sym is filled in by the update(c) call that follows
            m_symbols[m_cnt].m_frq = m_symbols[0].m_frq;
            m_symbols[m_cnt].m_sym = m_symbols[0].m_sym;
            m_symbols[0].m_frq = 0;
            m_cnt += 1;
            cum = sum - esc;
            frq = esc;
        } else {
            auto i = 0;
            if (!exclude.any()) { // no exclusion
                while (cum + recent_frq + m_symbols[i].m_frq <= decode_cum) {
                    cum += m_symbols[i].m_frq;
                    i++;
                }
            } else {
                while (cum + recent_frq + (m_symbols[i].m_frq & -!exclude[m_symbols[i].m_sym]) <= decode_cum) {
                    cum += m_symbols[i].m_frq & -!exclude[m_symbols[i].m_sym];
                    i++;
                }
            }
            frq = m_symbols[i].m_frq;
            sym = m_symbols[i].m_sym;
            if (i == 0) {
                frq += recent_frq;
            } else {
                std::swap(m_symbols[i], m_symbols[0]);
                cum += recent_frq;
            }
        }
        coder->decode(cum, frq);
        return sym;
    }

    // Count one occurrence of c, which encode()/decode() left at m_symbols[0].
    void update(int c) {
        m_symbols[0].m_frq += 1;
        m_symbols[0].m_sym = c;
        m_sum += 1;
        m_esc += (m_symbols[0].m_frq == 1) - (m_symbols[0].m_frq == 2);

        if (m_symbols[0].m_frq > 250) { // rescale: halve all counts, drop those reaching zero
            auto kept = 0;
            m_sum = 0;
            m_esc = 0;
            for (auto src = 0; src < 256; src++) {
                const auto halved = m_symbols[src].m_frq / 2;
                if (halved > 0) {
                    m_symbols[kept].m_sym = m_symbols[src].m_sym;
                    m_symbols[kept].m_frq = halved;
                    m_sum += halved;
                    m_esc += halved == 1;
                    kept++;
                }
            }
            m_cnt = kept;
            std::fill(m_symbols.begin() + m_cnt, m_symbols.end(), symbol_counter_t());
        }
    }
};

// Sparse model: a small move-to-front list of symbols, used for the long (order 4+)
// contexts. Instances are nodes of the singly linked hash chains owned by ppm_model_t.
// NOTE: the array size 56 is taken from the original "symbol size = 56" comment; the
// template arguments themselves were lost from the source.
struct sparse_model_t {
    sparse_model_t* m_next;
    uint16_t m_sum;
    uint8_t m_cnt;
    uint8_t m_visited;
    uint64_t m_context : 48;
    std::array<symbol_counter_t, 56> m_symbols; // symbol size = 56: make 128byte struct

    sparse_model_t():
        m_next(nullptr),
        m_sum(0),
        m_cnt(0),
        m_visited(0),
        m_context(0) {}

    // Encode c in this context. `see` codes the found/escape decision.
    // Returns 1 when c was found (it is moved to the front), 0 on escape (the symbols of
    // this context are added to `exclude` and an empty front slot is opened for c).
    int encode(bit_model_t* see, rc_encoder_t* coder, int c, std::bitset<256>& exclude) {
        auto cum = 0;
        auto frq = 0;
        auto found_pos = -1;

        for (auto i = 0; i < m_cnt; i++) { // search for symbol
            if (m_symbols[i].m_sym == c) {
                found_pos = i;
                break;
            }
            cum += m_symbols[i].m_frq;
        }
        if (found_pos >= 0) { // found -- bring to front
            see->encode(coder, 0);
            see->update(0);
            if (m_cnt != 1) { // a single-symbol context is fully described by the SEE bit
                auto recent_frq = (m_symbols[0].m_frq + 6) / 2; // recency scaling
                if (found_pos == 0) {
                    frq = m_symbols[found_pos].m_frq + recent_frq;
                } else {
                    frq = m_symbols[found_pos].m_frq;
                    cum += recent_frq;
                    // std::rotate is well defined here; the former overlapping std::copy was not
                    std::rotate(m_symbols.begin(), m_symbols.begin() + found_pos, m_symbols.begin() + found_pos + 1);
                }
                coder->encode(cum, frq, m_sum + recent_frq);
            }
            return 1;

        } else { // not found -- create new node for sym
            see->encode(coder, 1);
            see->update(1);
            for (auto i = 0; i < m_cnt; i++) {
                exclude[m_symbols[i].m_sym] = 1; // exclude o4
            }
            if (m_cnt == m_symbols.size()) {
                m_sum -= m_symbols[m_cnt - 1].m_frq; // list is full: the last entry is dropped
            } else {
                m_cnt += 1;
            }
            std::copy_backward(m_symbols.begin(), m_symbols.begin() + (m_cnt - 1), m_symbols.begin() + m_cnt);
            m_symbols[0].m_sym = c;
            m_symbols[0].m_frq = 0;
        }
        return 0;
    }

    // Decoder counterpart of encode(). Returns the decoded symbol, or -1 on escape.
    int decode(bit_model_t* see, rc_decoder_t* coder, std::bitset<256>& exclude) {
        auto cum = 0;
        auto frq = 0;

        if (see->decode(coder) == 0) {
            see->update(0);
            if (m_cnt != 1) { // no need to decode binary context
                auto recent_frq = (m_symbols[0].m_frq + 6) / 2; // recency scaling
                auto decode_cum = coder->decode_cum(m_sum + recent_frq);
                auto i = 0;
                while (cum + recent_frq + m_symbols[i].m_frq <= decode_cum) {
                    cum += m_symbols[i].m_frq;
                    i++;
                }
                if (i == 0) {
                    frq = m_symbols[i].m_frq + recent_frq;
                } else {
                    frq = m_symbols[i].m_frq;
                    cum += recent_frq;
                    std::rotate(m_symbols.begin(), m_symbols.begin() + i, m_symbols.begin() + i + 1);
                }
                coder->decode(cum, frq);
            }
            return m_symbols[0].m_sym;

        } else { // not found
            see->update(1);
            for (auto i = 0; i < m_cnt; i++) {
                exclude[m_symbols[i].m_sym] = 1; // exclude o4
            }
            if (m_cnt == m_symbols.size()) {
                m_sum -= m_symbols[m_cnt - 1].m_frq;
            } else {
                m_cnt += 1;
            }
            std::copy_backward(m_symbols.begin(), m_symbols.begin() + (m_cnt - 1), m_symbols.begin() + m_cnt);
            m_symbols[0].m_frq = 0; // m_sym is filled in by the update() call that follows
        }
        return -1;
    }

    // Count one occurrence of c (located at m_symbols[0]). A new symbol gets an initial
    // frequency of 2, or 3 when c is frequent in the order-2 context `lower_o2`.
    void update(dense_model_t* lower_o2, int c) {
        if (m_symbols[0].m_frq == 0) { // calculate init frequency
            auto o2c = symbol_counter_t();
            for (auto i = 0; i < lower_o2->m_cnt; i++) {
                if (lower_o2->m_symbols[i].m_sym == c) {
                    o2c = lower_o2->m_symbols[i];
                    break;
                }
            }
            m_symbols[0].m_frq = 2 + (o2c.m_frq * 16 > lower_o2->m_sum);
            m_symbols[0].m_sym = c;
            m_sum += m_symbols[0].m_frq;
        } else {
            auto inc = 1 + (m_symbols[0].m_frq <= 3) + (m_symbols[0].m_frq <= 220);
            m_symbols[0].m_sym = c;
            m_symbols[0].m_frq += inc;
            m_sum += inc;
        }

        if (m_symbols[0].m_frq > 250) { // rescale: halve all counts, drop those reaching zero
            auto kept = 0;
            m_sum = 0;
            for (auto& symbol: m_symbols) {
                const auto halved = symbol.m_frq / 2;
                if (halved > 0) {
                    m_symbols[kept].m_sym = symbol.m_sym;
                    m_symbols[kept].m_frq = halved;
                    m_sum += halved;
                    kept++;
                }
            }
            m_cnt = kept;
            std::fill(m_symbols.begin() + m_cnt, m_symbols.end(), symbol_counter_t());
        }
    }
} __attribute__((__aligned__(128)));

// main ppm-model type: order-4 (hashed sparse models) -> order-2 -> order-1 -> order-0
// -> order-(-1), with escapes coded by the SEE bit models for order 4.
// NOTE: the element types and sizes of the arrays below are reconstructed from how
// they are indexed; the template arguments were lost from the source.
struct ppm_model_t {
    std::array<bit_model_t, PPM_SEE_SIZE> m_see;
    std::array<sparse_model_t*, PPM_O4_BUCKET_SIZE> m_o4_buckets; // heads of the hash chains
    std::array<dense_model_t, 65536> m_o2;
    std::array<dense_model_t, 256> m_o1;
    std::array<dense_model_t, 1> m_o0;
    uint32_t m_o4_count;      // number of live sparse_model_t nodes
    uint64_t m_context;       // the most recent bytes, newest in the low 8 bits
    uint8_t m_see_ch_context;
    uint8_t m_see_last_esc;

    ppm_model_t():
        m_o4_count(0),
        m_context(0),
        m_see_ch_context(0),
        m_see_last_esc(0) {

        m_o4_buckets.fill(nullptr); // chains must start empty; the ctor used to leave them uninitialized
        for (auto i = 0; i < PPM_SEE_SIZE; i++) {
            m_see[i].m_c[0] = 20;
            m_see[i].m_c[1] = 10;
        }
    }

    // The model owns every node reachable from m_o4_buckets.
    ~ppm_model_t() {
        for (auto node: m_o4_buckets) {
            while (node != nullptr) {
                auto next = node->m_next;
                delete node;
                node = next;
            }
        }
    }
    ppm_model_t(const ppm_model_t&) = delete;
    ppm_model_t& operator=(const ppm_model_t&) = delete;

    // Select the bit model that codes "symbol found in o4 / escape" for the current state.
    bit_model_t* current_see(sparse_model_t* o4) {
        auto log2i = [](uint32_t x) {
            return (31 - __builtin_clz((x << 1) | 0x01));
        };

        if (o4->m_cnt == 0) {
            static bit_model_t see_01 = {{0, 1}};
            return &see_01; // no symbols under current context -- always escape
        }
        auto curcnt = o4->m_cnt;
        auto lowsum = current_o2()->m_sum;
        auto lowcnt = current_o2()->m_cnt;
        auto context = 0
            | ((m_context >>  6) & 0x03) << 0
            | ((m_context >> 14) & 0x03) << 2
            | ((m_context >> 22) & 0x03) << 4
            | m_see_last_esc << 6;

        if (curcnt == 1) {
            // QUANTIZE(binary) = (sum[3] | lowcnt[2] | lowsum[1] | bin_symbol[3] | last_esc[1] | previous symbols[6])
            context |= 0
                | (o4->m_symbols[0].m_sym >> 5) << 7
                | (lowsum >= 5) << 10
                | std::min(log2i(curcnt / 2), 3) << 11
                | std::min(log2i(o4->m_sum / 3), 7) << 13
                | 1 << 16;
            return &m_see[context];
        } else {
            // QUANTIZE = (sum[3] | curcnt[2] | lowsum[1] | (lowcnt - curcnt)[3] | last_esc[1] | previous symbols[6])
            context |= 0
                | std::min(log2i(std::max(lowcnt - curcnt, 0) / 2), 3) << 7
                | (lowsum >= 5) << 10
                | std::min(log2i(curcnt / 2), 3) << 11
                | std::min(log2i(o4->m_sum / 8), 7) << 13
                | 0 << 16;
            return &m_see[context];
        }
        return nullptr;
    }

    // Find (or create) the sparse model of the current long context. The found node is
    // moved to the front of its hash chain. When too many nodes exist, the visit counters
    // of all non-head nodes are halved first and nodes reaching zero are freed.
    sparse_model_t* current_o4() {
        if (m_o4_count >= PPM_O4_BUCKET_SIZE * 5) { // too many o4-context/symbol nodes?
            for (auto bucket: m_o4_buckets) {
                auto it0 = bucket;
                auto it1 = bucket ? bucket->m_next : nullptr;
                while (it1) { // clear nodes: non most recent nodes with visited=1
                    if ((it1->m_visited /= 2) == 0) {
                        it0->m_next = it1->m_next;
                        delete it1;
                        m_o4_count -= 1;
                        it1 = it0->m_next;
                        continue;
                    }
                    it0 = it1;
                    it1 = it1->m_next;
                }
            }
        }

        // last 5 bytes plus the top two bits of the 6th
        auto compacted_context = 0 | (m_context & 0xc0ffffffffff);

        auto& bucket = m_o4_buckets[((compacted_context >> 16) * 13131 + compacted_context) % PPM_O4_BUCKET_SIZE];
        auto it0 = bucket;
        auto it1 = bucket;
        while (it1 != nullptr) {
            if (it1->m_context == compacted_context) { // found -- bring to front
                if (it1 != bucket) {
                    it0->m_next = it1->m_next;
                    it1->m_next = bucket;
                    bucket = it1;
                }
                it1->m_visited += (it1->m_visited < 255);
                return it1;
            }
            it0 = it1;
            it1 = it1->m_next;
        }
        auto new_node = new sparse_model_t(); // not found -- create a new one
        new_node->m_context = compacted_context;
        new_node->m_visited = 1;
        new_node->m_next = bucket;
        bucket = new_node;
        m_o4_count++;
        return new_node;
    }
    dense_model_t* current_o2() { return &m_o2[m_context & 0xffff]; }
    dense_model_t* current_o1() { return &m_o1[m_context & 0x00ff]; }
    dense_model_t* current_o0() { return &m_o0[0]; }

    // main ppm-encode method: code c (0..255) with the highest order that knows it,
    // escaping downwards; then update every model from that order up to o4.
    // The caller is responsible for calling update_context() afterwards.
    void encode(rc_encoder_t* coder, int c) {
        auto o4 = current_o4();
        auto o2 = current_o2();
        auto o1 = current_o1();
        auto o0 = current_o0();
        auto order = 0;
        auto exclude = std::bitset<256>();

        while (-1) {
            order = 4; if (o4->encode(current_see(o4), coder, c, exclude)) break;
            order = 2; if (o2->encode(coder, exclude, c)) break;
            order = 1; if (o1->encode(coder, exclude, c)) break;
            order = 0; if (o0->encode(coder, exclude, c)) break;

            // encode with o(-1): uniform over all symbols not yet excluded
            auto cum = 0;
            for (auto i = 0; i < c; i++) {
                cum += !exclude[i];
            }
            coder->encode(cum, 1, 256 - exclude.count());
            break;
        }
        switch (order) { // fall-through switch
            case 0: o0->update(c); // fall through
            case 1: o1->update(c); // fall through
            case 2: o2->update(c); // fall through
            case 4: o4->update(o2, c);
        }
        m_see_last_esc = (order == 4);
    }

    // main ppm-decode method
    int decode(rc_decoder_t* coder) {
        auto o4 = current_o4();
        auto o2 = current_o2();
        auto o1 = current_o1();
        auto o0 = current_o0();
        auto order = 0;
        auto c = 0;
        auto exclude = std::bitset<256>();

        while (-1) {
            order = 4; if ((c = o4->decode(current_see(o4), coder, exclude)) != -1) break;
            order = 2; if ((c = o2->decode(coder, exclude)) != -1) break;
            order = 1; if ((c = o1->decode(coder, exclude)) != -1) break;
            order = 0; if ((c = o0->decode(coder, exclude)) != -1) break;

            // decode with o(-1)
            auto decode_cum = coder->decode_cum(256 - exclude.count());
            auto cum = 0;
            for (c = 0; cum + !exclude[c] <= decode_cum; c++) {
                cum += !exclude[c];
            }
            coder->decode(cum, 1);
            break;
        }
        switch (order) { // fall-through switch
            case 0: o0->update(c); // fall through
            case 1: o1->update(c); // fall through
            case 2: o2->update(c); // fall through
            case 4: o4->update(o2, c);
        }
        m_see_last_esc = (order == 4);
        return c;
    }

    void update_context(int c) {
        m_context = m_context << 8 | c;
    }
};

/*******************************************************************************
 * Matcher
 ******************************************************************************/
// LZP match predictor: three hashes of the preceding 8, 5 and 2 bytes index a table
// holding the position that followed the same bytes last time.
struct matcher_t {
    static const auto match_min = 12;
    static const auto match_max = 255;
    // NOTE: element type and size are reconstructed from the 64-bit entry layout and
    // the `% 1048576` of the hash functions; the template arguments were lost.
    std::array<uint64_t, 1048576> m_lzp; // lzp = pos[32] + checksum[16] + prefetch[16]

    matcher_t() {
        m_lzp.fill(0);
    }

    // Unaligned native-endian 16-bit load (replaces the former *(uint16_t*) casts).
    static uint16_t load16(const unsigned char* p) {
        uint16_t value;
        std::memcpy(&value, p, sizeof(value));
        return value;
    }

    // The hashes use unsigned arithmetic: 255 * 13131313 does not fit a signed int.
    static uint32_t hash2(unsigned char* p) {
        return uint32_t(p[1] * 1919191u + p[0]) % 1048576;
    }
    static uint32_t hash5(unsigned char* p) {
        return uint32_t(p[0] * 1717171u + p[1] * 17171u + p[2] * 171u + p[3]) % 1048576;
    }
    static uint32_t hash8(unsigned char* p) {
        return uint32_t(
                p[0] * 13131313u + p[1] * 1313131u + p[2] * 131313u + p[3] * 13131u +
                p[4] * 1313u     + p[5] * 131u     + p[6] * 13u     + p[7] * 1u) % 1048576;
    }

    // Table entry predicted for `pos`, preferring the longest context whose 16-bit
    // checksum (the two bytes before pos) agrees; 0 when there is none.
    uint64_t getlzp(unsigned char* data, uint32_t pos) {
        if (pos >= 8) {
            const auto checksum = load16(data + pos - 2);
            auto lzp8 = m_lzp[hash8(data + pos - 8)];
            auto lzp5 = m_lzp[hash5(data + pos - 5)];
            auto lzp2 = m_lzp[hash2(data + pos - 2)];
            if ((lzp8 >> 32 & 0xffff) == checksum && (lzp8 & 0xffffffff) != 0) return lzp8;
            if ((lzp5 >> 32 & 0xffff) == checksum && (lzp5 & 0xffffffff) != 0) return lzp5;
            if ((lzp2 >> 32 & 0xffff) == checksum && (lzp2 & 0xffffffff) != 0) return lzp2;
        }
        return 0;
    }

    uint32_t getpos(unsigned char* data, uint32_t pos) {
        return getlzp(data, pos) & 0xffffffff;
    }

    // Length of the match to encode at `pos`: a value in [match_min, maxlen], or 1 when
    // a literal should be coded instead. With do_lazy_match, a match is rejected when
    // the next position offers one that is at least two bytes longer.
    uint32_t lookup(unsigned char* data, uint32_t data_size, uint32_t pos, int do_lazy_match = 1, int maxlen = match_max) {
        // Fewer than match_min bytes left: no match is possible and the prefetch bytes
        // at data[pos + match_min - 2] would lie outside the data.
        if (uint64_t(pos) + match_min > data_size) {
            return 1;
        }
        auto match_lzp = getlzp(data, pos);
        if ((match_lzp >> 48 & 0xffff) != load16(data + pos + match_min - 2)) {
            return 1;
        }
        auto match_pos = match_lzp & 0xffffffff;
        auto match_len = 0;
        if (match_pos > 0) {
            while (match_pos + match_len < data_size
                    && uint64_t(pos) + match_len < data_size // never match past the end of the data
                    && match_len < maxlen
                    && data[match_pos + match_len] == data[pos + match_len]) {
                match_len++;
            }
        }
        if (do_lazy_match) {
            auto next_match_len = lookup(data, data_size, pos + 1, 0, match_len + 2);
            if (match_len + 1 < next_match_len) {
                return 1;
            }
        }
        return (match_len >= match_min) ? match_len : 1;
    }

    // Register `pos` under the three hashes of the bytes preceding it. Reads up to
    // data[pos + match_min - 1], so the buffer needs at least match_min bytes of
    // padding behind the last position passed in.
    void update(unsigned char* data, uint32_t pos) {
        if (pos >= 8) { // avoid overflow
            (m_lzp[hash8(data + pos - 8)] =
             m_lzp[hash5(data + pos - 5)] =
             m_lzp[hash2(data + pos - 2)] = (0
                 | (uint64_t) pos
                 | (uint64_t) load16(data + pos - 2) << 32
                 | (uint64_t) load16(data + pos + match_min - 2) << 48));
        }
    }
};

/*******************************************************************************
 * Codec
 ******************************************************************************/
static const auto BLOCK_SIZE = 16777216;
static const auto BLOCK_PADDING = 1024; // slack behind a block for the matcher's look-ahead
static const auto MATCH_LENS_SIZE = 64000;

// Compress `orig` into `comp`, one BLOCK_SIZE block at a time.
// Block layout: escape byte, then the range-coded stream of literals and
// (escape, length) pairs. (escape, 0) is a literal escape byte, (escape, 1) ends the
// block and (escape, 2) ends the input. An empty input produces an empty output.
// `orig` must not throw on failbit: the final, short read() sets it by design.
void zmolly_encode(std::istream& orig, std::ostream& comp) {
    auto ppm = std::make_unique<ppm_model_t>();
    auto orig_data = std::make_unique<unsigned char[]>(BLOCK_SIZE + BLOCK_PADDING);

    while (orig.peek() != EOF) {
        orig.read((char*) &orig_data[0], BLOCK_SIZE);
        auto orig_size = orig.gcount();

        // find escape char: the least frequent byte value of this block
        auto counts = std::array<int, 256>();
        auto escape = 0;
        for (auto i = 0; i < orig_size; i++) {
            counts[orig_data[i]]++;
        }
        for (auto i = 0; i < 256; i++) {
            escape = counts[escape] < counts[i] ? escape : i;
        }

        auto comp_start_pos = comp.tellp();
        auto matcher = std::make_unique<matcher_t>();
        comp.put(escape);

        auto coder = rc_encoder_t(comp);
        auto orig_pos = size_t(0);

        auto match_idx = 0;
        auto match_pos = 0;
        auto thread = std::thread();
        auto match_lens1 = std::array<int, MATCH_LENS_SIZE>();
        auto match_lens2 = std::array<int, MATCH_LENS_SIZE>();
        auto match_lens_current = &match_lens1;

        // Worker: fill one buffer with the next (up to) MATCH_LENS_SIZE match lengths.
        // Only one worker runs at a time; match_pos and matcher belong to the workers.
        auto func_matching_thread = [&](int* match_lens) {
            auto fill_idx = 0;
            while (match_pos < orig_size && fill_idx < MATCH_LENS_SIZE) {
                auto match_len = matcher->lookup(&orig_data[0], orig_size, match_pos);
                for (auto i = 0u; i < match_len; i++) {
                    matcher->update(&orig_data[0], match_pos + i);
                }
                match_pos += match_len;
                match_lens[fill_idx++] = match_len;
            }
        };

        // start thread (matching first block)
        thread = std::thread(func_matching_thread, &match_lens1[0]); thread.join();
        thread = std::thread(func_matching_thread, &match_lens2[0]);

        while (orig_pos < size_t(orig_size)) {
            // find match in separated thread
            if (match_idx >= MATCH_LENS_SIZE) { // start the next matching thread
                thread.join();
                // Refill the buffer that was just consumed and continue with the other
                // one. Buffers are told apart by address: comparing their contents would
                // race with the worker and is ambiguous when both hold equal values.
                auto match_lens_consumed = match_lens_current;
                match_lens_current = (match_lens_consumed == &match_lens1) ? &match_lens2 : &match_lens1;
                thread = std::thread(func_matching_thread, &(*match_lens_consumed)[0]);
                match_idx = 0;
            }
            auto match_len = (*match_lens_current)[match_idx++];

            if (match_len > 1) { // encode a match
                ppm->encode(&coder, escape);
                ppm->update_context(escape);
                ppm->encode(&coder, match_len);
                ppm->update_context(match_len);
                for (auto i = 0; i < match_len; i++) {
                    ppm->update_context(orig_data[orig_pos++]);
                }

            } else { // encode a literal
                ppm->encode(&coder, orig_data[orig_pos]);
                ppm->update_context(orig_data[orig_pos]);
                if (orig_data[orig_pos] == escape) {
                    ppm->encode(&coder, 0);
                    ppm->update_context(0);
                }
                orig_pos++;
            }
        }
        thread.join();
        ppm->encode(&coder, escape); // write end of block code
        ppm->update_context(escape);
        ppm->encode(&coder, orig.peek() != EOF ? 1 : 2); // 1: end of block, 2: end of input
        coder.flush();
        fprintf(stderr, "encode-block: %zu => %zu\n", orig_pos, size_t(comp.tellp() - comp_start_pos));
    }
}

// Decompress a stream produced by zmolly_encode() from `comp` into `orig`.
// An empty input yields an empty output. Throws std::runtime_error("invalid input data")
// on malformed or truncated input.
void zmolly_decode(std::istream& comp, std::ostream& orig) {
    if (comp.peek() == EOF) { // zmolly_encode() writes nothing for an empty input
        return;
    }
    auto ppm = std::make_unique<ppm_model_t>();
    auto end_of_input = false;
    auto orig_data = std::make_unique<unsigned char[]>(BLOCK_SIZE + BLOCK_PADDING);

    while (!end_of_input) {
        auto end_of_block = false;
        auto comp_start_pos = comp.tellg();
        auto matcher = std::make_unique<matcher_t>();
        auto escape = comp.get();
        auto coder = rc_decoder_t(comp);
        auto orig_pos = size_t(0);

        while (!end_of_block) {
            auto c = ppm->decode(&coder);
            ppm->update_context(c);
            if (c != escape) { // literal
                orig_data[orig_pos] = c;
                matcher->update(&orig_data[0], orig_pos);
                orig_pos++;
            } else {
                auto match_len = ppm->decode(&coder);
                if (match_len >= matcher_t::match_min && match_len <= matcher_t::match_max) { // match
                    auto match_pos = matcher->getpos(&orig_data[0], orig_pos);
                    for (auto i = 0; i < match_len; i++) { // update context
                        orig_data[orig_pos] = orig_data[match_pos];
                        ppm->update_context(orig_data[orig_pos]);
                        matcher->update(&orig_data[0], orig_pos);
                        orig_pos++;
                        match_pos++;
                    }
                } else if (match_len == 0) { // escape literal
                    orig_data[orig_pos] = escape;
                    ppm->update_context(orig_data[orig_pos]);
                    matcher->update(&orig_data[0], orig_pos);
                    orig_pos++;
                } else if (match_len == 1) { // end of block
                    end_of_block = true;
                } else if (match_len == 2) { // end of block and end of input
                    end_of_block = true;
                    end_of_input = true;
                } else {
                    throw std::runtime_error("invalid input data");
                }
            }
            // A valid stream never makes the decoder read past its end (it consumes
            // exactly the bytes the encoder wrote), so a failed stream means truncation.
            if (orig_pos > size_t(BLOCK_SIZE) || !comp) {
                throw std::runtime_error("invalid input data");
            }
        }
        orig.write((char*) &orig_data[0], orig_pos);
        fprintf(stderr, "decode-block: %zu <= %zu\n", orig_pos, size_t(comp.tellg() - comp_start_pos));
    }
}

/*******************************************************************************
 * Main
 ******************************************************************************/
// Usage: zmolly e|d inputFile outputFile. Returns 0 on success and 1 after printing
// the error message to stderr.
int main(int argc, char** argv) {
    fprintf(stderr,
            "zmolly:\n"
            "   simple LZP/PPM data compressor.\n"
            "   author: Zhang Li \n"
            "usage:\n"
            "   encode: zmolly e inputFile outputFile\n"
            "   decode: zmolly d inputFile outputFile\n");

    try {
        // check args
        if (argc != 4) {
            throw std::runtime_error(std::string() + "invalid number of arguments");
        }
        const auto mode = std::string(argv[1]);
        if (mode != "e" && mode != "d") {
            throw std::runtime_error(std::string() + "error: invalid mode: " + argv[1]);
        }

        // open input file
        auto fin = std::ifstream(std::string() + argv[2], std::ios::in | std::ios::binary);
        if (!fin.is_open()) {
            throw std::runtime_error(std::string() + "cannot open input file: " + argv[2]);
        }
        // Only badbit may throw: the codec reads until EOF, and the last short read()
        // sets failbit, which must not be turned into an exception.
        fin.exceptions(std::ios_base::badbit);

        // open output file
        auto fout = std::ofstream(argv[3], std::ios::out | std::ios::binary);
        if (!fout.is_open()) {
            throw std::runtime_error(std::string() + "cannot open output file: " + argv[3]);
        }
        fout.exceptions(std::ios_base::badbit); // report write errors

        // encode/decode
        if (mode == "e") zmolly_encode(fin, fout);
        if (mode == "d") zmolly_decode(fin, fout);
        fout.flush();

    } catch (const std::exception& e) {
        fprintf(stderr, "%s\n", e.what());
        return 1;
    }
    return 0;
}
// --------------------------------------------------------------------------------