├── CMakeLists.txt
├── LICENSE
├── README.md
└── src
    ├── CMakeLists.txt
    ├── executable
    │   ├── CMakeLists.txt
    │   └── m99
    │       ├── CMakeLists.txt
    │       └── main.cpp
    ├── include
    │   ├── endian.h
    │   └── endian
    │       ├── byte_swap.h
    │       ├── endian.h
    │       ├── endian_swap.h
    │       └── endian_type.h
    └── library
        ├── CMakeLists.txt
        ├── m99.h
        └── m99
            ├── CMakeLists.txt
            ├── m99_decode.cpp
            ├── m99_decode.h
            ├── m99_decode_stream.cpp
            ├── m99_decode_stream.h
            ├── m99_encode.cpp
            ├── m99_encode.h
            ├── m99_encode_stream.cpp
            └── m99_encode_stream.h

/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.16.3)

project(m99)


# Default to an optimized build. Only single-config generators honour
# CMAKE_BUILD_TYPE; multi-config generators (Visual Studio, Xcode,
# Ninja Multi-Config) choose the configuration at build time.
get_property(_m99_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT _m99_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
    message(STATUS "*** Build type not set. defaulting to Release")
endif()

# -march=native produces binaries that may not run on other machines, so it
# can be switched off. It defaults to ON to keep the previous behaviour.
option(M99_NATIVE_ARCH "Tune generated code for the build machine (-march=native)" ON)

# Directory scope is intentional: the flags also apply to the dependencies
# that are pulled in below. Generator expressions keep the Debug / non-Debug
# split working with multi-config generators.
add_compile_options(
    "$<$<CONFIG:Debug>:-g;-O0>"
    "$<$<NOT:$<CONFIG:Debug>>:-O3>"
)
if(M99_NATIVE_ARCH)
    add_compile_options(-march=native)
endif()


option(M99_BUILD_DEMO "Build the CLI demo" ON)


include(FetchContent)

# NOTE(review): both dependencies track the moving 'master' branch, so builds
# are not reproducible. Pin GIT_TAG to a commit hash once a known-good
# revision has been chosen.
set(IO_BUILD_DEMO OFF CACHE INTERNAL "")
FetchContent_Declare(
    io
    GIT_REPOSITORY https://github.com/michaelmaniscalco/io.git
    GIT_TAG master
    SOURCE_DIR "${CMAKE_BINARY_DIR}/io-src"
    BINARY_DIR "${CMAKE_BINARY_DIR}/io-build"
    INSTALL_DIR "${CMAKE_BINARY_DIR}"
    INSTALL_COMMAND ""
)
FetchContent_MakeAvailable(io)
FetchContent_GetProperties(io)


set(ENTROPY_BUILD_DEMO OFF CACHE INTERNAL "")
FetchContent_Declare(
    entropy
    GIT_REPOSITORY https://github.com/michaelmaniscalco/entropy.git
    GIT_TAG master
    SOURCE_DIR "${CMAKE_BINARY_DIR}/entropy-src"
    BINARY_DIR "${CMAKE_BINARY_DIR}/entropy-build"
    INSTALL_DIR "${CMAKE_BINARY_DIR}"
    INSTALL_COMMAND ""
)
FetchContent_MakeAvailable(entropy)
FetchContent_GetProperties(entropy)


# The output directory variables only initialise the properties of targets
# created after they are set, so they must precede add_subdirectory(src).
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

add_subdirectory(src)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 Michael Maniscalco

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# m99

novel high performance BWT compression algorithm

```
mkdir build
cd build
cmake ..
make
```


To build demo (default=ON):

```
mkdir build
cd build
cmake -DM99_BUILD_DEMO=ON ..
make
```
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
# Directory of this file (src/), stored for use by the subdirectories below.
set(_m99_include_dir ${CMAKE_CURRENT_SOURCE_DIR})

add_subdirectory(library)
add_subdirectory(executable)
--------------------------------------------------------------------------------
/src/executable/CMakeLists.txt:
--------------------------------------------------------------------------------
# The demo is optional; see the M99_BUILD_DEMO option in the top level file.
if (M99_BUILD_DEMO)
    add_subdirectory(m99)
endif()
--------------------------------------------------------------------------------
/src/executable/m99/CMakeLists.txt:
--------------------------------------------------------------------------------

include(FetchContent)

# NOTE(review): tracks the moving 'master' branch; pin to a commit hash for
# reproducible builds.
FetchContent_Declare(
    msufsort
    GIT_REPOSITORY https://github.com/michaelmaniscalco/msufsort.git
    GIT_TAG master
    SOURCE_DIR "${CMAKE_BINARY_DIR}/msufsort-src"
    BINARY_DIR "${CMAKE_BINARY_DIR}/msufsort-build"
    INSTALL_DIR "${CMAKE_BINARY_DIR}"
    INSTALL_COMMAND ""
)
FetchContent_MakeAvailable(msufsort)
FetchContent_GetProperties(msufsort)

# main.cpp creates std::thread objects, so a thread library is mandatory.
find_package(Threads REQUIRED)

# libc++ / libc++abi are linked when present, as before. They are optional:
# a *-NOTFOUND value must never be handed to the linker, because CMake then
# aborts generation on systems that do not ship libc++.
find_library(LIBCXX_LIB c++)
find_library(LIBCXXABI_LIB c++abi)

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

add_executable(m99_demo main.cpp)

target_link_libraries(m99_demo
    PRIVATE
        Threads::Threads
        m99
        msufsort
)
if(LIBCXX_LIB)
    target_link_libraries(m99_demo PRIVATE "${LIBCXX_LIB}")
endif()
if(LIBCXXABI_LIB)
    target_link_libraries(m99_demo PRIVATE "${LIBCXXABI_LIB}")
endif()
--------------------------------------------------------------------------------
/src/executable/m99/main.cpp:
--------------------------------------------------------------------------------
// NOTE(review): the header names of the fourteen #include directives below are
// missing from this listing (presumably stripped during extraction); restore
// them from the repository.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include


namespace
{

static auto constexpr max_encode_block_size = (1ull << 20); 21 | 22 | struct block_header 23 | { 24 | std::uint32_t blockSize_; 25 | std::uint32_t sentinelIndex_; 26 | }; 27 | 28 | 29 | //================================================================================================================== 30 | std::vector load_file 31 | ( 32 | char const * path 33 | ) 34 | { 35 | // read data from file 36 | std::vector input; 37 | std::ifstream inputStream(path, std::ios_base::in | std::ios_base::binary); 38 | if (!inputStream.is_open()) 39 | { 40 | std::cout << "failed to open file \"" << path << "\"" << std::endl; 41 | return std::vector(); 42 | } 43 | 44 | inputStream.seekg(0, std::ios_base::end); 45 | std::size_t size = inputStream.tellg(); 46 | input.resize(size); 47 | inputStream.seekg(0, std::ios_base::beg); 48 | inputStream.read(input.data(), input.size()); 49 | inputStream.close(); 50 | return input; 51 | } 52 | 53 | 54 | //================================================================================================================== 55 | void encode_block 56 | ( 57 | // single threaded 58 | std::uint8_t const * inputBegin, 59 | std::uint8_t const * inputEnd, 60 | std::ofstream & outStream 61 | ) 62 | { 63 | // transform input (BWT) 64 | auto sentinelIndex = maniscalco::forward_burrows_wheeler_transform(inputBegin, inputEnd, 1); 65 | 66 | // write header for input 67 | block_header blockHeader 68 | { 69 | .blockSize_ = std::distance(inputBegin, inputEnd), 70 | .sentinelIndex_ = sentinelIndex 71 | }; 72 | outStream.write((char const *)&blockHeader, sizeof(blockHeader)); 73 | 74 | std::uint32_t subBlockId{0}; 75 | // encode next available sub block until there are none remaining 76 | std::uint32_t currentSubBlockId = subBlockId++; 77 | auto blockBegin = inputBegin + (currentSubBlockId * max_encode_block_size); 78 | while (blockBegin < inputEnd) 79 | { 80 | auto blockEnd = (blockBegin + max_encode_block_size); 81 | if (blockEnd > inputEnd) 82 | blockEnd = 
inputEnd; 83 | // create encode stream and encode this subblock 84 | maniscalco::m99_encode_stream encodeStream; 85 | maniscalco::m99_encode(blockBegin, blockEnd, encodeStream); 86 | encodeStream.flush(); 87 | // write this encoded sub block to the destination 88 | auto encodedSize = ((encodeStream.size() + 7) / 8); 89 | outStream.write((char const *)&encodedSize, 4); 90 | outStream.write((char const *)¤tSubBlockId, 4); 91 | // write the encoded data for the stream 92 | for (auto const & packet : encodeStream) 93 | { 94 | auto bytesToWrite = ((packet.size() + 7) / 8); 95 | auto address = (packet.data() + packet.capacity() - bytesToWrite); 96 | outStream.write((char const *)address, bytesToWrite); 97 | } 98 | currentSubBlockId = subBlockId++; 99 | blockBegin = inputBegin + (currentSubBlockId * max_encode_block_size); 100 | } 101 | } 102 | 103 | 104 | 105 | //================================================================================================================== 106 | void encode_block 107 | ( 108 | std::uint8_t const * inputBegin, 109 | std::uint8_t const * inputEnd, 110 | std::ofstream & outStream, 111 | std::size_t numThreads 112 | ) 113 | { 114 | // transform input (BWT) 115 | auto sentinelIndex = maniscalco::forward_burrows_wheeler_transform(inputBegin, inputEnd, numThreads); 116 | 117 | // write header for input 118 | block_header blockHeader 119 | { 120 | .blockSize_ = std::distance(inputBegin, inputEnd), 121 | .sentinelIndex_ = sentinelIndex 122 | }; 123 | outStream.write((char const *)&blockHeader, sizeof(blockHeader)); 124 | 125 | // create worker threads for encoding 126 | std::vector threads; 127 | threads.resize(numThreads); 128 | 129 | // set threads to process sub blocks of the input 130 | std::atomic subBlockId{0}; 131 | std::mutex mutex; 132 | for (auto & thread : threads) 133 | { 134 | thread = std::thread([&]() 135 | { 136 | // encode next available sub block until there are none remaining 137 | std::uint32_t currentSubBlockId = 
subBlockId++; 138 | auto blockBegin = inputBegin + (currentSubBlockId * max_encode_block_size); 139 | while (blockBegin < inputEnd) 140 | { 141 | auto blockEnd = (blockBegin + max_encode_block_size); 142 | if (blockEnd > inputEnd) 143 | blockEnd = inputEnd; 144 | // create encode stream and encode this subblock 145 | maniscalco::m99_encode_stream encodeStream; 146 | maniscalco::m99_encode(blockBegin, blockEnd, encodeStream); 147 | encodeStream.flush(); 148 | // write this encoded sub block to the destination 149 | std::lock_guard lockGuard(mutex); 150 | auto encodedSize = ((encodeStream.size() + 7) / 8); 151 | outStream.write((char const *)&encodedSize, 4); 152 | outStream.write((char const *)¤tSubBlockId, 4); 153 | // write the encoded data for the stream 154 | for (auto const & packet : encodeStream) 155 | { 156 | auto bytesToWrite = ((packet.size() + 7) / 8); 157 | auto address = (packet.data() + packet.capacity() - bytesToWrite); 158 | outStream.write((char const *)address, bytesToWrite); 159 | } 160 | currentSubBlockId = subBlockId++; 161 | blockBegin = inputBegin + (currentSubBlockId * max_encode_block_size); 162 | } 163 | }); 164 | } 165 | // wait for threads to complete encoding 166 | for (auto & thread : threads) 167 | thread.join(); 168 | } 169 | 170 | 171 | //================================================================================================================== 172 | void decode_block 173 | ( 174 | std::ifstream & inStream, 175 | std::ofstream & outStream, 176 | std::uint32_t numThreads 177 | ) 178 | { 179 | // read header for block 180 | block_header blockHeader; 181 | inStream.read((char *)&blockHeader, sizeof(blockHeader)); 182 | std::uint32_t bytesPerSubBlock = ((blockHeader.blockSize_ + numThreads - 1) / numThreads); 183 | 184 | // allocate space for decoded block data 185 | std::vector output; 186 | output.resize(blockHeader.blockSize_); 187 | auto outputBegin = output.data(); 188 | auto outputEnd = (outputBegin + output.size()); 189 | 
190 | // create decode threads 191 | std::vector threads; 192 | threads.resize(numThreads - 1); 193 | 194 | std::atomic numSubBlocksToDecode((blockHeader.blockSize_ + max_encode_block_size - 1) / max_encode_block_size); 195 | auto n = numSubBlocksToDecode.load(); 196 | 197 | std::mutex mutex; 198 | for (auto & thread : threads) 199 | { 200 | thread = std::thread([&]() 201 | { 202 | while (true) 203 | { 204 | maniscalco::buffer encodedData; 205 | std::uint32_t encodedSize = 0; 206 | std::uint32_t subBlockId = 0; 207 | { 208 | std::lock_guard lockGuard(mutex); 209 | if (numSubBlocksToDecode < 1) 210 | return; // no more work to do 211 | 212 | --numSubBlocksToDecode; 213 | // read next compress subblock from source 214 | inStream.read((char *)&encodedSize, 4); 215 | inStream.read((char *)&subBlockId, 4); 216 | // read encoded sub block data 217 | encodedData = std::move(maniscalco::buffer(encodedSize)); 218 | inStream.read((char *)encodedData.data(), encodedSize); 219 | } 220 | auto destinationBegin = (outputBegin + (subBlockId * max_encode_block_size)); 221 | auto destinationEnd = (destinationBegin + max_encode_block_size); 222 | if (destinationEnd > outputEnd) 223 | destinationEnd = outputEnd; 224 | maniscalco::m99_decode_stream decodeStream(std::move(encodedData), encodedSize); 225 | maniscalco::m99_decode(decodeStream, destinationBegin, destinationEnd); 226 | } 227 | }); 228 | } 229 | 230 | // wait for all subblocks to be decoded 231 | for (auto & thread : threads) 232 | thread.join(); 233 | 234 | // reverse the BWT 235 | maniscalco::reverse_burrows_wheeler_transform(output.begin(), output.end(), blockHeader.sentinelIndex_, numThreads); 236 | outStream.write((char const *)&*outputBegin, output.size()); 237 | } 238 | 239 | 240 | //================================================================================================================== 241 | void decode_block 242 | ( 243 | // single threaded 244 | std::ifstream & inStream, 245 | std::ofstream & outStream 
246 | ) 247 | { 248 | // read header for block 249 | block_header blockHeader; 250 | inStream.read((char *)&blockHeader, sizeof(blockHeader)); 251 | std::uint32_t bytesPerSubBlock = blockHeader.blockSize_; 252 | 253 | // allocate space for decoded block data 254 | std::vector output; 255 | output.resize(blockHeader.blockSize_); 256 | auto outputBegin = output.data(); 257 | auto outputEnd = (outputBegin + output.size()); 258 | 259 | std::uint32_t numSubBlocksToDecode((blockHeader.blockSize_ + max_encode_block_size - 1) / max_encode_block_size); 260 | while (numSubBlocksToDecode-- > 0) 261 | { 262 | maniscalco::buffer encodedData; 263 | std::uint32_t encodedSize = 0; 264 | std::uint32_t subBlockId = 0; 265 | // read next compress subblock from source 266 | inStream.read((char *)&encodedSize, 4); 267 | inStream.read((char *)&subBlockId, 4); 268 | // read encoded sub block data 269 | encodedData = std::move(maniscalco::buffer(encodedSize)); 270 | inStream.read((char *)encodedData.data(), encodedSize); 271 | auto destinationBegin = (outputBegin + (subBlockId * max_encode_block_size)); 272 | auto destinationEnd = (destinationBegin + max_encode_block_size); 273 | if (destinationEnd > outputEnd) 274 | destinationEnd = outputEnd; 275 | maniscalco::m99_decode_stream decodeStream(std::move(encodedData), encodedSize); 276 | maniscalco::m99_decode(decodeStream, destinationBegin, destinationEnd); 277 | } 278 | // reverse the BWT 279 | maniscalco::reverse_burrows_wheeler_transform(output.begin(), output.end(), blockHeader.sentinelIndex_, 1); 280 | outStream.write((char const *)&*outputBegin, output.size()); 281 | } 282 | 283 | 284 | //================================================================================================================== 285 | void print_about 286 | ( 287 | ) 288 | { 289 | std::cout << "m99 - high performance BWT compressor. Author: M.A. 
Maniscalco (1999 - 2020)" << std::endl; 290 | } 291 | 292 | 293 | //================================================================================================================== 294 | std::int32_t print_usage 295 | ( 296 | ) 297 | { 298 | std::cout << "Usage: m99 [e|d] inputFile outputFile [switches]" << std::endl; 299 | std::cout << "\t -t = threadCount" << std::endl; 300 | std::cout << "\t -b = blockSize (max = 1GB)" << std::endl; 301 | 302 | std::cout << "example: m99 e inputFile outputFile -t8 -b100000" << std::endl; 303 | std::cout << "example: m99 d inputFile outputFile -t8" << std::endl; 304 | return 0; 305 | } 306 | 307 | 308 | //========================================================================== 309 | void decode 310 | ( 311 | char const * inputPath, 312 | char const * outputPath, 313 | int numThreads 314 | ) 315 | { 316 | // read data from file 317 | std::ifstream inputStream(inputPath, std::ios_base::in | std::ios_base::binary); 318 | if (!inputStream.is_open()) 319 | { 320 | std::cout << "failed to open file \"" << inputPath << "\"" << std::endl; 321 | return; 322 | } 323 | 324 | // create the output stream 325 | std::ofstream outStream(outputPath, std::ios_base::out | std::ios_base::binary); 326 | if (!outStream.is_open()) 327 | { 328 | std::cout << "failed to create output file \"" << outputPath << "\"" << std::endl; 329 | return; 330 | } 331 | 332 | auto startTime = std::chrono::system_clock::now(); 333 | inputStream.seekg(0, std::ios_base::end); 334 | auto end = inputStream.tellg(); 335 | inputStream.seekg(0, std::ios_base::beg); 336 | 337 | if (numThreads == 1) 338 | { 339 | while (inputStream.tellg() != end) 340 | decode_block(inputStream, outStream); 341 | } 342 | else 343 | { 344 | while (inputStream.tellg() != end) 345 | decode_block(inputStream, outStream, numThreads); 346 | } 347 | 348 | auto finishTime = std::chrono::system_clock::now(); 349 | auto elapsedTime = std::chrono::duration_cast(finishTime - startTime).count(); 350 | 
std::cout << "Elapsed time: " << ((long double)elapsedTime / 1000) << " seconds" << std::endl; 351 | 352 | inputStream.close(); 353 | outStream.close(); 354 | } 355 | 356 | 357 | //================================================================================= 358 | void encode 359 | ( 360 | char const * inputPath, 361 | char const * outputPath, 362 | int numThreads, 363 | int blockSize 364 | ) 365 | { 366 | // create the output stream 367 | std::ofstream outStream(outputPath, std::ios_base::out | std::ios_base::binary); 368 | if (!outStream.is_open()) 369 | { 370 | std::cout << "failed to create output file \"" << outputPath << "\"" << std::endl; 371 | return; 372 | } 373 | 374 | auto startTime = std::chrono::system_clock::now(); 375 | 376 | // read data from file 377 | std::vector input; 378 | input.reserve(blockSize); 379 | std::ifstream inputStream(inputPath, std::ios_base::in | std::ios_base::binary); 380 | if (!inputStream.is_open()) 381 | { 382 | std::cout << "failed to open file \"" << inputPath << "\"" << std::endl; 383 | return; 384 | } 385 | 386 | std::size_t bytesEncoded = 0; 387 | inputStream.seekg(0, std::ios_base::beg); 388 | while (true) 389 | { 390 | inputStream.read((char *)input.data(), input.capacity()); 391 | auto size = inputStream.gcount(); 392 | if (size == 0) 393 | break; 394 | bytesEncoded += size; 395 | if (numThreads == 1) 396 | encode_block(input.data(), input.data() + size, outStream); 397 | else 398 | encode_block(input.data(), input.data() + size, outStream, numThreads); 399 | } 400 | auto finishTime = std::chrono::system_clock::now(); 401 | auto elapsedOverallEncode = std::chrono::duration_cast(finishTime - startTime).count(); 402 | 403 | std::size_t inputSize = bytesEncoded; 404 | std::size_t outputSize = outStream.tellp(); 405 | 406 | std::cout << "compressed: " << inputSize << " -> " << outputSize << " bytes. 
ratio = " << (((long double)outputSize / inputSize) * 100) << "%" << std::endl; 407 | std::cout << "Elapsed time: " << ((long double)elapsedOverallEncode / 1000) << " seconds : " << (((long double)inputSize / (1 << 20)) / ((double)elapsedOverallEncode / 1000)) << " MB/sec" << std::endl; 408 | 409 | outStream.close(); 410 | inputStream.close(); 411 | } 412 | 413 | } 414 | 415 | 416 | //====================================================================================================================== 417 | std::int32_t main 418 | ( 419 | std::int32_t argCount, 420 | char const * argValue[] 421 | ) 422 | { 423 | print_about(); 424 | 425 | if ((argCount < 4) || (strlen(argValue[1]) != 1)) 426 | return print_usage(); 427 | 428 | std::size_t numThreads = 0; 429 | std::size_t maxBlockSize = (1 << 30); 430 | for (auto argIndex = 4; argIndex < argCount; ++argIndex) 431 | { 432 | if (argValue[argIndex][0] != '-') 433 | return print_usage(); 434 | 435 | switch (argValue[argIndex][1]) 436 | { 437 | case 'b': 438 | { 439 | // block size 440 | maxBlockSize = 0; 441 | auto cur = argValue[argIndex] + 2; 442 | while (*cur != 0) 443 | { 444 | if ((*cur < '0') || (*cur > '9')) 445 | { 446 | std::cout << "invalid block size" << std::endl; 447 | print_usage(); 448 | return -1; 449 | } 450 | maxBlockSize *= 10; 451 | maxBlockSize += (*cur - '0'); 452 | ++cur; 453 | } 454 | if (maxBlockSize > (1 << 30)) 455 | maxBlockSize = (1 << 30); 456 | break; 457 | } 458 | case 't': 459 | { 460 | // thread count 461 | numThreads = 0; 462 | auto cur = argValue[argIndex] + 2; 463 | while (*cur != 0) 464 | { 465 | if ((*cur < '0') || (*cur > '9')) 466 | { 467 | std::cout << "invalid thread count" << std::endl; 468 | print_usage(); 469 | return -1; 470 | } 471 | numThreads *= 10; 472 | numThreads += (*cur - '0'); 473 | ++cur; 474 | } 475 | break; 476 | } 477 | default: 478 | { 479 | std::cout << "unknown switch: " << argValue[argIndex] << std::endl; 480 | return print_usage(); 481 | } 482 | } 483 | } 
484 | if ((numThreads == 0) || (numThreads > std::thread::hardware_concurrency())) 485 | numThreads = std::thread::hardware_concurrency(); 486 | 487 | switch (argValue[1][0]) 488 | { 489 | case 'e': 490 | { 491 | encode(argValue[2], argValue[3], numThreads, maxBlockSize); 492 | break; 493 | } 494 | 495 | case 'd': 496 | { 497 | decode(argValue[2], argValue[3], numThreads); 498 | break; 499 | } 500 | 501 | default: 502 | { 503 | print_usage(); 504 | break; 505 | } 506 | } 507 | 508 | return 0; 509 | } 510 | 511 | -------------------------------------------------------------------------------- /src/include/endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "./endian/endian.h" 5 | 6 | -------------------------------------------------------------------------------- /src/include/endian/byte_swap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef __APPLE__ 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | 13 | namespace maniscalco 14 | { 15 | 16 | //============================================================================== 17 | template 18 | auto byte_swap 19 | ( 20 | T value 21 | ) -> typename std::enable_if::type 22 | { 23 | return value; 24 | } 25 | 26 | 27 | //============================================================================== 28 | template 29 | auto byte_swap 30 | ( 31 | T value 32 | ) -> typename std::enable_if::type 33 | { 34 | auto v = static_cast(value); 35 | return static_cast((v >> 8) | (v << 8)); 36 | } 37 | 38 | 39 | //============================================================================== 40 | template 41 | auto byte_swap 42 | ( 43 | T value 44 | ) -> typename std::enable_if::type 45 | { 46 | #ifdef __APPLE__ 47 | return static_cast(OSSwapInt32(static_cast(value))); 48 | #else 49 | return static_cast(__builtin_bswap32(static_cast(value))); 50 | #endif 51 | } 


    //==============================================================================
    // 64 bit swap: OSSwapInt64 on Apple platforms, __builtin_bswap64 elsewhere
    // NOTE(review): template parameter / argument lists are missing from this listing.
    template
    auto byte_swap
    (
        T value
    ) -> typename std::enable_if::type
    {
        #ifdef __APPLE__
            return static_cast(OSSwapInt64(static_cast(value)));
        #else
            return static_cast(__builtin_bswap64(static_cast(value)));
        #endif
    }

} // namespace maniscalco
--------------------------------------------------------------------------------
/src/include/endian/endian.h:
--------------------------------------------------------------------------------
#pragma once

// NOTE(review): throughout this file the template parameter / argument lists (bare
// 'template' keywords, 'endian' used without arguments) and the name of one #include
// are missing from this listing, presumably stripped during extraction. The code is
// reproduced as shown; confirm against the repository before editing.


namespace maniscalco
{

    // forward declaration, needed by the headers included below
    template class endian;

} // namespace maniscalco


#include "./byte_swap.h"
#include "./endian_type.h"
#include "./endian_swap.h"

#include


namespace maniscalco
{

    //==============================================================================
    // Wrapper around a single value of 'underlying_type'.
    // Construction and assignment from underlying_type store endian_swap(input);
    // conversion back (operator underlying_type / get) returns endian_swap(value_).
    // Copy and move between endian objects transfer value_ as is.
    template
    class endian
    {
    public:

        using underlying_type = data_type;
        using type = endian_type;

        // other instantiations of endian may access value_
        template
        friend class endian;

        // value-initializes the stored value
        endian();

        endian
        (
            endian const &
        );

        endian
        (
            endian &&
        );

        // stores endian_swap(input)
        endian
        (
            underlying_type
        );

        endian & operator =
        (
            endian const &
        );

        endian & operator =
        (
            endian &&
        );

        // stores endian_swap(input)
        endian & operator =
        (
            underlying_type
        );

        // both return endian_swap(value_)
        operator underlying_type() const;

        underlying_type get() const;

    protected:

    private:

        underlying_type value_;

    };

    // convenience aliases
    template using big_endian = endian;
    template using little_endian = endian;
    template using network_order = endian;
    template using host_order = endian;

    // global operator overloads involving endian types
    // (each converts the endian operand with a (data_type) cast and compares plain values)
    template inline static bool operator < (input_type a, endian b){return (a < (data_type)b);}
    template inline static bool operator < (endian a, data_type b){return ((data_type)a < b);}
    template inline static bool operator <= (data_type a, endian b){return (a <= (data_type)b);}
    template inline static bool operator <= (endian a, data_type b){return ((data_type)a <= b);}
    template inline static bool operator == (data_type a, endian b){return (a == (data_type)b);}
    template inline static bool operator == (endian a, data_type b){return ((data_type)a == b);}
    template inline static bool operator >= (data_type a, endian b){return (a >= (data_type)b);}
    template inline static bool operator >= (endian a, data_type b){return ((data_type)a >= b);}
    template inline static bool operator > (data_type a, endian b){return (a > (data_type)b);}
    template inline static bool operator > (endian a, data_type b){return ((data_type)a > b);}
    template inline static bool operator != (data_type a, endian b){return (a != (data_type)b);}
    template inline static bool operator != (endian a, data_type b){return ((data_type)a != b);}

    // static make functions
    // (each has one overload taking an endian object and one taking a plain value)
    template big_endian make_big_endian(endian);
    template big_endian make_big_endian(data_type);
    template little_endian make_little_endian(endian);
    template little_endian make_little_endian(data_type);
    template host_order make_host_order(endian);
    template host_order make_host_order(data_type);
    template network_order make_network_order(endian);
    template network_order make_network_order(data_type);

}


//==============================================================================
// default constructor: value-initializes value_
template
maniscalco::endian::endian
(
):
    value_()
{
}


//==============================================================================
// move constructor: copies the stored value as is (no swap)
template
maniscalco::endian::endian
(
    endian && input
):
    value_(input.value_)
{
}


//==============================================================================
// copy constructor: copies the stored value as is (no swap)
template
maniscalco::endian::endian
(
    endian const & input
):
    value_(input.value_)
{
}


//==============================================================================
// converting constructor: stores endian_swap(input)
template
maniscalco::endian::endian
(
    data_type input
):
    value_(endian_swap(input))
{
}


//==============================================================================
// copy assignment: copies the stored value as is (no swap)
template
auto maniscalco::endian::operator =
(
    endian const & input
) -> endian &
{
    value_ = input.value_;
    return *this;
}


//==============================================================================
// move assignment: copies the stored value as is (no swap)
template
auto maniscalco::endian::operator =
(
    endian && input
) -> endian &
{
    value_ = input.value_;
    return *this;
}


//==============================================================================
// assignment from a plain value: stores endian_swap(input)
template
auto maniscalco::endian::operator =
(
    data_type input
) -> endian &
{
    value_ = endian_swap(input);
    return *this;
}


//==============================================================================
// conversion to the underlying type: returns endian_swap(value_)
template
maniscalco::endian::operator underlying_type
(
) const
{
    return endian_swap(value_);
}


//==============================================================================
// same result as the conversion operator, as a named function
template
auto maniscalco::endian::get
(
) const -> underlying_type
{
    return endian_swap(value_);
}


//==============================================================================
// builds a big_endian from the (T) conversion of an endian object
template
auto maniscalco::make_big_endian
(
    maniscalco::endian value
) -> big_endian
{
    return big_endian((T)value);
}


//==============================================================================
// builds a big_endian from a plain value
template
auto maniscalco::make_big_endian
(
    T value
) -> big_endian
{
    return big_endian((T)value);
}


//==============================================================================
// builds a little_endian from the (T) conversion of an endian object
template
auto maniscalco::make_little_endian
(
    endian value
) -> little_endian
{
    return little_endian((T)value);
}


//==============================================================================
// builds a little_endian from a plain value
template
auto maniscalco::make_little_endian
(
    T value
) -> little_endian
{
    return little_endian((T)value);
}


//==============================================================================
// builds a host_order from the (T) conversion of an endian object
template
auto maniscalco::make_host_order
(
    endian value
) -> host_order
{
    return host_order((T)value);
}


//==============================================================================
// builds a host_order from a plain value
template
auto maniscalco::make_host_order
(
    T value
) -> host_order
{
    return host_order((T)value);
}


//==============================================================================
// builds a network_order from the (T) conversion of an endian object
template
auto maniscalco::make_network_order
(
    endian value
) -> network_order
{
    return network_order((T)value);
}


//==============================================================================
// builds a network_order from a plain value
template
auto maniscalco::make_network_order
(
    T value
) -> network_order
{
    return network_order((T)value);
}
--------------------------------------------------------------------------------
/src/include/endian/endian_swap.h:
--------------------------------------------------------------------------------
#pragma once

#include "./endian_type.h"
#include "./byte_swap.h"
// NOTE(review): the header name of the following #include, the template parameter lists
// of the two operator() members and the template arguments in the final return statement
// are missing from this listing (presumably stripped during extraction).
#include


namespace maniscalco
{

    namespace impl
    {

        // primary template; the third parameter is the enable_if slot used by the
        // two partial specializations below
        template
        <
            typename,
            typename,
            typename = void
        >
        struct endian_swap;


        //======================================================================
        // specialization for from == to (no byte swap)
        template
        <
            typename from_endian,
            typename to_endian
        >
        struct endian_swap
        <
            from_endian,
            to_endian,
            typename std::enable_if
            <
                std::is_same
                <
                    from_endian,
                    to_endian
                >::value
            >::type
        >
        {
            // returns the input unchanged
            template
            inline data_type operator()
            (
                data_type input
            ) const
            {
                return input;
            }
        };


        //======================================================================
        // specialization for from != to (do byte swap)
        template
        <
            typename from_endian,
            typename to_endian
        >
        struct endian_swap
        <
            from_endian,
            to_endian,
            typename std::enable_if
            <
                !std::is_same
                <
                    from_endian,
                    to_endian
                >::value
            >::type
        >
        {
            // returns byte_swap(input)
            template
            inline data_type operator()
            (
                data_type input
            ) const
            {
                return byte_swap(input);
            }
        };

    }


    //==========================================================================
    // static
    // do a byte swap from one endian to another as specified
    // (forwards to the impl::endian_swap functor selected by the two endian types)
    template
    <
        typename from_endian,
        typename to_endian,
        typename data_type
    >
    static inline data_type endian_swap
    (
        data_type input
    )
    {
        return maniscalco::impl::endian_swap()(input);
    }

}
--------------------------------------------------------------------------------
/src/include/endian/endian_type.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | namespace maniscalco 5 | { 6 | 7 | struct big_endian_type; 8 | struct little_endian_type; 9 | 10 | using network_order_type = big_endian_type; 11 | using host_order_type = little_endian_type; 12 | // using host_order_type = big_endian_type; 13 | } 14 | -------------------------------------------------------------------------------- /src/library/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(m99) 2 | -------------------------------------------------------------------------------- /src/library/m99.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "./m99/m99.h" 4 | -------------------------------------------------------------------------------- /src/library/m99/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(m99 2 | m99_decode.cpp 3 | m99_encode.cpp 4 | m99_encode_stream.cpp 5 | m99_decode_stream.cpp 6 | ) 7 | 8 | target_link_libraries(m99 io entropy) 9 | 10 | target_include_directories(m99 11 | PUBLIC 12 | $ 13 | $ 14 | ) 15 | 16 | target_compile_features(m99 PUBLIC cxx_std_17) 17 | -------------------------------------------------------------------------------- /src/library/m99/m99_decode.cpp: -------------------------------------------------------------------------------- 1 | #include "./m99_decode.h" 2 | 3 | #include 4 | 5 | 6 | namespace 7 | { 8 | using namespace maniscalco; 9 | 10 | struct symbol_info 11 | { 12 | symbol_info(){} 13 | symbol_info(std::uint8_t symbol, std::uint32_t count):symbol_(symbol), count_(count){} 14 | std::uint8_t symbol_; 15 | std::uint32_t count_; 16 | }; 17 | 18 | 19 | //====================================================================================================================== 20 | 
std::uint32_t unpack_value 21 | ( 22 | m99_decode_stream & decodeStream, 23 | std::uint32_t total, 24 | std::uint32_t maxLeft, 25 | std::uint32_t maxRight 26 | ) 27 | { 28 | if (total > maxLeft) 29 | { 30 | auto inferredRight = (total - maxLeft); 31 | maxRight -= inferredRight; 32 | total -= inferredRight; 33 | } 34 | auto left = 0; 35 | if (total > maxRight) 36 | { 37 | left = (total - maxRight); 38 | total -= left; 39 | } 40 | if (total) 41 | { 42 | std::uint32_t codeLength = 1; 43 | while (total >> ++codeLength) 44 | ; 45 | auto code = decodeStream.pop(--codeLength); 46 | if (((code | (1ull << codeLength)) <= total)) 47 | code |= (decodeStream.pop_bit() << codeLength); 48 | left += code; 49 | } 50 | return left; 51 | } 52 | 53 | 54 | //====================================================================================================================== 55 | void split 56 | ( 57 | m99_decode_stream & decodeStream, 58 | std::uint8_t * decodedData, 59 | std::uint32_t totalSize, 60 | std::uint32_t leftSize, 61 | symbol_info const * parentSymbolInfo 62 | ) 63 | { 64 | if (parentSymbolInfo[0].count_ >= totalSize) 65 | { 66 | while (totalSize--) 67 | *decodedData++ = parentSymbolInfo[0].symbol_; 68 | return; 69 | } 70 | 71 | if (totalSize <= 2) 72 | { 73 | if (totalSize == 2) 74 | { 75 | auto c = decodeStream.pop_bit(); 76 | decodedData[c == 1] = parentSymbolInfo[1].symbol_; 77 | decodedData[c == 0] = parentSymbolInfo[0].symbol_; 78 | } 79 | else 80 | { 81 | decodedData[0] = parentSymbolInfo[0].symbol_; 82 | } 83 | return; 84 | } 85 | 86 | std::uint32_t rightSize = (totalSize - leftSize); 87 | symbol_info leftSymbolInfo[256]; 88 | symbol_info rightSymbolInfo[256]; 89 | symbol_info * result[2] = {leftSymbolInfo, rightSymbolInfo}; 90 | symbol_info const * currentSymbolInfo = parentSymbolInfo; 91 | static auto constexpr leftSide = 0; 92 | static auto constexpr rightSide = 1; 93 | 94 | auto leftSizeRemaining = leftSize; 95 | auto rightSizeRemaining = rightSize; 96 | while 
(leftSizeRemaining && rightSizeRemaining) 97 | { 98 | symbol_info symbolInfo = *currentSymbolInfo++; 99 | auto totalCount = symbolInfo.count_; 100 | auto leftCount = unpack_value(decodeStream, totalCount, leftSizeRemaining, rightSizeRemaining); 101 | auto rightCount = (totalCount - leftCount); 102 | leftSizeRemaining -= leftCount; 103 | rightSizeRemaining -= rightCount; 104 | *result[leftSide] = {symbolInfo.symbol_, leftCount}; 105 | *result[rightSide] = {symbolInfo.symbol_, rightCount}; 106 | result[leftSide] += (leftCount != 0); 107 | result[rightSide] += (rightCount != 0); 108 | } 109 | auto n = leftSizeRemaining + rightSizeRemaining; 110 | symbol_info * c = result[(leftSizeRemaining == 0)]; 111 | while (n > 0) 112 | { 113 | n -= currentSymbolInfo->count_; 114 | *c++ = *currentSymbolInfo++; 115 | } 116 | split(decodeStream, decodedData, leftSize, leftSize >> 1, leftSymbolInfo); 117 | split(decodeStream, decodedData + leftSize, rightSize, rightSize >> 1, rightSymbolInfo); 118 | } 119 | 120 | 121 | } // namespace 122 | 123 | 124 | //====================================================================================================================== 125 | void maniscalco::m99_decode 126 | ( 127 | m99_decode_stream & decodeStream, 128 | std::uint8_t * outputBegin, 129 | std::uint8_t * outputEnd 130 | ) 131 | { 132 | while (!decodeStream.pop(1)) 133 | ; // pop until a 1 bit is decoded. this is start of stream marker. 
134 | 135 | // decode the header stream 136 | symbol_info symbolInfo[256]; 137 | auto bytesToDecode = std::distance(outputBegin, outputEnd); 138 | auto n = bytesToDecode; 139 | for (auto i = 0; i < 256; ++i) 140 | { 141 | if (n == 0) 142 | break; 143 | symbolInfo[i].count_ = unpack_value(decodeStream, n, n, n); 144 | symbolInfo[i].symbol_ = decodeStream.pop(8); 145 | n -= symbolInfo[i].count_; 146 | } 147 | 148 | std::uint32_t leftSize = 1; 149 | while (leftSize < bytesToDecode) 150 | leftSize <<= 1; 151 | split(decodeStream, outputBegin, bytesToDecode, leftSize >> 1, symbolInfo); 152 | } 153 | 154 | -------------------------------------------------------------------------------- /src/library/m99/m99_decode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "./m99_decode_stream.h" 4 | 5 | #include 6 | 7 | 8 | namespace maniscalco 9 | { 10 | 11 | void m99_decode 12 | ( 13 | m99_decode_stream &, 14 | std::uint8_t *, 15 | std::uint8_t * 16 | ); 17 | 18 | } // namespace maniscalco 19 | 20 | -------------------------------------------------------------------------------- /src/library/m99/m99_decode_stream.cpp: -------------------------------------------------------------------------------- 1 | #include "./m99_decode_stream.h" -------------------------------------------------------------------------------- /src/library/m99/m99_decode_stream.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | namespace maniscalco 10 | { 11 | 12 | class m99_decode_stream 13 | { 14 | public: 15 | 16 | static auto constexpr stream_direction = io::stream_direction::forward; 17 | using stream_type = io::push_stream; 18 | using packet_type = stream_type::packet_type; 19 | 20 | m99_decode_stream 21 | ( 22 | buffer b, 23 | buffer::size_type size 24 | ): 25 | stream_ 26 | ( 27 | {.inputHandler_ = [this](){return 
std::move(packet_);}} 28 | ), 29 | packet_(std::move(b), 0, size * 8) 30 | { 31 | } 32 | 33 | auto pop 34 | ( 35 | std::size_t codeLength 36 | ) 37 | { 38 | return stream_.pop(codeLength); 39 | } 40 | 41 | auto pop_bit() 42 | { 43 | return stream_.pop_bit(); 44 | } 45 | 46 | private: 47 | 48 | io::forward_pop_stream stream_; 49 | 50 | packet_type packet_; 51 | 52 | }; 53 | } // namespace maniscalco -------------------------------------------------------------------------------- /src/library/m99/m99_encode.cpp: -------------------------------------------------------------------------------- 1 | #include "./m99_encode.h" 2 | 3 | 4 | namespace 5 | { 6 | 7 | using namespace maniscalco; 8 | 9 | struct symbol_info 10 | { 11 | symbol_info(){} 12 | symbol_info(std::uint8_t symbol, std::uint32_t count):symbol_(symbol), count_(count){} 13 | std::uint8_t symbol_; 14 | std::uint32_t count_; 15 | }; 16 | 17 | struct tiny_encode_table_entry_type 18 | { 19 | std::uint32_t value_; 20 | std::uint32_t length_; 21 | }; 22 | 23 | using tiny_encode_table_type = tiny_encode_table_entry_type[8][8][8][8]; 24 | 25 | 26 | tiny_encode_table_type tinyEncodeTable; 27 | auto const initialize = [] 28 | ( 29 | tiny_encode_table_type & result 30 | ) -> bool 31 | { 32 | for (std::uint32_t maxLeft = 0; maxLeft < 8; ++maxLeft) 33 | { 34 | for (std::uint32_t maxRight = 0; maxRight < 8; ++maxRight) 35 | { 36 | for (std::uint32_t left = 0; left <= maxLeft; ++left) 37 | { 38 | for (std::uint32_t right = 0; right <= maxRight; ++right) 39 | { 40 | if ((maxLeft == 2) && (maxRight==2) && ((left+right)==1) && (left==1)) 41 | int y = 9; 42 | std::uint32_t total = left + right; 43 | if (total < 8) 44 | { 45 | std::uint32_t l = left; 46 | std::uint32_t r = right; 47 | std::uint32_t ml = maxLeft; 48 | std::uint32_t mr = maxRight; 49 | std::uint32_t t = (l + r); 50 | if (t > ml) 51 | { 52 | std::uint32_t inferredRight = (t - ml); 53 | mr -= inferredRight; 54 | t -= inferredRight; 55 | } 56 | if (t > mr) 57 | { 58 
| std::uint32_t inferredLeft = (t - mr); 59 | l -= inferredLeft; 60 | t -= inferredLeft; 61 | } 62 | std::uint32_t codeLength = 0; 63 | while ((1ull << ++codeLength) <= t) 64 | ; 65 | --codeLength; 66 | auto needMsb = ((l | (1ull << codeLength)) <= t); 67 | auto code = ((l << needMsb) | (l >> codeLength)); 68 | codeLength += needMsb; 69 | 70 | code &= ((1ull << codeLength) - 1); // TEMP 71 | result[maxLeft][maxRight][left][total] = {code, codeLength}; 72 | } 73 | } 74 | } 75 | } 76 | } 77 | return true; 78 | }(tinyEncodeTable); 79 | 80 | 81 | //====================================================================================================================== 82 | void pack_value 83 | ( 84 | m99_encode_stream & encodeStream, 85 | std::uint32_t left, 86 | std::uint32_t total, 87 | std::uint32_t maxLeft, 88 | std::uint32_t maxRight 89 | ) 90 | { 91 | if (total < 8) 92 | { 93 | auto const & encTableEntry = tinyEncodeTable[(maxLeft >= 8) ? 7 : maxLeft][(maxRight >= 8) ? 7 : maxRight][left][total]; 94 | encodeStream.push(encTableEntry.value_, encTableEntry.length_); 95 | return; 96 | } 97 | if (total > maxLeft) 98 | { 99 | auto inferredRight = (total - maxLeft); 100 | maxRight -= inferredRight; 101 | total -= inferredRight; 102 | } 103 | if (total > maxRight) 104 | { 105 | auto inferredLeft = (total - maxRight); 106 | left -= inferredLeft; 107 | total -= inferredLeft; 108 | } 109 | if (total) 110 | { 111 | std::uint32_t codeLength = 1; 112 | while (total >> ++codeLength) 113 | ; 114 | --codeLength; 115 | auto needMsb = ((left | (1ull << codeLength)) <= total); 116 | auto code = ((left << needMsb) | (left >> codeLength)); 117 | codeLength += needMsb; 118 | code &= ((1ull << codeLength) - 1); // TEMP 119 | encodeStream.push(code, codeLength); 120 | } 121 | } 122 | 123 | 124 | //========================================================================== 125 | void merge 126 | ( 127 | m99_encode_stream & encodeStream, 128 | std::uint8_t const * begin, 129 | std::uint32_t 
totalSize, 130 | std::uint32_t leftSize, 131 | symbol_info * result, 132 | std::uint32_t leadingRunLength 133 | ) 134 | { 135 | if (leadingRunLength >= totalSize) 136 | { 137 | result[0] = {begin[0], totalSize}; 138 | return; 139 | } 140 | if (totalSize <= 2) 141 | { 142 | if (totalSize == 2) 143 | { 144 | auto c = (unsigned)(begin[0] < begin[1]); 145 | result[0] = {begin[!c], 1 + (unsigned)(begin[0] == begin[1])}; 146 | result[1] = {begin[c], 1}; 147 | encodeStream.push(c, begin[0] != begin[1]); 148 | } 149 | else 150 | { 151 | result[0] = {begin[0], 1}; 152 | } 153 | return; 154 | } 155 | 156 | std::uint32_t rightSize = (totalSize - leftSize); 157 | symbol_info left[256]; 158 | symbol_info right[256]; 159 | symbol_info const * current[2] = {left, right}; 160 | symbol_info * resultCurrent = result; 161 | static auto constexpr leftSide = 0; 162 | static auto constexpr rightSide = 1; 163 | auto rightLeadingRunLength = (leadingRunLength > leftSize) ? (leadingRunLength - leftSize) : [](std::uint8_t const * begin, std::uint8_t const * end) 164 | { 165 | auto cur = begin; 166 | auto s = *cur; 167 | while ((cur < end) && (*cur == s)) 168 | ++cur; 169 | return std::distance(begin, cur); 170 | }(begin + leftSize, begin + totalSize); 171 | 172 | merge(encodeStream, begin + leftSize, rightSize, rightSize >> 1, right, rightLeadingRunLength); 173 | merge(encodeStream, begin, leftSize, leftSize >> 1, left, leadingRunLength); 174 | 175 | #pragma pack(push, 1) 176 | using size_union = union size_union 177 | { 178 | size_union(std::uint32_t left, std::uint32_t right):size_({left, right}){}; 179 | std::size_t union_; 180 | struct 181 | { 182 | std::uint32_t left_; 183 | std::uint32_t right_; 184 | } size_; 185 | }; 186 | #pragma pack(pop) 187 | 188 | std::array, 256> valuesToEncode; 189 | std::uint32_t numValuesToEncode{0}; 190 | 191 | size_union partitionSize_(leftSize, rightSize); 192 | while (partitionSize_.size_.left_ && partitionSize_.size_.right_) 193 | { 194 | size_union 
count( 195 | (-(current[leftSide]->symbol_ <= current[rightSide]->symbol_) & (std::uint32_t)current[leftSide]->count_), 196 | (-(current[rightSide]->symbol_ <= current[leftSide]->symbol_) & (std::uint32_t)current[rightSide]->count_) 197 | ); 198 | auto totalCount = (count.size_.left_ + count.size_.right_); 199 | valuesToEncode[numValuesToEncode++] = {count.size_.left_, totalCount, partitionSize_.size_.left_, partitionSize_.size_.right_}; 200 | partitionSize_.union_ -= count.union_; 201 | *resultCurrent++ = {current[(count.size_.left_ == 0)]->symbol_, totalCount}; 202 | current[leftSide] += (count.size_.left_ != 0); 203 | current[rightSide] += (count.size_.right_ != 0); 204 | } 205 | auto n = partitionSize_.size_.left_ + partitionSize_.size_.right_; 206 | symbol_info const * c = current[(partitionSize_.size_.left_ == 0)]; 207 | while (n > 0) 208 | { 209 | n -= c->count_; 210 | *resultCurrent++ = *c++; 211 | } 212 | 213 | 214 | while (numValuesToEncode) 215 | { 216 | auto [left, total, maxLeft, maxRight] = valuesToEncode[--numValuesToEncode]; 217 | pack_value(encodeStream, left, total, maxLeft, maxRight); 218 | } 219 | } 220 | 221 | } // namespace 222 | 223 | 224 | //========================================================================== 225 | void maniscalco::m99_encode 226 | ( 227 | std::uint8_t const * begin, 228 | std::uint8_t const * end, 229 | m99_encode_stream & encodeStream 230 | 231 | ) 232 | { 233 | // determine initial merge boundary (left size is largest power of 2 that is less than the input size). 
234 | std::uint32_t bytesToEncode = std::distance(begin, end); 235 | std::uint32_t leftSize = 1; 236 | while (leftSize < bytesToEncode) 237 | leftSize <<= 1; 238 | symbol_info symbolList[256]; 239 | 240 | // do recursive merge and encode 241 | auto cur = begin; 242 | auto s = *cur; 243 | while ((cur < end) && (*cur == s)) 244 | ++cur; 245 | auto leadingRunLength = std::distance(begin, cur); 246 | merge(encodeStream, begin, bytesToEncode, leftSize >> 1, symbolList, leadingRunLength); 247 | 248 | // encode the symbols and their counts 249 | auto n = bytesToEncode; 250 | std::vector> headerValuesToEncode; 251 | headerValuesToEncode.reserve(256); 252 | for (auto & symbolInfo : symbolList) 253 | { 254 | if (n == 0) 255 | break; 256 | headerValuesToEncode.push_back({symbolInfo.symbol_, symbolInfo.count_, n}); 257 | n -= symbolInfo.count_; 258 | } 259 | std::reverse(headerValuesToEncode.begin(), headerValuesToEncode.end()); 260 | for (auto [symbol, count, maxCount] : headerValuesToEncode) 261 | { 262 | encodeStream.push(symbol, 8); 263 | pack_value(encodeStream, count, maxCount, maxCount, maxCount); 264 | } 265 | encodeStream.push(1, 1); 266 | } 267 | -------------------------------------------------------------------------------- /src/library/m99/m99_encode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "./m99_encode_stream.h" 4 | 5 | #include 6 | 7 | 8 | namespace maniscalco 9 | { 10 | 11 | void m99_encode 12 | ( 13 | std::uint8_t const *, 14 | std::uint8_t const *, 15 | m99_encode_stream & 16 | ); 17 | 18 | } // namespace maniscalco 19 | 20 | -------------------------------------------------------------------------------- /src/library/m99/m99_encode_stream.cpp: -------------------------------------------------------------------------------- 1 | #include "./m99_encode_stream.h" 2 | 3 | 4 | //============================================================================= 5 | 
maniscalco::m99_encode_stream::m99_encode_stream 6 | ( 7 | ): 8 | stream_({ 9 | .bufferOutputHandler_ = [this](packet_type packet) 10 | { 11 | packets_.emplace_front(std::move(packet)); 12 | }, 13 | .bufferAllocationHandler_ = [](){return maniscalco::buffer((1 << 10) * 16);} 14 | }) 15 | { 16 | } 17 | 18 | 19 | //============================================================================= 20 | auto maniscalco::m99_encode_stream::begin 21 | ( 22 | ) const -> const_iterator 23 | { 24 | return packets_.begin(); 25 | } 26 | 27 | 28 | //============================================================================= 29 | auto maniscalco::m99_encode_stream::end 30 | ( 31 | ) const -> const_iterator 32 | { 33 | return packets_.end(); 34 | } 35 | 36 | 37 | //============================================================================= 38 | void maniscalco::m99_encode_stream::clear 39 | ( 40 | ) 41 | { 42 | stream_.flush(); 43 | packets_.clear(); 44 | } 45 | 46 | 47 | //============================================================================= 48 | void maniscalco::m99_encode_stream::flush 49 | ( 50 | ) 51 | { 52 | stream_.flush(); 53 | } 54 | 55 | 56 | //============================================================================= 57 | auto maniscalco::m99_encode_stream::size 58 | ( 59 | ) const -> size_type 60 | { 61 | return stream_.size(); 62 | } -------------------------------------------------------------------------------- /src/library/m99/m99_encode_stream.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | namespace maniscalco 10 | { 11 | 12 | class m99_encode_stream 13 | { 14 | public: 15 | 16 | static auto constexpr stream_direction = io::stream_direction::reverse; 17 | using stream_type = io::push_stream; 18 | using packet_type = stream_type::packet_type; 19 | using element_type = packet_type; 20 | using container_type = std::deque; 21 | using iterator = 
container_type::iterator; 22 | using const_iterator = container_type::const_iterator; 23 | using size_type = std::size_t; 24 | 25 | m99_encode_stream(); 26 | 27 | template 28 | auto push(T && ... args); 29 | 30 | size_type size() const; 31 | 32 | const_iterator begin() const; 33 | 34 | const_iterator end() const; 35 | 36 | void clear(); 37 | 38 | void flush(); 39 | 40 | //private: 41 | 42 | container_type packets_; 43 | 44 | stream_type stream_; 45 | 46 | }; // class m99_encode_stream 47 | 48 | 49 | } // namespace maniscalco 50 | 51 | 52 | //============================================================================= 53 | template 54 | auto maniscalco::m99_encode_stream::push 55 | ( 56 | T && ... args 57 | ) 58 | { 59 | return stream_.push(std::forward(args) ...); 60 | } 61 | --------------------------------------------------------------------------------