├── CMakeLists.txt
├── LICENSE
├── README.md
└── src
    ├── CMakeLists.txt
    ├── executable
        ├── CMakeLists.txt
        └── m99
        │   ├── CMakeLists.txt
        │   └── main.cpp
    ├── include
        ├── endian.h
        └── endian
        │   ├── byte_swap.h
        │   ├── endian.h
        │   ├── endian_swap.h
        │   └── endian_type.h
    └── library
        ├── CMakeLists.txt
        ├── m99.h
        └── m99
            ├── CMakeLists.txt
            ├── m99_decode.cpp
            ├── m99_decode.h
            ├── m99_decode_stream.cpp
            ├── m99_decode_stream.h
            ├── m99_encode.cpp
            ├── m99_encode.h
            ├── m99_encode_stream.cpp
            └── m99_encode_stream.h


/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.16.3)
 2 | 
 3 | project(m99)
 4 | 
 5 | 
 6 | # Single-configuration generators start with an empty build type; fall back to
 7 | # an optimized build in that case.
 8 | if(NOT CMAKE_BUILD_TYPE)
 9 |     set(CMAKE_BUILD_TYPE Release)
10 |     message("*** Build type not set.  defaulting to Release")
11 | endif()
12 | 
13 | # Directory-scoped flags: they apply to every target created below this point,
14 | # which includes the dependencies fetched further down.
15 | if(CMAKE_BUILD_TYPE STREQUAL "Debug")
16 |     add_compile_options(
17 |         -g
18 |         -O0
19 |         -march=native
20 |     )
21 | else()
22 |     add_compile_options(
23 |         -O3
24 |         -march=native
25 |     )
26 | endif()
27 | 
28 | 
29 | option(M99_BUILD_DEMO "Build the CLI demo" ON)
30 | 
31 | 
32 | include(FetchContent)
33 | 
34 | # Dependency "io". IO_BUILD_DEMO is pinned to OFF before the project is added.
35 | set(IO_BUILD_DEMO OFF CACHE INTERNAL "")
36 | FetchContent_Declare(
37 |     io
38 |     GIT_REPOSITORY https://github.com/michaelmaniscalco/io.git
39 |     GIT_TAG master
40 |     SOURCE_DIR        "${CMAKE_BINARY_DIR}/io-src"
41 |     BINARY_DIR        "${CMAKE_BINARY_DIR}/io-build"
42 |     INSTALL_DIR       "${CMAKE_BINARY_DIR}"
43 |     INSTALL_COMMAND   ""
44 | )
45 | FetchContent_MakeAvailable(io)
46 | FetchContent_GetProperties(io)
47 | 
48 | 
49 | # Dependency "entropy". ENTROPY_BUILD_DEMO is pinned to OFF in the same way.
50 | set(ENTROPY_BUILD_DEMO OFF CACHE INTERNAL "")
51 | FetchContent_Declare(
52 |     entropy
53 |     GIT_REPOSITORY https://github.com/michaelmaniscalco/entropy.git
54 |     GIT_TAG master
55 |     SOURCE_DIR        "${CMAKE_BINARY_DIR}/entropy-src"
56 |     BINARY_DIR        "${CMAKE_BINARY_DIR}/entropy-build"
57 |     INSTALL_DIR       "${CMAKE_BINARY_DIR}"
58 |     INSTALL_COMMAND   ""
59 | )
60 | FetchContent_MakeAvailable(entropy)
61 | FetchContent_GetProperties(entropy)
62 | 
63 | 
64 | # These variables only initialise the output-directory properties of targets
65 | # created AFTER they are set, so they must precede add_subdirectory(src);
66 | # setting them afterwards has no effect on the m99 targets.
67 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
68 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
69 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
70 | 
71 | add_subdirectory(src)
72 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Michael Maniscalco
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # m99
 2 | 
 3 | A novel, high-performance BWT (Burrows-Wheeler transform) compression algorithm.
 4 | 
 5 | ```
 6 | mkdir build
 7 | cd build
 8 | cmake ..
 9 | make
10 | ```
11 | 
12 | 
13 | To build the CLI demo explicitly (it is enabled by default; pass `-DM99_BUILD_DEMO=OFF` to skip it):
14 | 
15 | ```
16 | mkdir build
17 | cd build
18 | cmake -DM99_BUILD_DEMO=ON ..
19 | make 
20 | ```
21 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Remember the src/ directory; the variable is inherited by the
2 | # subdirectories added below.
3 | # NOTE(review): presumably consumed there as an include root - confirm.
4 | set(_m99_include_dir "${CMAKE_CURRENT_SOURCE_DIR}")
5 | 
6 | # The library is added first, then the executable directory.
7 | add_subdirectory(library)
8 | add_subdirectory(executable)
9 | 


--------------------------------------------------------------------------------
/src/executable/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # The CLI demo is optional: stop processing this directory when it is disabled.
2 | if(NOT M99_BUILD_DEMO)
3 |     return()
4 | endif()
5 | 
6 | add_subdirectory(m99)


--------------------------------------------------------------------------------
/src/executable/m99/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | include(FetchContent)
 3 | 
 4 | # msufsort is fetched at configure time and linked into the demo below.
 5 | FetchContent_Declare(
 6 |     msufsort
 7 |     GIT_REPOSITORY https://github.com/michaelmaniscalco/msufsort.git
 8 |     GIT_TAG master
 9 |     SOURCE_DIR        "${CMAKE_BINARY_DIR}/msufsort-src"
10 |     BINARY_DIR        "${CMAKE_BINARY_DIR}/msufsort-build"
11 |     INSTALL_DIR       "${CMAKE_BINARY_DIR}"
12 |     INSTALL_COMMAND   ""
13 | )
14 | FetchContent_MakeAvailable(msufsort)
15 | FetchContent_GetProperties(msufsort)
16 | 
17 | # main.cpp uses std::thread, so a threading library is mandatory.
18 | find_package(Threads REQUIRED)
19 | 
20 | # libc++ / libc++abi are optional: they are linked only when present.
21 | find_library(LIBCXX_LIB c++)
22 | find_library(LIBCXXABI_LIB c++abi)
23 | 
24 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
25 | 
26 | add_executable(m99_demo main.cpp)
27 | 
28 | target_link_libraries(m99_demo PRIVATE Threads::Threads m99 msufsort)
29 | 
30 | # A failed find_library() leaves "<VAR>-NOTFOUND" in the variable. Handing that
31 | # value to a link command is a hard CMake error, so test each result first.
32 | if(LIBCXX_LIB)
33 |     target_link_libraries(m99_demo PRIVATE "${LIBCXX_LIB}")
34 | endif()
35 | if(LIBCXXABI_LIB)
36 |     target_link_libraries(m99_demo PRIVATE "${LIBCXXABI_LIB}")
37 | endif()


--------------------------------------------------------------------------------
/src/executable/m99/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <library/m99/m99_encode.h>
  2 | #include <library/m99/m99_decode.h>
  3 | #include <library/msufsort.h>
  4 | #include <cstdint>
  5 | #include <iostream>
  6 | #include <memory>
  7 | #include <vector>
  8 | #include <fstream>
  9 | #include <chrono>
 10 | #include <thread>
 11 | #include <cstring>
 12 | #include <string>
 13 | #include <atomic>
 14 | #include <mutex>
 15 | 
 16 | 
 17 | namespace
 18 | {
 19 | 
 20 |     static auto constexpr max_encode_block_size = (1ull << 20); // sub block size in bytes (1 MiB); each sub block gets its own encode stream
 21 | 
 22 |     struct block_header // written verbatim (raw bytes) ahead of the sub blocks of every encoded block
 23 |     {
 24 |         std::uint32_t blockSize_; // number of input bytes in the block
 25 |         std::uint32_t sentinelIndex_; // value returned by forward_burrows_wheeler_transform; handed back to the reverse transform when decoding
 26 |     };
 27 | 
 28 | 
 29 |     //==================================================================================================================
 30 |     // Reads the whole file at 'path' into memory and returns its bytes.
 31 |     // On failure (the file cannot be opened, or its size cannot be determined) a
 32 |     // message is printed to stdout and an empty vector is returned.
 33 |     std::vector<char> load_file
 34 |     (
 35 |         char const * path
 36 |     )
 37 |     {
 38 |         std::ifstream inputStream(path, std::ios_base::in | std::ios_base::binary);
 39 |         if (!inputStream.is_open())
 40 |         {
 41 |             std::cout << "failed to open file \"" << path << "\"" << std::endl;
 42 |             return std::vector<char>();
 43 |         }
 44 | 
 45 |         // the end position of the stream is the file size; tellg() yields -1 on failure
 46 |         inputStream.seekg(0, std::ios_base::end);
 47 |         std::streamoff const fileSize = inputStream.tellg();
 48 |         if (fileSize < 0)
 49 |         {
 50 |             std::cout << "failed to determine size of file \"" << path << "\"" << std::endl;
 51 |             return std::vector<char>();
 52 |         }
 53 | 
 54 |         std::vector<char> input(static_cast<std::size_t>(fileSize));
 55 |         inputStream.seekg(0, std::ios_base::beg);
 56 |         inputStream.read(input.data(), static_cast<std::streamsize>(input.size()));
 57 |         // keep only the bytes that were actually read (guards against a short read)
 58 |         input.resize(static_cast<std::size_t>(inputStream.gcount()));
 59 |         return input;
 60 |     }
 52 | 
 53 | 
 54 |     //==================================================================================================================
 55 |     // Encodes the block [inputBegin, inputEnd) on the calling thread and appends
 56 |     // the result to 'outStream'.  Output layout: a block_header, followed by one
 57 |     // record per sub block of at most max_encode_block_size input bytes:
 58 |     //     [4 byte encoded size][4 byte sub block id][encoded bytes]
 59 |     // NOTE(review): the 4 byte fields are written in host byte order.
 60 |     void encode_block
 61 |     (
 62 |         // single threaded
 63 |         std::uint8_t const * inputBegin,
 64 |         std::uint8_t const * inputEnd,
 65 |         std::ofstream & outStream
 66 |     )
 67 |     {
 68 |         // transform input (BWT)
 69 |         auto sentinelIndex = maniscalco::forward_burrows_wheeler_transform(inputBegin, inputEnd, 1);
 70 | 
 71 |         // write header for input.  the conversions are explicit because brace
 72 |         // initialization does not permit implicit narrowing.
 73 |         block_header blockHeader
 74 |         {
 75 |             .blockSize_ = static_cast<std::uint32_t>(std::distance(inputBegin, inputEnd)),
 76 |             .sentinelIndex_ = static_cast<std::uint32_t>(sentinelIndex)
 77 |         };
 78 |         outStream.write((char const *)&blockHeader, sizeof(blockHeader));
 79 | 
 80 |         // encode consecutive sub blocks until the input is exhausted
 81 |         std::uint32_t subBlockId = 0;
 82 |         auto blockBegin = inputBegin;
 83 |         while (blockBegin < inputEnd)
 84 |         {
 85 |             // the final sub block may be shorter than max_encode_block_size
 86 |             auto bytesRemaining = static_cast<std::size_t>(inputEnd - blockBegin);
 87 |             auto blockEnd = (bytesRemaining > max_encode_block_size) ? (blockBegin + max_encode_block_size) : inputEnd;
 88 |             // create encode stream and encode this subblock
 89 |             maniscalco::m99_encode_stream encodeStream;
 90 |             maniscalco::m99_encode(blockBegin, blockEnd, encodeStream);
 91 |             encodeStream.flush();
 92 |             // write this encoded sub block to the destination.  the size is held in
 93 |             // a 32 bit variable so that exactly its four bytes are written.
 94 |             std::uint32_t encodedSize = static_cast<std::uint32_t>((encodeStream.size() + 7) / 8);
 95 |             outStream.write((char const *)&encodedSize, sizeof(encodedSize));
 96 |             outStream.write((char const *)&subBlockId, sizeof(subBlockId));
 97 |             // write the encoded data for the stream
 98 |             for (auto const & packet : encodeStream)
 99 |             {
100 |                 auto bytesToWrite = ((packet.size() + 7) / 8);
101 |                 auto address = (packet.data() + packet.capacity() - bytesToWrite);
102 |                 outStream.write((char const *)address, bytesToWrite);
103 |             }
104 |             ++subBlockId;
105 |             blockBegin = blockEnd;
106 |         }
107 |     }
102 | 
103 | 
104 | 
105 |     //==================================================================================================================
106 |     // Encodes the block [inputBegin, inputEnd) with 'numThreads' worker threads
107 |     // and appends the result to 'outStream'.  A block_header is written first;
108 |     // the workers then claim sub blocks of at most max_encode_block_size bytes
109 |     // from a shared counter and each appends a record
110 |     //     [4 byte encoded size][4 byte sub block id][encoded bytes]
111 |     // under a mutex.  Records are therefore written in completion order, not in
112 |     // id order; the stored id identifies the sub block.
113 |     // A 'numThreads' of zero is treated as one so that the input is still encoded.
114 |     void encode_block
115 |     (
116 |         std::uint8_t const * inputBegin,
117 |         std::uint8_t const * inputEnd,
118 |         std::ofstream & outStream,
119 |         std::size_t numThreads
120 |     )
121 |     {
122 |         if (numThreads == 0)
123 |             numThreads = 1;
124 | 
125 |         // transform input (BWT)
126 |         auto sentinelIndex = maniscalco::forward_burrows_wheeler_transform(inputBegin, inputEnd, numThreads);
127 | 
128 |         // write header for input.  the conversions are explicit because brace
129 |         // initialization does not permit implicit narrowing.
130 |         block_header blockHeader
131 |         {
132 |             .blockSize_ = static_cast<std::uint32_t>(std::distance(inputBegin, inputEnd)),
133 |             .sentinelIndex_ = static_cast<std::uint32_t>(sentinelIndex)
134 |         };
135 |         outStream.write((char const *)&blockHeader, sizeof(blockHeader));
136 | 
137 |         auto const inputSize = static_cast<std::size_t>(inputEnd - inputBegin);
138 | 
139 |         // create worker threads for encoding
140 |         std::vector<std::thread> threads;
141 |         threads.resize(numThreads);
142 | 
143 |         // set threads to process sub blocks of the input
144 |         std::atomic<std::uint32_t> subBlockId{0};
145 |         std::mutex mutex;
146 |         for (auto & thread : threads)
147 |         {
148 |             thread = std::thread([&]()
149 |             {
150 |                 // encode next available sub block until there are none remaining
151 |                 while (true)
152 |                 {
153 |                     std::uint32_t currentSubBlockId = subBlockId++;
154 |                     // work with offsets so that no pointer beyond inputEnd is ever formed
155 |                     std::size_t offset = (currentSubBlockId * max_encode_block_size);
156 |                     if (offset >= inputSize)
157 |                         return;
158 |                     auto blockBegin = (inputBegin + offset);
159 |                     auto blockEnd = ((inputSize - offset) > max_encode_block_size) ? (blockBegin + max_encode_block_size) : inputEnd;
160 |                     // create encode stream and encode this subblock
161 |                     maniscalco::m99_encode_stream encodeStream;
162 |                     maniscalco::m99_encode(blockBegin, blockEnd, encodeStream);
163 |                     encodeStream.flush();
164 |                     // write this encoded sub block to the destination.  the size is held
165 |                     // in a 32 bit variable so that exactly its four bytes are written.
166 |                     std::lock_guard lockGuard(mutex);
167 |                     std::uint32_t encodedSize = static_cast<std::uint32_t>((encodeStream.size() + 7) / 8);
168 |                     outStream.write((char const *)&encodedSize, sizeof(encodedSize));
169 |                     outStream.write((char const *)&currentSubBlockId, sizeof(currentSubBlockId));
170 |                     // write the encoded data for the stream
171 |                     for (auto const & packet : encodeStream)
172 |                     {
173 |                         auto bytesToWrite = ((packet.size() + 7) / 8);
174 |                         auto address = (packet.data() + packet.capacity() - bytesToWrite);
175 |                         outStream.write((char const *)address, bytesToWrite);
176 |                     }
177 |                 }
178 |             });
179 |         }
180 |         // wait for threads to complete encoding
181 |         for (auto & thread : threads)
182 |             thread.join();
183 |     }
169 | 
170 | 
171 |     //==================================================================================================================
172 |     // Decodes one block from 'inStream' using worker threads and appends the
173 |     // restored bytes to 'outStream'.  The block_header is read first; workers
174 |     // then take turns (under a mutex) reading one sub block record each and
175 |     // decode it into its slot of the output buffer.  Finally the BWT is reversed
176 |     // and the buffer is written out.
177 |     // The calling thread does not decode, so at least one worker is always
178 |     // started, even when 'numThreads' is 0 or 1.  Nothing is written if the
179 |     // header cannot be read.
180 |     void decode_block
181 |     (
182 |         std::ifstream & inStream,
183 |         std::ofstream & outStream,
184 |         std::uint32_t numThreads
185 |     )
186 |     {
187 |         // read header for block
188 |         block_header blockHeader{};
189 |         inStream.read((char *)&blockHeader, sizeof(blockHeader));
190 |         if (!inStream)
191 |             return; // truncated input: there is no complete header to act on
192 | 
193 |         // allocate space for decoded block data
194 |         std::vector<std::uint8_t> output;
195 |         output.resize(blockHeader.blockSize_);
196 |         auto outputBegin = output.data();
197 |         auto outputEnd = (outputBegin + output.size());
198 | 
199 |         // create decode threads
200 |         std::vector<std::thread> threads;
201 |         threads.resize((numThreads > 1) ? (numThreads - 1) : 1);
202 | 
203 |         // only accessed while 'mutex' is held
204 |         std::uint32_t numSubBlocksToDecode = static_cast<std::uint32_t>((blockHeader.blockSize_ + max_encode_block_size - 1) / max_encode_block_size);
205 | 
206 |         std::mutex mutex;
207 |         for (auto & thread : threads)
208 |         {
209 |             thread = std::thread([&]()
210 |                 {
211 |                     while (true)
212 |                     {
213 |                         maniscalco::buffer encodedData;
214 |                         std::uint32_t encodedSize = 0;
215 |                         std::uint32_t subBlockId = 0;
216 |                         {
217 |                             std::lock_guard lockGuard(mutex);
218 |                             if (numSubBlocksToDecode < 1)
219 |                                 return; // no more work to do
220 | 
221 |                             --numSubBlocksToDecode;
222 |                             // read next compress subblock from source
223 |                             inStream.read((char *)&encodedSize, 4);
224 |                             inStream.read((char *)&subBlockId, 4);
225 |                             // read encoded sub block data
226 |                             encodedData = maniscalco::buffer(encodedSize);
227 |                             inStream.read((char *)encodedData.data(), encodedSize);
228 |                         }
229 |                         // ignore a sub block id that does not fall inside this block
230 |                         // rather than decoding outside of the output buffer
231 |                         auto destinationOffset = (subBlockId * max_encode_block_size);
232 |                         if (destinationOffset >= output.size())
233 |                             continue;
234 |                         auto destinationBegin = (outputBegin + destinationOffset);
235 |                         auto destinationEnd = ((output.size() - destinationOffset) > max_encode_block_size) ? (destinationBegin + max_encode_block_size) : outputEnd;
236 |                         maniscalco::m99_decode_stream decodeStream(std::move(encodedData), encodedSize);
237 |                         maniscalco::m99_decode(decodeStream, destinationBegin, destinationEnd);
238 |                     }
239 |                 });
240 |         }
241 | 
242 |         // wait for all subblocks to be decoded
243 |         for (auto & thread : threads)
244 |             thread.join();
245 | 
246 |         // reverse the BWT
247 |         maniscalco::reverse_burrows_wheeler_transform(output.begin(), output.end(), blockHeader.sentinelIndex_, numThreads);
248 |         outStream.write((char const *)output.data(), output.size());
249 |     }
238 | 
239 | 
240 |     //==================================================================================================================
241 |     // Decodes one block from 'inStream' on the calling thread and appends the
242 |     // restored bytes to 'outStream'.  Reads the block_header, decodes each sub
243 |     // block record into its slot of the output buffer, reverses the BWT and
244 |     // writes the buffer out.  Nothing is written if the header cannot be read.
245 |     void decode_block
246 |     (
247 |         // single threaded
248 |         std::ifstream & inStream,
249 |         std::ofstream & outStream
250 |     )
251 |     {
252 |         // read header for block
253 |         block_header blockHeader{};
254 |         inStream.read((char *)&blockHeader, sizeof(blockHeader));
255 |         if (!inStream)
256 |             return; // truncated input: there is no complete header to act on
257 | 
258 |         // allocate space for decoded block data
259 |         std::vector<std::uint8_t> output;
260 |         output.resize(blockHeader.blockSize_);
261 |         auto outputBegin = output.data();
262 |         auto outputEnd = (outputBegin + output.size());
263 | 
264 |         std::uint32_t numSubBlocksToDecode((blockHeader.blockSize_ + max_encode_block_size - 1) / max_encode_block_size);
265 |         for (; numSubBlocksToDecode > 0; --numSubBlocksToDecode)
266 |         {
267 |             std::uint32_t encodedSize = 0;
268 |             std::uint32_t subBlockId = 0;
269 |             // read next compress subblock from source
270 |             inStream.read((char *)&encodedSize, 4);
271 |             inStream.read((char *)&subBlockId, 4);
272 |             // read encoded sub block data
273 |             maniscalco::buffer encodedData;
274 |             encodedData = maniscalco::buffer(encodedSize);
275 |             inStream.read((char *)encodedData.data(), encodedSize);
276 |             // ignore a sub block id that does not fall inside this block rather
277 |             // than decoding outside of the output buffer
278 |             auto destinationOffset = (subBlockId * max_encode_block_size);
279 |             if (destinationOffset >= output.size())
280 |                 continue;
281 |             auto destinationBegin = (outputBegin + destinationOffset);
282 |             auto destinationEnd = ((output.size() - destinationOffset) > max_encode_block_size) ? (destinationBegin + max_encode_block_size) : outputEnd;
283 |             maniscalco::m99_decode_stream decodeStream(std::move(encodedData), encodedSize);
284 |             maniscalco::m99_decode(decodeStream, destinationBegin, destinationEnd);
285 |         }
286 |         // reverse the BWT
287 |         maniscalco::reverse_burrows_wheeler_transform(output.begin(), output.end(), blockHeader.sentinelIndex_, 1);
288 |         outStream.write((char const *)output.data(), output.size());
289 |     }
282 | 
283 | 
284 |     //==================================================================================================================
285 |     // Writes the one line program banner to stdout.
286 |     void print_about()
287 |     {
288 |         static char const banner[] = "m99 - high performance BWT compressor.  Author: M.A. Maniscalco (1999 - 2020)";
289 |         std::cout << banner << std::endl;
290 |     }
291 | 
292 | 
293 |     //==================================================================================================================
294 |     // Writes the command line help to stdout, one line at a time.
295 |     // Always returns 0 so that callers can write 'return print_usage();'.
296 |     std::int32_t print_usage()
297 |     {
298 |         static char const * const usageLines[] =
299 |         {
300 |             "Usage: m99 [e|d] inputFile outputFile [switches]",
301 |             "\t -t = threadCount",
302 |             "\t -b = blockSize (max = 1GB)",
303 |             "example: m99 e inputFile outputFile -t8 -b100000",
304 |             "example: m99 d inputFile outputFile -t8"
305 |         };
306 | 
307 |         for (char const * line : usageLines)
308 |             std::cout << line << std::endl;
309 |         return 0;
310 |     }
306 | 
307 | 
308 |     //==========================================================================
309 |     // Decompresses the file at 'inputPath' into 'outputPath'.
310 |     // Blocks are decoded one after another until the end of the input is
311 |     // reached; the single threaded decode_block overload is used when
312 |     // numThreads == 1.  Prints the elapsed time to stdout.  Decoding stops with
313 |     // a message if the input stream enters a failed state (for example because
314 |     // the file is truncated) instead of looping forever.
315 |     void decode
316 |     (
317 |         char const * inputPath,
318 |         char const * outputPath,
319 |         int numThreads
320 |     )
321 |     {
322 |         // read data from file
323 |         std::ifstream inputStream(inputPath, std::ios_base::in | std::ios_base::binary);
324 |         if (!inputStream.is_open())
325 |         {
326 |             std::cout << "failed to open file \"" << inputPath << "\"" << std::endl;
327 |             return;
328 |         }
329 | 
330 |         // create the output stream
331 |         std::ofstream outStream(outputPath, std::ios_base::out | std::ios_base::binary);
332 |         if (!outStream.is_open())
333 |         {
334 |             std::cout << "failed to create output file \"" << outputPath << "\"" << std::endl;
335 |             return;
336 |         }
337 | 
338 |         auto startTime = std::chrono::system_clock::now();
339 |         inputStream.seekg(0, std::ios_base::end);
340 |         auto end = inputStream.tellg();
341 |         inputStream.seekg(0, std::ios_base::beg);
342 | 
343 |         // tellg() reports -1 once the stream has failed, which would never compare
344 |         // equal to 'end'; the stream state is therefore tested as well
345 |         while (inputStream.good() && (inputStream.tellg() != end))
346 |         {
347 |             if (numThreads == 1)
348 |                 decode_block(inputStream, outStream);
349 |             else
350 |                 decode_block(inputStream, outStream, numThreads);
351 |         }
352 |         if (!inputStream.good())
353 |             std::cout << "failed to read file \"" << inputPath << "\" (truncated or corrupt input)" << std::endl;
354 | 
355 |         auto finishTime = std::chrono::system_clock::now();
356 |         auto elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>(finishTime - startTime).count();
357 |         std::cout << "Elapsed time: " << ((long double)elapsedTime / 1000) << " seconds" << std::endl;
358 | 
359 |         inputStream.close();
360 |         outStream.close();
361 |     }
355 | 
356 | 
357 |     //=================================================================================
358 |     // Compresses the file at 'inputPath' into 'outputPath'.
359 |     // The input is consumed in blocks of at most 'blockSize' bytes and each
360 |     // block is handed to encode_block (the single threaded overload when
361 |     // numThreads == 1).  Prints the compression ratio, elapsed time and
362 |     // throughput to stdout.  A 'blockSize' below 1 is rejected with a message.
363 |     void encode
364 |     (
365 |         char const * inputPath,
366 |         char const * outputPath,
367 |         int numThreads,
368 |         int blockSize
369 |     )
370 |     {
371 |         if (blockSize < 1)
372 |         {
373 |             std::cout << "invalid block size" << std::endl;
374 |             return;
375 |         }
376 | 
377 |         // create the output stream
378 |         std::ofstream outStream(outputPath, std::ios_base::out | std::ios_base::binary);
379 |         if (!outStream.is_open())
380 |         {
381 |             std::cout << "failed to create output file \"" << outputPath << "\"" << std::endl;
382 |             return;
383 |         }
384 | 
385 |         auto startTime = std::chrono::system_clock::now();
386 | 
387 |         // read data from file
388 |         std::ifstream inputStream(inputPath, std::ios_base::in | std::ios_base::binary);
389 |         if (!inputStream.is_open())
390 |         {
391 |             std::cout << "failed to open file \"" << inputPath << "\"" << std::endl;
392 |             return;
393 |         }
394 | 
395 |         // size the block buffer: never larger than the file itself, so that a
396 |         // small file does not pay for a full 'blockSize' buffer
397 |         inputStream.seekg(0, std::ios_base::end);
398 |         std::streamoff const fileSize = inputStream.tellg();
399 |         inputStream.seekg(0, std::ios_base::beg);
400 |         std::size_t bufferSize = static_cast<std::size_t>(blockSize);
401 |         if ((fileSize >= 0) && (static_cast<std::size_t>(fileSize) < bufferSize))
402 |             bufferSize = static_cast<std::size_t>(fileSize);
403 | 
404 |         // the buffer needs real elements (not merely reserved capacity) before it
405 |         // may be written through data()
406 |         std::vector<std::uint8_t> input(bufferSize);
407 | 
408 |         std::size_t bytesEncoded = 0;
409 |         while (true)
410 |         {
411 |             inputStream.read((char *)input.data(), static_cast<std::streamsize>(input.size()));
412 |             auto size = inputStream.gcount();
413 |             if (size == 0)
414 |                 break;
415 |             bytesEncoded += size;
416 |             if (numThreads == 1)
417 |                 encode_block(input.data(), input.data() + size, outStream);
418 |             else
419 |                 encode_block(input.data(), input.data() + size, outStream, numThreads);
420 |         }
421 |         auto finishTime = std::chrono::system_clock::now();
422 |         auto elapsedOverallEncode = std::chrono::duration_cast<std::chrono::milliseconds>(finishTime - startTime).count();
423 | 
424 |         std::size_t inputSize = bytesEncoded;
425 |         std::size_t outputSize = outStream.tellp();
426 | 
427 |         std::cout << "compressed: " << inputSize << " -> " << outputSize << " bytes.  ratio = " << (((long double)outputSize / inputSize) * 100) << "%" << std::endl;
428 |         std::cout << "Elapsed time: " << ((long double)elapsedOverallEncode / 1000) << " seconds : " <<  (((long double)inputSize / (1 << 20)) / ((double)elapsedOverallEncode / 1000)) << " MB/sec" << std::endl;
429 | 
430 |         outStream.close();
431 |         inputStream.close();
432 |     }
412 | 
413 | }
414 | 
415 | 
416 | //======================================================================================================================
417 | // Program entry point.
418 | // Usage: m99 [e|d] inputFile outputFile [-t<threads>] [-b<blockSize>]
419 | // Parses the switches, settles on a thread count and runs encode or decode.
420 | // Returns -1 for a malformed switch value and 0 otherwise (including when
421 | // only the usage text is printed).
422 | std::int32_t main
423 | (
424 |     std::int32_t argCount,
425 |     char const * argValue[]
426 | )
427 | {
428 |     print_about();
429 | 
430 |     if ((argCount < 4) || (strlen(argValue[1]) != 1))
431 |         return print_usage();
432 | 
433 |     // largest accepted block size (1GB); parsed values saturate here
434 |     static std::size_t constexpr valueLimit = (1 << 30);
435 | 
436 |     // parses the decimal digits of a switch value (the text after "-t" / "-b").
437 |     // the result saturates at valueLimit instead of overflowing.  returns false
438 |     // if a character other than a digit is found.
439 |     auto parseSwitchValue = [](char const * cur, std::size_t & value)
440 |     {
441 |         value = 0;
442 |         for (; *cur != 0; ++cur)
443 |         {
444 |             if ((*cur < '0') || (*cur > '9'))
445 |                 return false;
446 |             value = ((value * 10) + (*cur - '0'));
447 |             if (value > valueLimit)
448 |                 value = valueLimit;
449 |         }
450 |         return true;
451 |     };
452 | 
453 |     std::size_t numThreads = 0;
454 |     std::size_t maxBlockSize = valueLimit;
455 |     for (auto argIndex = 4; argIndex < argCount; ++argIndex)
456 |     {
457 |         char const * argument = argValue[argIndex];
458 |         if (argument[0] != '-')
459 |             return print_usage();
460 | 
461 |         switch (argument[1])
462 |         {
463 |             case 'b':
464 |             {
465 |                 // block size.  zero is rejected because no data could be encoded with it
466 |                 if (!parseSwitchValue(argument + 2, maxBlockSize) || (maxBlockSize == 0))
467 |                 {
468 |                     std::cout << "invalid block size" << std::endl;
469 |                     print_usage();
470 |                     return -1;
471 |                 }
472 |                 break;
473 |             }
474 |             case 't':
475 |             {
476 |                 // thread count.  zero (or no digits) selects the default below
477 |                 if (!parseSwitchValue(argument + 2, numThreads))
478 |                 {
479 |                     std::cout << "invalid thread count" << std::endl;
480 |                     print_usage();
481 |                     return -1;
482 |                 }
483 |                 break;
484 |             }
485 |             default:
486 |             {
487 |                 std::cout << "unknown switch: " << argument << std::endl;
488 |                 return print_usage();
489 |             }
490 |         }
491 |     }
492 | 
493 |     // hardware_concurrency() is permitted to return 0 when the value is unknown
494 |     std::size_t const hardwareThreads = std::thread::hardware_concurrency();
495 |     if (hardwareThreads == 0)
496 |     {
497 |         // keep an explicit request, otherwise run single threaded
498 |         if (numThreads == 0)
499 |             numThreads = 1;
500 |     }
501 |     else if ((numThreads == 0) || (numThreads > hardwareThreads))
502 |     {
503 |         numThreads = hardwareThreads;
504 |     }
505 | 
506 |     switch (argValue[1][0])
507 |     {
508 |         case 'e':
509 |         {
510 |             encode(argValue[2], argValue[3], static_cast<int>(numThreads), static_cast<int>(maxBlockSize));
511 |             break;
512 |         }
513 | 
514 |         case 'd':
515 |         {
516 |             decode(argValue[2], argValue[3], static_cast<int>(numThreads));
517 |             break;
518 |         }
519 | 
520 |         default:
521 |         {
522 |             print_usage();
523 |             break;
524 |         }
525 |     }
526 | 
527 |     return 0;
528 | }
510 | 
511 | 


--------------------------------------------------------------------------------
/src/include/endian.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | 
4 | #include "./endian/endian.h"
5 | 
6 | 


--------------------------------------------------------------------------------
/src/include/endian/byte_swap.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | #include <type_traits>
 5 | 
 6 | #ifdef __APPLE__
 7 |     #include <libkern/OSByteOrder.h>
 8 | #else
 9 |     #include <byteswap.h>
10 | #endif
11 | 
12 | 
13 | namespace maniscalco
14 | {
15 | 
16 |     // byte_swap(value) returns 'value' with the order of its bytes reversed.
17 |     // One overload exists per operand size (1, 2, 4 and 8 bytes) and is chosen
18 |     // with enable_if on sizeof(T).  The value is converted to the unsigned
19 |     // integer of the same size, swapped, and converted back to T.
20 | 
21 |     //==============================================================================
22 |     // single byte: nothing to swap
23 |     template <typename T>
24 |     auto byte_swap
25 |     (
26 |         T value
27 |     ) -> typename std::enable_if<sizeof(T) == sizeof(std::uint8_t), T>::type
28 |     {
29 |         return value;
30 |     }
31 | 
32 | 
33 |     //==============================================================================
34 |     // two bytes
35 |     template <typename T>
36 |     auto byte_swap
37 |     (
38 |         T value
39 |     ) -> typename std::enable_if<sizeof(T) == sizeof(std::uint16_t), T>::type
40 |     {
41 |         auto v = static_cast<std::uint16_t>(value);
42 |         // the shifts are evaluated as int; narrow back to 16 bits explicitly
43 |         return static_cast<T>(static_cast<std::uint16_t>((v >> 8) | (v << 8)));
44 |     }
45 | 
46 | 
47 |     //==============================================================================
48 |     // four bytes
49 |     template <typename T>
50 |     auto byte_swap
51 |     (
52 |         T value
53 |     ) -> typename std::enable_if<sizeof(T) == sizeof(std::uint32_t), T>::type
54 |     {
55 |         #ifdef __APPLE__
56 |             return static_cast<T>(OSSwapInt32(static_cast<std::uint32_t>(value)));
57 |         #else
58 |             return static_cast<T>(__builtin_bswap32(static_cast<std::uint32_t>(value)));
59 |         #endif
60 |     }
61 | 
62 | 
63 |     //==============================================================================
64 |     // eight bytes
65 |     template <typename T>
66 |     auto byte_swap
67 |     (
68 |         T value
69 |     ) -> typename std::enable_if<sizeof(T) == sizeof(std::uint64_t), T>::type
70 |     {
71 |         #ifdef __APPLE__
72 |             return static_cast<T>(OSSwapInt64(static_cast<std::uint64_t>(value)));
73 |         #else
74 |             return static_cast<T>(__builtin_bswap64(static_cast<std::uint64_t>(value)));
75 |         #endif
76 |     }
77 | 
78 | } // namespace maniscalco
79 | 


--------------------------------------------------------------------------------
/src/include/endian/endian.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | 
  4 | 
  5 | namespace maniscalco
  6 | {
  7 |     
  8 |     template <typename, typename> class endian;
  9 | 
 10 | } // namespace maniscalco
 11 | 
 12 | 
 13 | #include "./byte_swap.h"
 14 | #include "./endian_type.h"
 15 | #include "./endian_swap.h"
 16 | 
 17 | #include <type_traits>
 18 | 
 19 | 
 20 | namespace maniscalco
 21 | {
 22 | 
 23 |     //==============================================================================
 24 |     // Wrapper around a single value of 'data_type', tagged with 'endian_type'.
 25 |     // It can be built from, assigned from and converted back to the underlying
 26 |     // type.  Only declarations are visible here.
 27 |     // NOTE(review): the member definitions (presumably in endian_swap.h or
 28 |     // further down this header) decide when value_ is byte swapped - confirm
 29 |     // there before relying on a particular storage order.
 30 |     template <typename data_type, typename endian_type>
 31 |     class endian
 32 |     {
 33 |     public:
 34 | 
 35 |         using underlying_type = data_type;
 36 |         using type = endian_type;
 37 | 
 38 |         // every instantiation of endian may access the private state of the others
 39 |         template <typename, typename> friend class endian;
 40 | 
 41 |         // construction: default, copy, move, and from an underlying value
 42 |         endian();
 43 |         endian(endian const &);
 44 |         endian(endian &&);
 45 |         endian(underlying_type);
 46 | 
 47 |         // assignment: copy, move, and from an underlying value
 48 |         endian & operator = (endian const &);
 49 |         endian & operator = (endian &&);
 50 |         endian & operator = (underlying_type);
 51 | 
 52 |         // read access: implicit conversion and an explicitly named getter
 53 |         operator underlying_type() const;
 54 |         underlying_type get() const;
 55 | 
 56 |     private:
 57 | 
 58 |         underlying_type  value_;
 59 | 
 60 |     };
 78 |     // shorthand aliases: one per byte order tag
 79 |     template <typename data_type> using big_endian = endian<data_type, big_endian_type>;
 80 |     template <typename data_type> using little_endian = endian<data_type, little_endian_type>;
 81 |     template <typename data_type> using network_order = endian<data_type, network_order_type>;
 82 |     template <typename data_type> using host_order = endian<data_type, host_order_type>;
 83 | 
 84 |     // comparison operators mixing endian values with plain values; the endian operand is cast to data_type (host order) before comparing
 85 |     template <typename input_type, typename data_type, typename endian_type> inline static bool operator < (input_type a, endian<data_type, endian_type> b){return (a < (data_type)b);} // NOTE(review): left operand is an unconstrained input_type here, unlike the overloads below - confirm intended
 86 |     template <typename data_type, typename endian_type> inline static bool operator < (endian<data_type, endian_type> a, data_type b){return ((data_type)a < b);}
 87 |     template <typename data_type, typename endian_type> inline static bool operator <= (data_type a, endian<data_type, endian_type> b){return (a <= (data_type)b);}
 88 |     template <typename data_type, typename endian_type> inline static bool operator <= (endian<data_type, endian_type> a, data_type b){return ((data_type)a <= b);}
 89 |     template <typename data_type, typename endian_type> inline static bool operator == (data_type a, endian<data_type, endian_type> b){return (a == (data_type)b);}
 90 |     template <typename data_type, typename endian_type> inline static bool operator == (endian<data_type, endian_type> a, data_type b){return ((data_type)a == b);}
 91 |     template <typename data_type, typename endian_type> inline static bool operator >= (data_type a, endian<data_type, endian_type> b){return (a >= (data_type)b);}
 92 |     template <typename data_type, typename endian_type> inline static bool operator >= (endian<data_type, endian_type> a, data_type b){return ((data_type)a >= b);}
 93 |     template <typename data_type, typename endian_type> inline static bool operator > (data_type a, endian<data_type, endian_type> b){return (a > (data_type)b);}
 94 |     template <typename data_type, typename endian_type> inline static bool operator > (endian<data_type, endian_type> a, data_type b){return ((data_type)a > b);}
 95 |     template <typename data_type, typename endian_type> inline static bool operator != (data_type a, endian<data_type, endian_type> b){return (a != (data_type)b);}
 96 |     template <typename data_type, typename endian_type> inline static bool operator != (endian<data_type, endian_type> a, data_type b){return ((data_type)a != b);}
 97 | 
 98 |     // factory functions: each comes in two forms, one re-wrapping an existing endian value and one wrapping a plain (host order) value
 99 |     template <typename data_type, typename endian_type> big_endian<data_type> make_big_endian(endian<data_type, endian_type>);
100 |     template <typename data_type> big_endian<data_type> make_big_endian(data_type);
101 |     template <typename data_type, typename endian_type> little_endian<data_type> make_little_endian(endian<data_type, endian_type>);
102 |     template <typename data_type> little_endian<data_type> make_little_endian(data_type);
103 |     template <typename data_type, typename endian_type> host_order<data_type> make_host_order(endian<data_type, endian_type>);
104 |     template <typename data_type> host_order<data_type> make_host_order(data_type);
105 |     template <typename data_type, typename endian_type> network_order<data_type> make_network_order(endian<data_type, endian_type>);
106 |     template <typename data_type> network_order<data_type> make_network_order(data_type);
107 | 
108 | }
109 | 
110 | 
111 | //==============================================================================
112 | template <typename data_type, typename endian_type>
113 | maniscalco::endian<data_type, endian_type>::endian
114 | (
115 | ):
116 |     value_() // default construction value-initializes the stored value
117 | {
118 | }
119 | 
120 | 
121 | //==============================================================================
122 | template <typename data_type, typename endian_type>
123 | maniscalco::endian<data_type, endian_type>::endian
124 | (
125 |     endian && input
126 | ):
127 |     value_(input.value_) // both objects share endian_type, so the stored value is copied without a swap
128 | {
129 | }
130 | 
131 | 
132 | //==============================================================================
133 | template <typename data_type, typename endian_type>
134 | maniscalco::endian<data_type, endian_type>::endian
135 | (
136 |     endian const & input
137 | ):
138 |     value_(input.value_) // both objects share endian_type, so the stored value is copied without a swap
139 | {
140 | }
141 | 
142 | 
143 | //==============================================================================
144 | template <typename data_type, typename endian_type>
145 | maniscalco::endian<data_type, endian_type>::endian
146 | (
147 |     data_type input
148 | ):
149 |     value_(endian_swap<host_order_type, endian_type>(input)) // plain values arrive in host order; convert to endian_type for storage
150 | {
151 | }
152 | 
153 | 
154 | //==============================================================================
155 | template <typename data_type, typename endian_type>
156 | auto maniscalco::endian<data_type, endian_type>::operator =
157 | (
158 |     endian const & input
159 | ) -> endian &
160 | {
161 |     value_ = input.value_; // same endian_type on both sides: no swap needed
162 |     return *this;
163 | }
164 | 
165 | 
166 | //==============================================================================
167 | template <typename data_type, typename endian_type>
168 | auto maniscalco::endian<data_type, endian_type>::operator =
169 | (
170 |     endian && input
171 | ) -> endian &
172 | {
173 |     value_ = input.value_; // same endian_type on both sides: no swap needed
174 |     return *this;
175 | }
176 | 
177 | 
178 | //==============================================================================
179 | template <typename data_type, typename endian_type>
180 | auto maniscalco::endian<data_type, endian_type>::operator =
181 | (
182 |     data_type input
183 | ) -> endian &
184 | {
185 |     value_ = endian_swap<host_order_type, endian_type>(input); // host order in, endian_type order stored
186 |     return *this;
187 | }
188 | 
189 | 
190 | //==============================================================================
191 | template <typename data_type, typename endian_type>
192 | maniscalco::endian<data_type, endian_type>::operator underlying_type
193 | (
194 | ) const
195 | {
196 |     return endian_swap<endian_type, host_order_type>(value_); // stored order back to host order
197 | }
198 | 
199 | 
200 | //==============================================================================
201 | template <typename data_type, typename endian_type>
202 | auto maniscalco::endian<data_type, endian_type>::get
203 | (
204 | ) const -> underlying_type
205 | {
206 |     return endian_swap<endian_type, host_order_type>(value_); // identical conversion to the operator above
207 | }
208 | 
209 | 
210 | //==============================================================================
211 | template <typename T, typename E>
212 | auto maniscalco::make_big_endian
213 | (
214 |     maniscalco::endian<T, E> value
215 | ) -> big_endian<T>
216 | {
217 |     return big_endian<T>(static_cast<T>(value)); // read out in host order, then re-wrap as big endian
218 | }
219 | 
220 | 
221 | //==============================================================================
222 | template <typename T>
223 | auto maniscalco::make_big_endian
224 | (
225 |     T value
226 | ) -> big_endian<T>
227 | {
228 |     return big_endian<T>(value); // value is already a plain host order T
229 | }
230 | 
231 | 
232 | //==============================================================================
233 | template <typename T, typename E>
234 | auto maniscalco::make_little_endian
235 | (
236 |     endian<T, E> value
237 | ) -> little_endian<T>
238 | {
239 |     return little_endian<T>(static_cast<T>(value)); // read out in host order, then re-wrap as little endian
240 | }
241 | 
242 | 
243 | //==============================================================================
244 | template <typename T>
245 | auto maniscalco::make_little_endian
246 | (
247 |     T value
248 | ) -> little_endian<T>
249 | {
250 |     return little_endian<T>(value); // value is already a plain host order T
251 | }
252 | 
253 | 
254 | //==============================================================================
255 | template <typename T, typename E>
256 | auto maniscalco::make_host_order
257 | (
258 |     endian<T, E> value
259 | ) -> host_order<T>
260 | {
261 |     return host_order<T>(static_cast<T>(value)); // read out in host order, then re-wrap as host order
262 | }
263 | 
264 | 
265 | //==============================================================================
266 | template <typename T>
267 | auto maniscalco::make_host_order
268 | (
269 |     T value
270 | ) -> host_order<T>
271 | {
272 |     return host_order<T>(value); // value is already a plain host order T
273 | }
274 | 
275 | 
276 | //==============================================================================
277 | template <typename T, typename E>
278 | auto maniscalco::make_network_order
279 | (
280 |     endian<T, E> value
281 | ) -> network_order<T>
282 | {
283 |     return network_order<T>(static_cast<T>(value)); // read out in host order, then re-wrap as network order
284 | }
285 | 
286 | 
287 | //==============================================================================
288 | template <typename T>
289 | auto maniscalco::make_network_order
290 | (
291 |     T value
292 | ) -> network_order<T>
293 | {
294 |     return network_order<T>(value); // value is already a plain host order T
295 | }
296 | 


--------------------------------------------------------------------------------
/src/include/endian/endian_swap.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "./endian_type.h"
  4 | #include "./byte_swap.h"
  5 | #include <type_traits>
  6 | 
  7 | 
  8 | namespace maniscalco
  9 | {
 10 | 
 11 |     namespace impl
 12 |     {
 13 |         // functor selected at compile time from a pair of byte order tags
 14 |         template
 15 |         <
 16 |             typename, // byte order tag the input is in
 17 |             typename, // byte order tag the result should be in
 18 |             typename = void // enabler slot matched by the std::enable_if specializations below
 19 |         >
 20 |         struct endian_swap;
 21 | 
 22 | 
 23 |         //======================================================================
 24 |         // specialization for from == to (no byte swap)
 25 |         template
 26 |         <
 27 |             typename from_endian,
 28 |             typename to_endian
 29 |         >
 30 |         struct endian_swap
 31 |         <
 32 |             from_endian,
 33 |             to_endian,
 34 |             typename std::enable_if
 35 |             <
 36 |                 std::is_same
 37 |                 <
 38 |                     from_endian,
 39 |                     to_endian
 40 |                 >::value
 41 |             >::type
 42 |         >
 43 |         {
 44 |             template <typename data_type>
 45 |             inline data_type operator()
 46 |             (
 47 |                 data_type input
 48 |             ) const
 49 |             {
 50 |                 return input; // identical tags: the value passes through untouched
 51 |             }
 52 |         };
 53 | 
 54 | 
 55 |         //======================================================================
 56 |         // specialization for from != to (do byte swap)
 57 |         template
 58 |         <
 59 |             typename from_endian,
 60 |             typename to_endian
 61 |         >
 62 |         struct endian_swap
 63 |         <
 64 |             from_endian,
 65 |             to_endian,
 66 |             typename std::enable_if
 67 |             <
 68 |                 !std::is_same
 69 |                 <
 70 |                     from_endian,
 71 |                     to_endian
 72 |                 >::value
 73 |             >::type
 74 |         >
 75 |         {
 76 |             template <typename data_type>
 77 |             inline data_type operator()
 78 |             (
 79 |                 data_type input
 80 |             ) const
 81 |             {
 82 |                 return byte_swap(input); // differing tags: reverse the bytes via the byte_swap overload set
 83 |             }
 84 |         };
 85 | 
 86 |     }
 87 | 
 88 | 
 89 |     //==========================================================================
 90 |     // converts input from from_endian byte order to to_endian byte order;
 91 |     // the bytes are swapped only when the two byte order tags differ
 92 |     template
 93 |     <
 94 |         typename from_endian,
 95 |         typename to_endian,
 96 |         typename data_type
 97 |     >
 98 |     static inline data_type endian_swap
 99 |     (
100 |         data_type input
101 |     )
102 |     {
103 |         return maniscalco::impl::endian_swap<from_endian, to_endian>()(input);
104 |     }
105 | 
106 | }
107 | 


--------------------------------------------------------------------------------
/src/include/endian/endian_type.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | 
 4 | namespace maniscalco
 5 | {
 6 | 
 7 |     struct big_endian_type;
 8 |     struct little_endian_type;
 9 | 
10 |     using network_order_type = big_endian_type;
11 |     using host_order_type = little_endian_type;
12 |    //    using host_order_type = big_endian_type;
13 | }
14 | 


--------------------------------------------------------------------------------
/src/library/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(m99)  # processes m99/CMakeLists.txt, which defines the m99 library target
2 | 


--------------------------------------------------------------------------------
/src/library/m99.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | // umbrella header: the repository has no m99/m99.h, so pull in the decoder and encoder interfaces directly
4 | #include "./m99/m99_decode.h"
5 | #include "./m99/m99_encode.h"
6 | 


--------------------------------------------------------------------------------
/src/library/m99/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_library(m99
 2 |     m99_decode.cpp
 3 |     m99_encode.cpp
 4 |     m99_encode_stream.cpp
 5 |     m99_decode_stream.cpp
 6 | )
 7 | # PUBLIC states the existing transitive linking explicitly; m99_decode_stream.h includes an io header
 8 | target_link_libraries(m99 PUBLIC io entropy)
 9 | # NOTE(review): _m99_include_dir is not set in this file - presumably a parent CMakeLists.txt defines it; confirm
10 | target_include_directories(m99
11 |     PUBLIC
12 |         $<BUILD_INTERFACE:${_m99_include_dir}>
13 |         $<INSTALL_INTERFACE:include/m99>
14 | )
15 | 
16 | target_compile_features(m99 PUBLIC cxx_std_17)
17 | 


--------------------------------------------------------------------------------
/src/library/m99/m99_decode.cpp:
--------------------------------------------------------------------------------
  1 | #include "./m99_decode.h"
  2 | 
  3 | #include <fstream>
  4 | 
  5 | 
  6 | namespace
  7 | {
  8 |     using namespace maniscalco;
  9 | 
 10 |     struct symbol_info // a byte value paired with a count
 11 |     {
 12 |         symbol_info(){} // deliberately empty: both members are left uninitialized
 13 |         symbol_info(std::uint8_t symbol, std::uint32_t count):symbol_(symbol), count_(count){}
 14 |         std::uint8_t    symbol_;
 15 |         std::uint32_t   count_;
 16 |     };
 17 | 
 18 | 
 19 |     //======================================================================================================================
 20 |     std::uint32_t unpack_value // returns the left-hand share of total, reading whatever bits it needs from decodeStream
 21 |     (
 22 |         m99_decode_stream & decodeStream,
 23 |         std::uint32_t total,    // combined count to be divided between the left and right sides
 24 |         std::uint32_t maxLeft,  // most that the left side can still take
 25 |         std::uint32_t maxRight  // most that the right side can still take
 26 |     )
 27 |     {
 28 |         if (total > maxLeft)
 29 |         {
 30 |             auto inferredRight = (total - maxLeft); // the excess over maxLeft can only sit on the right
 31 |             maxRight -= inferredRight;
 32 |             total -= inferredRight;
 33 |         }
 34 |         std::uint32_t left = 0; // unsigned, like every quantity added to it and like the return type
 35 |         if (total > maxRight)
 36 |         {
 37 |             left = (total - maxRight); // the excess over maxRight can only sit on the left
 38 |             total -= left;
 39 |         }
 40 |         if (total)
 41 |         {
 42 |             std::uint32_t codeLength = 1;
 43 |             while (total >> ++codeLength)
 44 |                 ;
 45 |             auto code = decodeStream.pop(--codeLength); // the low-order bits are always present in the stream
 46 |             if (((code | (1ull << codeLength)) <= total)) // the top bit is stored only when setting it could still stay within total
 47 |                 code |= (decodeStream.pop_bit() << codeLength);
 48 |             left += code;
 49 |         }    
 50 |         return left;
 51 |     }
 52 | 
 53 | 
 54 |     //======================================================================================================================
 55 |     void split // rebuilds totalSize bytes at decodedData from the per-symbol counts in parentSymbolInfo
 56 |     (
 57 |         m99_decode_stream & decodeStream,
 58 |         std::uint8_t * decodedData,
 59 |         std::uint32_t totalSize,
 60 |         std::uint32_t leftSize, // bytes in the left partition; the remaining totalSize - leftSize form the right one
 61 |         symbol_info const * parentSymbolInfo
 62 |     )
 63 |     {
 64 |         if (parentSymbolInfo[0].count_ >= totalSize)
 65 |         {
 66 |             while (totalSize--) // the first symbol accounts for the whole range: fill it and stop
 67 |                 *decodedData++ = parentSymbolInfo[0].symbol_;
 68 |             return;
 69 |         }
 70 |         // ranges of one or two bytes are resolved directly, without further partitioning
 71 |         if (totalSize <= 2)
 72 |         {
 73 |             if (totalSize == 2)
 74 |             {
 75 |                 auto c = decodeStream.pop_bit(); // one bit selects which of the two symbols comes first
 76 |                 decodedData[c == 1] = parentSymbolInfo[1].symbol_;
 77 |                 decodedData[c == 0] = parentSymbolInfo[0].symbol_; 
 78 |             }
 79 |             else
 80 |             {
 81 |                 decodedData[0] = parentSymbolInfo[0].symbol_;
 82 |             }
 83 |             return;
 84 |         }
 85 |         // general case: divide each parent symbol's count between a left and a right symbol list
 86 |         std::uint32_t rightSize = (totalSize - leftSize);
 87 |         symbol_info leftSymbolInfo[256];
 88 |         symbol_info rightSymbolInfo[256];
 89 |         symbol_info * result[2] = {leftSymbolInfo, rightSymbolInfo};
 90 |         symbol_info const * currentSymbolInfo = parentSymbolInfo;
 91 |         static auto constexpr leftSide = 0;
 92 |         static auto constexpr rightSide = 1;
 93 |         // counts are read from the stream only while both sides still have room
 94 |         auto leftSizeRemaining = leftSize;
 95 |         auto rightSizeRemaining = rightSize;
 96 |         while (leftSizeRemaining && rightSizeRemaining)
 97 |         {
 98 |             symbol_info symbolInfo = *currentSymbolInfo++;
 99 |             auto totalCount = symbolInfo.count_;
100 |             auto leftCount = unpack_value(decodeStream, totalCount, leftSizeRemaining, rightSizeRemaining);
101 |             auto rightCount = (totalCount - leftCount);
102 |             leftSizeRemaining -= leftCount;
103 |             rightSizeRemaining -= rightCount;
104 |             *result[leftSide] = {symbolInfo.symbol_, leftCount};
105 |             *result[rightSide] = {symbolInfo.symbol_, rightCount};
106 |             result[leftSide] += (leftCount != 0); // an entry is kept only when its count is non-zero; otherwise the slot is overwritten next time
107 |             result[rightSide] += (rightCount != 0);
108 |         }
109 |         auto n = leftSizeRemaining + rightSizeRemaining; // one side is full here, so n is what the other side still needs
110 |         symbol_info * c = result[(leftSizeRemaining == 0)]; // the side that still has room receives the remaining symbols whole
111 |         while (n > 0)
112 |         {
113 |             n -= currentSymbolInfo->count_;
114 |             *c++ = *currentSymbolInfo++;
115 |         }
116 |         split(decodeStream, decodedData, leftSize, leftSize >> 1, leftSymbolInfo); // left partition is decoded first, then the right
117 |         split(decodeStream, decodedData + leftSize, rightSize, rightSize >> 1, rightSymbolInfo);
118 |     }
119 | 
120 | 
121 | } // namespace
122 | 
123 | 
124 | //======================================================================================================================
125 | // Reconstructs the bytes of [outputBegin, outputEnd) from decodeStream.
126 | // The number of bytes to decode is taken from the size of the output range.
127 | void maniscalco::m99_decode
128 | (
129 |     m99_decode_stream & decodeStream,
130 |     std::uint8_t * outputBegin,
131 |     std::uint8_t * outputEnd
132 | )
133 | {
134 |     while (!decodeStream.pop(1))
135 |         ; // pop until a 1 bit is decoded. this is start of stream marker.
136 | 
137 |     // an empty (or inverted) range has nothing to reconstruct; returning here keeps
138 |     // split() from reading a symbol table whose entries were never initialized
139 |     if (outputBegin >= outputEnd)
140 |         return;
141 | 
142 |     // decode the header stream: one (count, symbol) pair per entry, until the
143 |     // counts account for every byte to be decoded
144 |     symbol_info symbolInfo[256];
145 |     auto const bytesToDecode = static_cast<std::uint32_t>(std::distance(outputBegin, outputEnd));
146 |     std::uint32_t n = bytesToDecode;
147 |     for (auto i = 0; i < 256; ++i)
148 |     {
149 |         if (n == 0)
150 |             break;
151 |         symbolInfo[i].count_ = unpack_value(decodeStream, n, n, n);
152 |         symbolInfo[i].symbol_ = decodeStream.pop(8);
153 |         n -= symbolInfo[i].count_;
154 |     }
155 | 
156 |     // initial split boundary: half of the smallest power of two that is >= bytesToDecode.
157 |     // it is grown in 64 bits so that sizes above 2^31 cannot shift it out to zero
158 |     std::uint64_t boundary = 1;
159 |     while (boundary < bytesToDecode)
160 |         boundary <<= 1;
161 |     split(decodeStream, outputBegin, bytesToDecode, static_cast<std::uint32_t>(boundary >> 1), symbolInfo);
162 | }
163 | 
164 | 


--------------------------------------------------------------------------------
/src/library/m99/m99_decode.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "./m99_decode_stream.h"
 4 | 
 5 | #include <cstdint>
 6 | 
 7 | 
 8 | namespace maniscalco
 9 | {
10 |     // decodes from the stream into the byte range delimited by the two pointers
11 |     void m99_decode
12 |     (
13 |         m99_decode_stream &, // source of the encoded bits
14 |         std::uint8_t *,      // start of the output range
15 |         std::uint8_t *       // end of the output range; the distance between the two is the number of bytes decoded
16 |     );
17 | 
18 | } // namespace maniscalco
19 | 
20 | 


--------------------------------------------------------------------------------
/src/library/m99/m99_decode_stream.cpp:
--------------------------------------------------------------------------------
1 | #include "./m99_decode_stream.h"


--------------------------------------------------------------------------------
/src/library/m99/m99_decode_stream.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <library/io.h>
 4 | 
 5 | #include <vector>
 6 | #include <cstdint>
 7 | 
 8 | 
 9 | namespace maniscalco
10 | {
11 | 
12 |     class m99_decode_stream
13 |     {
14 |     public:
15 |         // thin wrapper: owns one packet built from a buffer and forwards pop requests to an io pop stream
16 |         static auto constexpr stream_direction = io::stream_direction::forward;
17 |         using stream_type = io::push_stream<stream_direction>; // NOTE(review): packet_type is taken from a push stream although the member below is a pop stream - confirm intended
18 |         using packet_type = stream_type::packet_type;
19 |         // NOTE(review): size looks like a byte count (it is multiplied by 8 for the packet) - confirm against io's packet type
20 |         m99_decode_stream
21 |         (
22 |             buffer b,
23 |             buffer::size_type size
24 |         ):
25 |             stream_
26 |             (
27 |                 {.inputHandler_ = [this](){return std::move(packet_);}} // designated initializer (C++20 syntax); the handler gives away packet_ when called
28 |             ),
29 |             packet_(std::move(b), 0, size * 8)
30 |         {
31 |         }
32 |         // forwards to the underlying stream's pop(codeLength)
33 |         auto pop
34 |         (
35 |             std::size_t codeLength
36 |         )
37 |         {
38 |             return stream_.pop(codeLength);
39 |         }
40 |         // forwards to the underlying stream's pop_bit()
41 |         auto pop_bit()
42 |         {
43 |             return stream_.pop_bit();
44 |         }
45 |         
46 |     private:
47 |         // declared (and therefore constructed) before packet_; assumes the handler is not invoked during construction - TODO confirm
48 |         io::forward_pop_stream stream_;
49 | 
50 |         packet_type packet_;
51 | 
52 |     };
53 | } // namespace maniscalco


--------------------------------------------------------------------------------
/src/library/m99/m99_encode.cpp:
--------------------------------------------------------------------------------
  1 | #include "./m99_encode.h"
  2 | 
  3 | 
  4 | namespace
  5 | {
  6 | 
  7 |     using namespace maniscalco;
  8 | 
  9 |     struct symbol_info // a byte value paired with a count
 10 |     {
 11 |         symbol_info(){} // deliberately empty: both members are left uninitialized
 12 |         symbol_info(std::uint8_t symbol, std::uint32_t count):symbol_(symbol), count_(count){}
 13 |         std::uint8_t    symbol_;
 14 |         std::uint32_t   count_;
 15 |     };
 16 | 
 17 |     struct tiny_encode_table_entry_type // one precomputed code: the bits to push and how many of them
 18 |     {
 19 |         std::uint32_t value_;
 20 |         std::uint32_t length_;
 21 |     };
 22 | 
 23 |     using tiny_encode_table_type = tiny_encode_table_entry_type[8][8][8][8]; // indexed [maxLeft][maxRight][left][total]
 24 | 
 25 |     // filled once, during static initialization, by the immediately invoked lambda below
 26 |     tiny_encode_table_type tinyEncodeTable;
 27 |     auto const initialize = [] // the bool result exists only so that the call runs at start-up
 28 |     (
 29 |         tiny_encode_table_type & result
 30 |     ) -> bool
 31 |     {
 32 |         for (std::uint32_t maxLeft = 0; maxLeft < 8; ++maxLeft)
 33 |         {
 34 |             for (std::uint32_t maxRight = 0; maxRight < 8; ++maxRight)
 35 |             {
 36 |                 for (std::uint32_t left = 0; left <= maxLeft; ++left)
 37 |                 {
 38 |                     for (std::uint32_t right = 0; right <= maxRight; ++right)
 39 |                     {
 40 |                         // only totals below 8 are tabulated, matching the (total < 8)
 41 |                         // fast path in pack_value; larger totals are computed there directly
 42 |                         std::uint32_t total = left + right;
 43 |                         if (total < 8)
 44 |                         {
 45 |                             std::uint32_t l = left;
 46 |                             std::uint32_t r = right;
 47 |                             std::uint32_t ml = maxLeft;
 48 |                             std::uint32_t mr = maxRight;
 49 |                             std::uint32_t t = (l + r);
 50 |                             if (t > ml)
 51 |                             {
 52 |                                 std::uint32_t inferredRight = (t - ml); // the excess over ml can only sit on the right
 53 |                                 mr -= inferredRight;
 54 |                                 t -= inferredRight;
 55 |                             }
 56 |                             if (t > mr)
 57 |                             {
 58 |                                 std::uint32_t inferredLeft = (t - mr); // the excess over mr can only sit on the left
 59 |                                 l -= inferredLeft;
 60 |                                 t -= inferredLeft;
 61 |                             }
 62 |                             std::uint32_t codeLength = 0;
 63 |                             while ((1ull << ++codeLength) <= t)
 64 |                                 ;
 65 |                             --codeLength;
 66 |                             auto needMsb = ((l | (1ull << codeLength)) <= t); // the top bit is stored only when setting it could still stay within t
 67 |                             auto code = ((l << needMsb) | (l >> codeLength)); // with needMsb the low bits move up one place and the top bit lands in bit 0
 68 |                             codeLength += needMsb;
 69 |                             // keep exactly codeLength bits of the code
 70 |                             code &= ((1ull << codeLength) - 1);
 71 |                             result[maxLeft][maxRight][left][total] = {code, codeLength};
 72 |                         }
 73 |                     }
 74 |                 }
 75 |             }
 76 |         }
 77 |         return true;
 78 |     }(tinyEncodeTable);
 79 | 
 80 | 
 81 |     //======================================================================================================================
 82 |     void pack_value // pushes the code that tells how much of total lies on the left side
 83 |     (
 84 |         m99_encode_stream & encodeStream,
 85 |         std::uint32_t left,     // share of total that lies on the left side
 86 |         std::uint32_t total,    // combined count of the left and right sides
 87 |         std::uint32_t maxLeft,  // most that the left side can hold
 88 |         std::uint32_t maxRight  // most that the right side can hold
 89 |     )
 90 |     {
 91 |         if (total < 8)
 92 |         {
 93 |             auto const & encTableEntry = tinyEncodeTable[(maxLeft >= 8) ? 7 : maxLeft][(maxRight >= 8) ? 7 : maxRight][left][total]; // small totals use the precomputed table; limits of 8 or more are clamped to 7
 94 |             encodeStream.push(encTableEntry.value_, encTableEntry.length_);
 95 |             return;
 96 |         }
 97 |         if (total > maxLeft)
 98 |         {
 99 |             auto inferredRight = (total - maxLeft); // the excess over maxLeft can only sit on the right
100 |             maxRight -= inferredRight;
101 |             total -= inferredRight;
102 |         }
103 |         if (total > maxRight)
104 |         {
105 |             auto inferredLeft = (total - maxRight); // the excess over maxRight can only sit on the left
106 |             left -= inferredLeft;
107 |             total -= inferredLeft;
108 |         }
109 |         if (total) // nothing is pushed when the clamping above leaves no choice
110 |         {
111 |             std::uint32_t codeLength = 1;
112 |             while (total >> ++codeLength)
113 |                 ;
114 |             --codeLength;
115 |             auto needMsb = ((left | (1ull << codeLength)) <= total); // the top bit is stored only when setting it could still stay within total
116 |             auto code = ((left << needMsb) | (left >> codeLength)); // with needMsb the low bits move up one place and the top bit lands in bit 0
117 |             codeLength += needMsb;
118 |                         code &= ((1ull << codeLength) - 1); // keep exactly codeLength bits of the code
119 |             encodeStream.push(code, codeLength);
120 |         }
121 |     }
122 | 
123 | 
124 |     //==========================================================================
125 |     void merge // encodes totalSize bytes at begin and writes their symbol/count list to result
126 |     (
127 |         m99_encode_stream & encodeStream,
128 |         std::uint8_t const * begin,
129 |         std::uint32_t totalSize,
130 |         std::uint32_t leftSize, // bytes in the left partition; the remaining totalSize - leftSize form the right one
131 |         symbol_info * result,
132 |         std::uint32_t leadingRunLength // length of the run of identical bytes starting at begin, as supplied by the caller
133 |     )
134 |     {
135 |         if (leadingRunLength >= totalSize)
136 |         {
137 |             result[0] = {begin[0], totalSize}; // the whole range is a single symbol: nothing is pushed
138 |             return;
139 |         }
140 |         if (totalSize <= 2)
141 |         {
142 |             if (totalSize == 2)
143 |             {
144 |                 auto c = (unsigned)(begin[0] < begin[1]);
145 |                 result[0] = {begin[!c], 1 + (unsigned)(begin[0] == begin[1])}; // the smaller (or equal) symbol is listed first
146 |                 result[1] = {begin[c], 1};
147 |                 encodeStream.push(c, begin[0] != begin[1]); // one bit records the order; the length is zero when the bytes are equal
148 |             }
149 |             else
150 |             {
151 |                 result[0] = {begin[0], 1};
152 |             }
153 |             return;
154 |         }
155 |         // general case: process both partitions recursively, then merge their symbol lists
156 |         std::uint32_t rightSize = (totalSize - leftSize);
157 |         symbol_info left[256];
158 |         symbol_info right[256];
159 |         symbol_info const * current[2] = {left, right};
160 |         symbol_info * resultCurrent = result;
161 |         static auto constexpr leftSide = 0;
162 |         static auto constexpr rightSide = 1;
163 |         auto rightLeadingRunLength = (leadingRunLength > leftSize) ? (leadingRunLength - leftSize) : [](std::uint8_t const * begin, std::uint8_t const * end) // reuse the part of the caller's run that reaches into the right partition, otherwise scan for it
164 |         {
165 |             auto cur = begin;
166 |             auto s = *cur;
167 |             while ((cur < end) && (*cur == s))
168 |                 ++cur;
169 |             return std::distance(begin, cur);
170 |         }(begin + leftSize, begin + totalSize);
171 |         // the right partition is processed before the left one
172 |         merge(encodeStream, begin + leftSize, rightSize, rightSize >> 1, right, rightLeadingRunLength);
173 |         merge(encodeStream, begin, leftSize, leftSize >> 1, left, leadingRunLength);
174 |         // two 32-bit counters overlaid on one std::size_t so that both can be reduced with a single subtraction
175 |         #pragma pack(push, 1)
176 |         using size_union = union size_union
177 |         {
178 |             size_union(std::uint32_t left, std::uint32_t right):size_({left, right}){};
179 |             std::size_t union_;
180 |             struct
181 |             {
182 |                 std::uint32_t left_;
183 |                 std::uint32_t right_;
184 |             } size_;
185 |         };
186 |         #pragma pack(pop)
187 |         // values are collected here first and pushed afterwards, last collected first
188 |         std::array<std::tuple<std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t>, 256> valuesToEncode;
189 |         std::uint32_t numValuesToEncode{0};
190 | 
191 |         size_union partitionSize_(leftSize, rightSize);
192 |         while (partitionSize_.size_.left_ && partitionSize_.size_.right_)
193 |         {
194 |             size_union count(
195 |                 (-(current[leftSide]->symbol_ <= current[rightSide]->symbol_) & (std::uint32_t)current[leftSide]->count_), // left count, or 0 when the left symbol is the larger one
196 |                 (-(current[rightSide]->symbol_ <= current[leftSide]->symbol_) & (std::uint32_t)current[rightSide]->count_) // right count, or 0 when the right symbol is the larger one
197 |             );
198 |             auto totalCount = (count.size_.left_ + count.size_.right_);
199 |             valuesToEncode[numValuesToEncode++] = {count.size_.left_, totalCount, partitionSize_.size_.left_, partitionSize_.size_.right_};
200 |             partitionSize_.union_ -= count.union_; // reduces left_ and right_ together through the overlaid integer
201 |             *resultCurrent++ = {current[(count.size_.left_ == 0)]->symbol_, totalCount};
202 |             current[leftSide] += (count.size_.left_ != 0); // a list advances only when its symbol was consumed
203 |             current[rightSide] += (count.size_.right_ != 0);
204 |         }
205 |         auto n = partitionSize_.size_.left_ + partitionSize_.size_.right_; // one side is exhausted here, so n is what remains on the other
206 |         symbol_info const * c = current[(partitionSize_.size_.left_ == 0)]; // the side with symbols left over is copied through unchanged
207 |         while (n > 0)
208 |         {
209 |             n -= c->count_;
210 |             *resultCurrent++ = *c++;
211 |         }
212 | 
213 | 
214 |         while (numValuesToEncode)
215 |         {
216 |             auto [left, total, maxLeft, maxRight] = valuesToEncode[--numValuesToEncode];
217 |             pack_value(encodeStream, left, total, maxLeft, maxRight);
218 |         }
219 |     }
220 | 
221 | } // namespace
222 | 
223 | 
224 | //==========================================================================
225 | // Encodes the bytes of [begin, end) into encodeStream: first the recursively merged
226 | // body, then the symbol/count header, and finally a single 1 bit (the start of stream marker).
227 | void maniscalco::m99_encode
228 | (
229 |     std::uint8_t const * begin,
230 |     std::uint8_t const * end,
231 |     m99_encode_stream & encodeStream
232 | 
233 | )
234 | {
235 |     // empty (or inverted) input has no first byte to inspect; emit only the marker bit,
236 |     // the same single bit the rest of this function pushes when there are no symbols
237 |     if (begin >= end)
238 |     {
239 |         encodeStream.push(1, 1);
240 |         return;
241 |     }
242 | 
243 |     // determine initial merge boundary (left size is largest power of 2 that is less than the input size).
244 |     // the boundary is grown in 64 bits so that inputs above 2^31 bytes cannot shift it out to zero
245 |     auto const bytesToEncode = static_cast<std::uint32_t>(std::distance(begin, end));
246 |     std::uint64_t boundary = 1;
247 |     while (boundary < bytesToEncode)
248 |         boundary <<= 1;
249 |     auto const leftSize = static_cast<std::uint32_t>(boundary >> 1);
250 |     symbol_info symbolList[256];
251 | 
252 |     // length of the run of identical bytes at the front of the input
253 |     auto cur = begin;
254 |     auto s = *cur;
255 |     while ((cur < end) && (*cur == s))
256 |         ++cur;
257 |     auto const leadingRunLength = static_cast<std::uint32_t>(std::distance(begin, cur));
258 | 
259 |     // do recursive merge and encode
260 |     merge(encodeStream, begin, bytesToEncode, leftSize, symbolList, leadingRunLength);
261 | 
262 |     // encode the symbols and their counts 
263 |     auto n = bytesToEncode;
264 |     std::vector<std::tuple<std::uint8_t, std::uint32_t, std::uint32_t>> headerValuesToEncode;
265 |     headerValuesToEncode.reserve(256);
266 |     for (auto & symbolInfo : symbolList)
267 |     {
268 |         if (n == 0)
269 |             break;
270 |         headerValuesToEncode.push_back({symbolInfo.symbol_, symbolInfo.count_, n});
271 |         n -= symbolInfo.count_;
272 |     }
273 |     // header entries are pushed last collected first, each as its symbol followed by its count
274 |     std::reverse(headerValuesToEncode.begin(), headerValuesToEncode.end());
275 |     for (auto [symbol, count, maxCount] : headerValuesToEncode)
276 |     {
277 |         encodeStream.push(symbol, 8);
278 |         pack_value(encodeStream, count, maxCount, maxCount, maxCount);
279 |     }
280 |     encodeStream.push(1, 1);
281 | }
282 | 


--------------------------------------------------------------------------------
/src/library/m99/m99_encode.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "./m99_encode_stream.h"
 4 | 
 5 | #include <cstdint>
 6 | 
 7 | 
 8 | namespace maniscalco
 9 | {
10 | 
11 |     // Encodes the bytes in [begin, end) and pushes the result onto encodeStream.
12 |     // Defined in m99_encode.cpp.
13 |     void m99_encode(
14 |         std::uint8_t const * begin,
15 |         std::uint8_t const * end,
16 |         m99_encode_stream & encodeStream);
17 | 
18 | } // namespace maniscalco
19 | 
20 | 


--------------------------------------------------------------------------------
/src/library/m99/m99_encode_stream.cpp:
--------------------------------------------------------------------------------
 1 | #include "./m99_encode_stream.h"
 2 | 
 3 | 
 4 | //=============================================================================
 5 | // Wires up stream_: every packet passed to bufferOutputHandler_ is placed at the
 6 | // front of packets_; bufferAllocationHandler_ returns maniscalco::buffer(16 * 1024).
 7 | maniscalco::m99_encode_stream::m99_encode_stream():
 8 |     stream_({
 9 |         .bufferOutputHandler_ = [this](packet_type pkt)
10 |         {
11 |             packets_.push_front(std::move(pkt));
12 |         },
13 |         .bufferAllocationHandler_ = [](){return maniscalco::buffer(16 * 1024);}
14 |     })
15 | {
16 | }
17 | 
18 | 
19 | //=============================================================================
20 | // Returns a const iterator to the first element of packets_
21 | // (equal to end() when no packet is stored).
22 | auto maniscalco::m99_encode_stream::begin() const -> const_iterator
23 | {
24 |     return packets_.cbegin();
25 | }
26 | 
27 | 
28 | //=============================================================================
29 | // Returns the past-the-end const iterator of packets_; it marks where
30 | // iteration stops and must not be dereferenced.
31 | auto maniscalco::m99_encode_stream::end() const -> const_iterator
32 | {
33 |     return packets_.cend();
34 | }
35 | 
36 | 
37 | //=============================================================================
38 | // Flushes stream_ and afterwards removes every element of packets_.
39 | void maniscalco::m99_encode_stream::clear()
40 | {
41 |     // flush() is the member wrapper around stream_.flush()
42 |     flush();
43 |     packets_.clear();
44 | }
45 | 
46 | 
47 | //=============================================================================
48 | // Calls flush() on the wrapped stream_; no other member is accessed here.
49 | void maniscalco::m99_encode_stream::flush()
50 | {
51 |     auto & wrapped = stream_;
52 |     wrapped.flush();
53 | }
54 | 
55 | 
56 | //=============================================================================
57 | // Returns stream_.size() converted to size_type (packets_ is not consulted).
58 | auto maniscalco::m99_encode_stream::size() const -> size_type
59 | {
60 |     size_type const result = stream_.size();
61 |     return result;
62 | }


--------------------------------------------------------------------------------
/src/library/m99/m99_encode_stream.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <library/io.h>
 4 | 
 5 | #include <cstdint>
 6 | #include <queue>
 7 | 
 8 | 
 9 | namespace maniscalco
10 | {
11 | 
12 |     // Wraps an io::push_stream<reverse> together with a deque holding the packets
13 |     // that the stream passes to its output handler (most recent packet at the front).
14 |     class m99_encode_stream
15 |     {
16 |     public:
17 | 
18 |         static auto constexpr stream_direction = io::stream_direction::reverse;
19 |         using stream_type = io::push_stream<stream_direction>;
20 |         using packet_type = stream_type::packet_type;
21 |         using element_type = packet_type;
22 |         using container_type = std::deque<element_type>;
23 |         using size_type = std::size_t;
24 |         using iterator = container_type::iterator;
25 |         using const_iterator = container_type::const_iterator;
26 | 
27 |         // installs the output and allocation handlers on stream_
28 |         m99_encode_stream();
29 | 
30 |         // forwards all arguments to stream_.push()
31 |         template <typename ... T>
32 |         auto push(T && ... args);
33 | 
34 |         // iteration over the stored packets
35 |         const_iterator begin() const;
36 |         const_iterator end() const;
37 | 
38 |         size_type size() const;     // stream_.size()
39 |         void flush();               // stream_.flush()
40 |         void clear();               // flush, then drop all stored packets
41 | 
42 |         // Left public (no access specifier follows).  Declaration order matters:
43 |         // packets_ is built before, and destroyed after, the stream_ that refers to it.
44 |         container_type packets_;
45 |         stream_type stream_;
46 | 
47 |     }; // class m99_encode_stream
48 | 
49 | } // namespace maniscalco
50 | 
51 | 
52 | //=============================================================================
53 | // Hands every argument, perfectly forwarded, to stream_.push() and returns
54 | // whatever that call yields (plain auto, so the result is never a reference).
55 | template <typename ... arg_types>
56 | auto maniscalco::m99_encode_stream::push(arg_types && ... args)
57 | {
58 |     auto & wrapped = stream_;
59 |     return wrapped.push(std::forward<arg_types>(args) ...);
60 | }
61 | 


--------------------------------------------------------------------------------