├── CMakeLists.txt ├── LICENSE ├── README.md ├── inc ├── imageMarlin.hpp ├── marlin.h └── marlin.hpp ├── src ├── configuration.cc ├── dictionary.cc ├── distribution.hpp ├── entropyCoder.cc ├── entropyDecoder.cc ├── imageBlockEC.cc ├── imageBlockEC.hpp ├── imageCoder.cc ├── imageDecoder.cc ├── imageHeader.cc ├── imageTransformer.cc ├── imageTransformer.hpp ├── instantiations.h ├── marlin.cc ├── prebuilt.cc ├── profiler.cc └── profiler.hpp ├── test └── correctness.cc └── utils ├── buildPrecalculatedDictionaries.cc ├── imageMarlin.cc └── testImage.cc /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(Marlin VERSION 0.01) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Wextra -Wcast-qual -Wcast-align -Wstrict-aliasing=1 -Wswitch-enum -Wundef -pedantic -Wfatal-errors -Wshadow") 6 | 7 | ################################ 8 | # Marlin library (entropy codec only) 9 | file(GLOB MAIN_SRC_FILES ${PROJECT_SOURCE_DIR}/src/*.cc) 10 | file(GLOB IMAGE_ONLY_SRC_FILES ${PROJECT_SOURCE_DIR}/src/image*.cc) 11 | list(REMOVE_ITEM MAIN_SRC_FILES ${IMAGE_ONLY_SRC_FILES}) 12 | add_library(marlin ${MAIN_SRC_FILES}) 13 | set_target_properties(marlin PROPERTIES 14 | VERSION ${PROJECT_VERSION} 15 | PUBLIC_HEADER inc/marlin.h) 16 | target_include_directories(marlin PRIVATE inc) 17 | 18 | # ImageMarlin library (image codec + entropy codec) 19 | file(GLOB MAIN_SRC_FILES ${PROJECT_SOURCE_DIR}/src/*.cc) 20 | add_library(imarlin ${MAIN_SRC_FILES} inc/imageMarlin.hpp) 21 | find_package( OpenCV REQUIRED ) 22 | set_target_properties(imarlin PROPERTIES 23 | VERSION ${PROJECT_VERSION} 24 | PUBLIC_HEADER inc/imageMarlin.hpp) 25 | target_include_directories(imarlin PRIVATE inc ${OpenCV_INCLUDE_DIRS}) 26 | 27 | ################################ 28 | # Samples 29 | 30 | option(WITH_UTILS "Build Utilities" ON) 31 | if(WITH_UTILS) 32 | file(GLOB UTILS_SRC_FILES 
${PROJECT_SOURCE_DIR}/utils/*.cc) 33 | find_package( OpenCV REQUIRED ) 34 | 35 | foreach(_util_file ${UTILS_SRC_FILES}) 36 | get_filename_component(_util_name ${_util_file} NAME_WE) 37 | add_executable(${_util_name} ${_util_file}) 38 | target_include_directories(${_util_name} PRIVATE inc utils src ${OpenCV_INCLUDE_DIRS}) 39 | target_link_libraries(${_util_name} imarlin ${OpenCV_LIBS}) 40 | endforeach() 41 | endif() 42 | 43 | ################################ 44 | # Testing 45 | 46 | option(WITH_TESTS "Build Tests" ON) 47 | if(WITH_TESTS) 48 | enable_testing() 49 | file(GLOB TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/test/*.cc) 50 | 51 | ## from list of files we'll create tests test_name.cpp -> test_name 52 | foreach(_test_file ${TEST_SRC_FILES}) 53 | get_filename_component(_test_name ${_test_file} NAME_WE) 54 | add_executable(${_test_name} ${_test_file}) 55 | target_include_directories(${_test_name} PRIVATE inc utils ${OpenCV_INCLUDE_DIRS}) 56 | target_link_libraries(${_test_name} marlin) 57 | add_test(${_test_name} ${_test_name}) 58 | endforeach() 59 | endif() 60 | 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Manuel Martinez Torres 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Marlin: a high throughput entropy compressor 2 | 3 | #### Update: Precomputed Dictionaries 4 | 5 | We added the (limited) ability to precompute dictionaries. 6 | At this moment we provide 16 precomputed dictionaries for Laplacian, Gaussian, and Exponential distributions. 7 | The precomputed file is in `src/prebuilt.cc`. 8 | 9 | This allows using Marlin without the long start-up time otherwise spent building dictionaries. 10 | 11 | #### Update: Standalone Utility program. 12 | 13 | We added a standalone Utility program at: `utils\marlinUtility.cc`. 14 | Right now, it is capable of compressing PNG images and decompressing them back. 15 | 16 | It is designed as a technology demonstrator, and its interface will be updated in the future as (if) more utilities are added. 17 | 18 | 19 | #### Update: the benchmark code has been moved to the marlin_eval repository. 20 | 21 | #### To Build: 22 | 23 | mkdir Release 24 | cd Release 25 | cmake -DCMAKE_BUILD_TYPE=Release .. 26 | make 27 | 28 | #### Publications: 29 | Please check the following papers for details, and please cite them if you use Marlin in your project: 30 | 31 | 32 | 33 | 34 | #### Disclaimer: 35 | 36 | Please note that, although I tried to make the code as clear as possible, this is still research code, and thus it is not as thoroughly documented as it should be.
37 | -------------------------------------------------------------------------------- /inc/imageMarlin.hpp: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageMarlin: an image compressor based on the Marlin entropy coder 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 
32 | 33 | ***********************************************************************/ 34 | 35 | #ifndef IMAGEMARLIN_HPP 36 | #define IMAGEMARLIN_HPP 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include 45 | 46 | namespace marlin { 47 | 48 | class ImageMarlinHeader; 49 | class ImageMarlinCoder; 50 | class ImageMarlinDecoder; 51 | class ImageMarlinTransformer; 52 | class ImageMarlinBlockEC; 53 | 54 | /** 55 | * Header defining the image properties and the codec configuration parameters. 56 | */ 57 | class ImageMarlinHeader { 58 | 59 | public: 60 | 61 | enum class QuantizerType : uint8_t {Uniform = 0, Deadzone = 1}; 62 | enum class ReconstructionType : uint8_t {Midpoint = 0, Lowpoint = 1}; 63 | enum class TransformType : uint8_t {North=0, FastLeft=1}; 64 | 65 | // Default values 66 | static const uint32_t DEFAULT_BLOCK_WIDTH = 64; 67 | static const uint32_t DEFAULT_QSTEP = 1; 68 | static const uint32_t DEFAULT_ENTROPY_FREQUENCY = 1; 69 | static const QuantizerType DEFAULT_QTYPE = QuantizerType::Uniform; 70 | static const ReconstructionType DEFAULT_RECONSTRUCTION_TYPE = ReconstructionType::Midpoint; 71 | static const TransformType DEFAULT_TRANSFORM_TYPE = TransformType::North; 72 | 73 | // Image dimensions 74 | uint32_t rows, cols, channels; 75 | // width (and height) of each block into which the image is divided for entropy coding 76 | uint32_t blockWidth; 77 | // Quantization step. Use 1 for lossless compression.
78 | uint32_t qstep; 79 | // Quantization type 80 | QuantizerType qtype; 81 | // Quantization reconstruction type 82 | ReconstructionType rectype; 83 | // Type of transformation 84 | TransformType transtype; 85 | uint32_t blockEntropyFrequency; 86 | 87 | /** 88 | * Empty constructor 89 | */ 90 | ImageMarlinHeader() {} 91 | 92 | /** 93 | * Constructor from known parameters 94 | */ 95 | ImageMarlinHeader( 96 | uint32_t rows_, 97 | uint32_t cols_, 98 | uint32_t channels_, 99 | uint32_t blockWidth_=DEFAULT_BLOCK_WIDTH, 100 | uint32_t qstep_=DEFAULT_QSTEP, 101 | QuantizerType qtype_=DEFAULT_QTYPE, 102 | ReconstructionType rectype_=DEFAULT_RECONSTRUCTION_TYPE, 103 | TransformType transtype_=DEFAULT_TRANSFORM_TYPE, 104 | uint32_t blockEntropyFrequency_=DEFAULT_ENTROPY_FREQUENCY) : 105 | rows(rows_), 106 | cols(cols_), 107 | channels(channels_), 108 | blockWidth(blockWidth_), 109 | qstep(qstep_), 110 | qtype(qtype_), 111 | rectype(rectype_), 112 | transtype(transtype_), 113 | blockEntropyFrequency(blockEntropyFrequency_){ 114 | validate(); 115 | } 116 | 117 | /** 118 | * Constructor from an istream of compressed data 119 | */ 120 | ImageMarlinHeader(std::istream& data) { 121 | load_from(data); 122 | validate(); 123 | } 124 | 125 | /** 126 | * Constructor from a string containing the compressed data 127 | * @param str 128 | */ 129 | ImageMarlinHeader(const std::string& str) { 130 | std::istringstream data(str); 131 | load_from(data); 132 | validate(); 133 | } 134 | 135 | /** 136 | * @return a new ImageMarlinCoder reference based on the header parameters, 137 | * which must be destroyed manually 138 | */ 139 | ImageMarlinCoder* newCoder(); 140 | 141 | /** 142 | * @return an ImageMarlinDecoder reference based on the header parameters, 143 | * which must be destroyed manually 144 | */ 145 | ImageMarlinDecoder* newDecoder(); 146 | 147 | /** 148 | * Write the header parameters to out in a platform-independent way. 149 | * 150 | * Data are not validated before writing. 
151 | * 152 | * No value is returned (the function is declared void). 153 | */ 154 | void dump_to(std::ostream& out) const; 155 | 156 | /** 157 | * Read and update the header parameters by reading them from in 158 | * (format must be as produced by dump_to). 159 | * 160 | * Data are not validated after reading. 161 | * 162 | * No value is returned (the function is declared void). 163 | */ 164 | void load_from(std::istream& in); 165 | 166 | /** 167 | * Return the number of bytes that it takes to store the header 168 | */ 169 | size_t size() const; 170 | 171 | /** 172 | * Check header parameters and throw std::domain_error if 173 | * a problem is detected. 174 | */ 175 | void validate(); 176 | 177 | /** 178 | * Print the header to out 179 | */ 180 | void show(std::ostream& out = std::cout); 181 | 182 | protected: 183 | /** 184 | * Write an unsigned field value to out in a platform-independent manner. 185 | * 186 | * @throws std::domain_error if field cannot be represented in num_bytes (field is unsigned, so it is never negative) 187 | * 188 | * @tparam num_bytes number of bytes to use to store the value. 189 | */ 190 | template 191 | void write_field(std::ostream& out, uint32_t field) const; 192 | 193 | /** 194 | * Read an unsigned field from in, assuming it was written with write_field 195 | * @tparam num_bytes number of bytes used to store the value. 196 | */ 197 | template 198 | uint32_t read_field(std::istream& in); 199 | }; 200 | 201 | /** 202 | * Class to compress images 203 | */ 204 | class ImageMarlinCoder { 205 | public: 206 | /** 207 | * Initialize an image compressor with the parameters given in header 208 | * (parameters are copied, and do not change if header_ changes). 209 | * 210 | * The transformer_ and blockEC_ are deleted on the dtor.
211 | */ 212 | ImageMarlinCoder( 213 | const ImageMarlinHeader& header_, ImageMarlinTransformer* transformer_, ImageMarlinBlockEC* blockEC_) 214 | : header(header_), transformer(transformer_), blockEC(blockEC_) {} 215 | 216 | /** 217 | * Delete the transformer and blockEC objects and release any other used resource. 218 | */ 219 | ~ImageMarlinCoder(); 220 | 221 | /** 222 | * Compress an image with the parameters specified in header. 223 | * 224 | * @return a string with the compressed format bytes 225 | */ 226 | std::string compress(const cv::Mat& img); 227 | 228 | /** 229 | * Compress an image with the parameters specified in header 230 | * and write the results to out. 231 | * 232 | * No value is returned (declared void); the compressed bytes are written to out. 233 | */ 234 | void compress(const cv::Mat& img, std::ostream& out); 235 | 236 | protected: 237 | // Header with all configuration parameters 238 | const ImageMarlinHeader header; 239 | // Image transformer (includes any prediction and quantization) 240 | ImageMarlinTransformer *const transformer; 241 | // Image splitting into blocks and their entropy coding 242 | ImageMarlinBlockEC *const blockEC; 243 | }; 244 | 245 | class ImageMarlinDecoder { 246 | 247 | public: 248 | /** 249 | * Initialize an image decompressor with the parameters given in header 250 | * (parameters are copied, and do not change if header_ changes) 251 | */ 252 | ImageMarlinDecoder( 253 | ImageMarlinHeader& header_, 254 | ImageMarlinTransformer * transformer_, ImageMarlinBlockEC * blockEC_) : 255 | header(header_), transformer(transformer_), blockEC(blockEC_) {} 256 | 257 | ~ImageMarlinDecoder(); 258 | 259 | /** 260 | * Entropy decode and inverse transform the bitstream in compressedString and store 261 | * the reconstructed samples in reconstructedData.
262 | * 263 | * @param compressedString a string containing the compressed bitstream 264 | * @param reconstructedData pre-allocated vector where the reconstructed data is to be 265 | * stored, each component sequentially and using raster order (one row after the other, 266 | * from top to bottom). 267 | */ 268 | void decompress( 269 | const std::string &compressedString, 270 | std::vector& reconstructedData, 271 | ImageMarlinHeader& decompressedHeader); 272 | 273 | protected: 274 | const ImageMarlinHeader header; 275 | // Image transformer (includes any prediction and quantization) 276 | ImageMarlinTransformer *const transformer; 277 | // Image splitting into blocks and their entropy coding 278 | ImageMarlinBlockEC *const blockEC; 279 | }; 280 | 281 | /** 282 | * Image transformer (includes any prediction and quantization) 283 | */ 284 | class ImageMarlinTransformer { 285 | public: 286 | /** 287 | * Apply the direct transformation of original_data and store the results in preprocessed, 288 | * and store any necessary side information in side_information 289 | */ 290 | virtual void transform_direct( 291 | uint8_t *original_data, 292 | std::vector &side_information, 293 | std::vector &preprocessed) = 0; 294 | 295 | /** 296 | * Perform the inverse transformation of entropy_decoded_data 297 | * and store the reconstructed samples in reconstructedData (which will 298 | * be resized to the needed size) 299 | */ 300 | virtual void transform_inverse( 301 | std::vector &entropy_decoded_data, 302 | View &side_information, 303 | std::vector &reconstructedData) = 0; 304 | 305 | virtual ~ImageMarlinTransformer() {} 306 | }; 307 | 308 | /** 309 | * Image splitting into blocks and their entropy coding. 310 | * 311 | * The decodeBlocks method is provided, encodeBlocks must be defined 312 | * in subclasses. 313 | * 314 | * encodeBlocks must be compatible with the format expected by 315 | * decodeBlocks, or provide an alternative implementation.
316 | */ 317 | class ImageMarlinBlockEC { 318 | public: 319 | /// Divide a transformed image into blocks, entropy code them and obtain a bitstream 320 | virtual std::vector encodeBlocks( 321 | const std::vector &uncompressed, 322 | size_t blockSize) = 0; 323 | 324 | /// Recover a transformed image from a bitstream 325 | virtual size_t decodeBlocks( 326 | marlin::View uncompressed, 327 | marlin::View &compressed, 328 | size_t blockSize); 329 | 330 | virtual ~ImageMarlinBlockEC() {} 331 | }; 332 | 333 | } 334 | 335 | #endif /* IMAGEMARLIN_HPP */ 336 | -------------------------------------------------------------------------------- /inc/marlin.h: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2017 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | ***********************************************************************/ 28 | 29 | 30 | #ifndef MARLIN_H 31 | #define MARLIN_H 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #define MARLIN_VERSION_MAJOR 0 38 | #define MARLIN_VERSION_MINOR 3 39 | 40 | 41 | #if defined (__cplusplus) 42 | #include "marlin.hpp" 43 | extern "C" { 44 | #else 45 | struct Marlin; 46 | #endif 47 | 48 | /*! 49 | * Compresses src to dst using dictionary dict. 50 | * 51 | * \param dst output buffer 52 | * \param dstCapacity allocated capacity of dst 53 | * \param src input buffer 54 | * \param srcSize input buffer size 55 | * \param dict dictionary to use for compression 56 | * 57 | * \return negative: error occurred 58 | * 0: if data is not compressible 59 | * 1: if data is a repetition of a single byte 60 | * positive: size of the compressed buffer 61 | */ 62 | ssize_t Marlin_compress(const Marlin *dict, uint8_t* dst, size_t dstCapacity, const uint8_t* src, size_t srcSize); 63 | 64 | /*! 65 | * Uncompresses src to dst using dictionary dict. 66 | * 67 | * \param dst output buffer 68 | * \param dstSize output buffer size 69 | * \param src input buffer 70 | * \param srcSize input buffer size 71 | * \param dict dictionary to use for decompression 72 | * 73 | * \return negative: error occurred 74 | * positive: number of uncompressed bytes (must match dstSize) 75 | */ 76 | ssize_t Marlin_decompress(const Marlin *dict, uint8_t* dst, size_t dstSize, const uint8_t* src, size_t srcSize); 77 | 78 | /*! 79 | * Builds an optimal dictionary for an 8 bit memoryless source. Dictionary must be freed with Marlin_free_dictionary.
80 | * 81 | * \param hist histogram of symbols in the 8 bit alphabet 82 | * \param name an identifier for the dictionary (max size 15 bytes). 83 | * \param indexSizeBits number of bits on the index. Must be larger than 8-rawStorageBits. 84 | * \param indexOverlapBits number of bits of overlap. Suggested small. 85 | * \param maxWordSizeSymbols maximum amount of non zero symbols per word. 86 | * \param rawStorageBits number of bits to store uncompressed. 87 | * NOTE(review): only name and hist appear in the signature below; the other \param entries look stale. 88 | * \return null: error occurred 89 | * otherwise: newly allocated dictionary 90 | */ 91 | Marlin *Marlin_build_dictionary(const char *name, const double hist[256]); 92 | 93 | /*! 94 | * Frees a previously built Marlin Dictionary 95 | * 96 | * \param dict dictionary to free 97 | */ 98 | void Marlin_free_dictionary(Marlin *dict); 99 | 100 | /*! 101 | * Obtains a set of pre-built dictionaries (those must not be freed). 102 | * 103 | * \return pointer to a vector of dictionary pointers ended in nullptr 104 | */ 105 | const Marlin **Marlin_get_prebuilt_dictionaries(); 106 | 107 | /*! 108 | * Estimates how much space a dictionary will take to compress a source with a histogram hist.
109 | * 110 | * \return a dictionary pointer (the declared return type is const Marlin *, not a size or error code). 111 | * NOTE(review): presumably the entry of dict expected to compress src best; confirm against the implementation. 112 | */ 113 | const Marlin * Marlin_estimate_best_dictionary(const Marlin **dict, const uint8_t* src, size_t srcSize); 114 | 115 | #if defined (__cplusplus) 116 | } 117 | #endif 118 | 119 | #endif 120 | 121 | -------------------------------------------------------------------------------- /inc/marlin.hpp: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE.
26 | 27 | ***********************************************************************/ 28 | 29 | 30 | #ifndef MARLIN_HPP 31 | #define MARLIN_HPP 32 | #if defined (__cplusplus) 33 | 34 | #include "marlin.h" 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | // TSource is a type that represents the data type of the source 42 | // (i.e., uint8_t and uint16_t are supported now) 43 | 44 | // MarlinIdx is a type that can hold all symbols that will be encoded by Marlin 45 | // (which usually will be less than the Source symbols). 46 | // uint8_t should be enough for all practical cases. 47 | namespace marlin { 48 | // View: a [start, end) pair of pointers over contiguous elements of type T; it does not free them. 49 | template 50 | struct View { 51 | T *start, *end; 52 | View(T *start_, T *end_) : start(start_), end(end_) {} 53 | size_t nElements() const { return end - start; } 54 | size_t nBytes() const { return sizeof(T)*(end - start); } 55 | T &operator[](size_t v) { return start[v]; } 56 | const T &operator[](size_t v) const { return start[v]; } 57 | }; 58 | template static View make_view(T *start, T *end) { return View(start,end); } 59 | template static View make_view(std::vector &v) { return View(v.data(), v.data()+v.size()); } 60 | template static View make_view(const std::vector &v) { return View(v.data(), v.data()+v.size()); } 61 | // The vector overloads use data() instead of &v[0] / &v[v.size()]: operator[] must not be called with an out-of-range index, which happened for the end pointer and for empty vectors. 62 | 63 | template 64 | struct MarlinSymbol_ { 65 | TSource sourceSymbol; 66 | double p; 67 | }; 68 | 69 | template 70 | struct Word_ : std::vector { 71 | using std::vector::vector; 72 | double p = 0; 73 | MarlinIdx state = 0; 74 | }; 75 | 76 | typedef std::map Configuration; 77 | 78 | template 79 | struct TMarlinDictionary{ 80 | 81 | typedef Word_ Word; 82 | typedef MarlinSymbol_ MarlinSymbol; 83 | 84 | const std::vector sourceAlphabet; 85 | const Configuration conf; 86 | 87 | const size_t K = conf.at("K"); // Non overlapping bits of codeword. 88 | const size_t O = conf.at("O"); // Bits that overlap between codewords.
89 | const size_t shift = conf.at("shift"); // Bits that can be stored raw 90 | const size_t maxWordSize = conf.at("maxWordSize"); // Maximum number of symbols per word. 91 | 92 | struct MarlinAlphabet : public std::vector { 93 | using std::vector::vector; 94 | double probabilityOfUnrepresentedSymbol; 95 | }; 96 | // NOTE: members are initialized in declaration order and several are computed from members declared before them (e.g. compressionRatio from efficiency and sourceEntropy); do not reorder. 97 | /// ALPHABETS 98 | const double sourceEntropy = calcSourceEntropy(sourceAlphabet); 99 | const MarlinAlphabet marlinAlphabet = buildMarlinAlphabet(); 100 | 101 | /// DICTIONARY 102 | //Marlin only encodes a subset of the possible source symbols. 103 | //Marlin symbols are sorted by probability in descending order, 104 | //so Marlin symbol 0 always corresponds to the most probable alphabet symbol. 105 | const std::vector words = buildDictionary(); // All dictionary words. 106 | const double efficiency = calcEfficiency(); // Expected efficiency of the dictionary. 107 | const double compressionRatio = efficiency/sourceEntropy; //Expected compression ratio 108 | const bool isSkip = calcSkip(); // If all words are small, we can do a faster decoding algorithm; 109 | 110 | 111 | /// CONSTRUCTOR 112 | TMarlinDictionary( 113 | const std::vector &sourceAlphabet_, 114 | Configuration conf_ = Configuration()) 115 | : 116 | sourceAlphabet(sanitizeAlphabet(sourceAlphabet_)), 117 | conf(updateConf(sourceAlphabet, conf_)) 118 | {} 119 | 120 | private: 121 | // Sets default configurations 122 | static std::map updateConf(const std::vector &sourceAlphabet, Configuration conf); 123 | 124 | static std::vector sanitizeAlphabet(const std::vector &sourceAlphabet); 125 | MarlinAlphabet buildMarlinAlphabet() const; 126 | 127 | std::vector buildDictionary() const; 128 | static double calcSourceEntropy(const std::vector &sourceAlphabet); 129 | double calcEfficiency() const; 130 | bool calcSkip() const; 131 | }; 132 | 133 | 134 | template 135 | struct TMarlinCompress { 136 | 137 | typedef Word_ Word; 138 | typedef MarlinSymbol_ MarlinSymbol; 139 | const size_t
K,O,shift,maxWordSize; 140 | double efficiency; 141 | 142 | // Structured as: FLAG_NEXT_WORD Where to jump next 143 | typedef uint32_t CompressorTableIdx; 144 | const MarlinIdx unrepresentedSymbolToken; 145 | const std::array source2marlin; 146 | const std::shared_ptr> compressorTableVector; 147 | const CompressorTableIdx* const compressorTablePointer; 148 | const std::shared_ptr> compressorTableInitVector; 149 | const CompressorTableIdx* const compressorTableInitPointer; 150 | 151 | ssize_t compress(View src, View dst) const; 152 | ssize_t compress(const std::vector &src, std::vector &dst) const { 153 | ssize_t r = compress(make_view(src), make_view(dst)); 154 | if (r<0) return r; 155 | dst.resize(r); 156 | return dst.size(); 157 | } 158 | 159 | TMarlinCompress(const TMarlinDictionary &dictionary) : 160 | K(dictionary.K), O(dictionary.O), shift(dictionary.shift), maxWordSize(dictionary.maxWordSize), 161 | efficiency(dictionary.efficiency), 162 | unrepresentedSymbolToken(dictionary.marlinAlphabet.size()), 163 | source2marlin(buildSource2marlin(dictionary)), 164 | compressorTableVector(buildCompressorTable(dictionary)), 165 | compressorTablePointer(compressorTableVector->data()), 166 | compressorTableInitVector(buildCompressorTableInit(dictionary)), 167 | compressorTableInitPointer(compressorTableInitVector->data()) 168 | {} 169 | 170 | TMarlinCompress( 171 | size_t K_, size_t O_, size_t shift_, size_t maxWordSize_, double efficiency_, 172 | MarlinIdx unrepresentedSymbolToken_, 173 | const std::array &source2marlin_, 174 | const CompressorTableIdx* const compressorTablePointer_, 175 | const CompressorTableIdx* const compressorTableInitPointer_ 176 | ) : 177 | K(K_), O(O_), shift(shift_), maxWordSize(maxWordSize_), efficiency(efficiency_), 178 | unrepresentedSymbolToken(unrepresentedSymbolToken_), 179 | source2marlin(source2marlin_), 180 | compressorTableVector(), 181 | compressorTablePointer(compressorTablePointer_), 182 | compressorTableInitVector(), 183 | 
compressorTableInitPointer(compressorTableInitPointer_) 184 | {} 185 | 186 | constexpr static const size_t FLAG_NEXT_WORD = 1UL<<(8*sizeof(CompressorTableIdx)-1); 187 | 188 | private: 189 | std::array buildSource2marlin(const TMarlinDictionary &dictionary) const; 190 | std::unique_ptr> buildCompressorTable(const TMarlinDictionary &dictionary) const; 191 | std::unique_ptr> buildCompressorTableInit(const TMarlinDictionary &dictionary) const; 192 | }; 193 | 194 | template 195 | struct TMarlinDecompress { 196 | 197 | typedef Word_ Word; 198 | typedef MarlinSymbol_ MarlinSymbol; 199 | const size_t K,O,shift,maxWordSize; 200 | 201 | const std::unique_ptr> decompressorTableVector; 202 | const TSource* const decompressorTablePointer; 203 | const TSource marlinMostCommonSymbol; 204 | const bool isSkip; 205 | 206 | ssize_t decompress(View src, View dst) const; 207 | ssize_t decompress(const std::vector &src, std::vector &dst) const { 208 | return decompress(make_view(src), make_view(dst)); 209 | } 210 | 211 | TMarlinDecompress(const TMarlinDictionary &dictionary) : 212 | K(dictionary.K), O(dictionary.O), shift(dictionary.shift), maxWordSize(dictionary.maxWordSize), 213 | decompressorTableVector(buildDecompressorTable(dictionary)), 214 | decompressorTablePointer(decompressorTableVector->data()), 215 | marlinMostCommonSymbol(dictionary.marlinAlphabet.front().sourceSymbol), 216 | isSkip(dictionary.isSkip) 217 | {} 218 | 219 | TMarlinDecompress( 220 | size_t K_, size_t O_, size_t shift_, size_t maxWordSize_, 221 | const TSource* const decompressorTablePointer_, 222 | const TSource marlinMostCommonSymbol_, 223 | const bool isSkip_ 224 | ) : 225 | K(K_), O(O_), shift(shift_), maxWordSize(maxWordSize_), 226 | decompressorTableVector(), 227 | decompressorTablePointer(decompressorTablePointer_), 228 | marlinMostCommonSymbol(marlinMostCommonSymbol_), 229 | isSkip(isSkip_) 230 | {} 231 | private: 232 | std::unique_ptr> buildDecompressorTable(const TMarlinDictionary &dictionary) const; 
233 | }; 234 | 235 | template 236 | struct TMarlin : 237 | public TMarlinCompress, 238 | public TMarlinDecompress { 239 | 240 | const std::string name; 241 | const size_t K,O,shift,maxWordSize; 242 | 243 | 244 | TMarlin( 245 | std::string name_, 246 | const std::vector &sourceAlphabet_, 247 | Configuration conf_ = Configuration() ) : 248 | TMarlin( name_, TMarlinDictionary(sourceAlphabet_, conf_) ) {} 249 | 250 | 251 | TMarlin( 252 | std::string name_, 253 | TMarlinDictionary dictionary ) : 254 | TMarlinCompress(dictionary), 255 | TMarlinDecompress(dictionary), 256 | name(name_), 257 | K(dictionary.K), O(dictionary.O), shift(dictionary.shift), maxWordSize(dictionary.maxWordSize) 258 | {} 259 | 260 | 261 | TMarlin( 262 | std::string name_, 263 | size_t K_, size_t O_, size_t shift_, size_t maxWordSize_, double efficiency_, 264 | MarlinIdx unrepresentedSymbolToken_, 265 | const std::array source2marlin_, 266 | const typename TMarlinCompress::CompressorTableIdx* compressorTablePointer_, 267 | const typename TMarlinCompress::CompressorTableIdx* compressorTableInitPointer_, 268 | const TSource* decompressorTablePointer_, 269 | const TSource marlinMostCommonSymbol_, 270 | const bool isSkip_ 271 | ) : 272 | TMarlinCompress( 273 | K_, O_, shift_, maxWordSize_, efficiency_, 274 | unrepresentedSymbolToken_, source2marlin_, compressorTablePointer_, compressorTableInitPointer_), 275 | TMarlinDecompress( 276 | K_, O_, shift_, maxWordSize_, decompressorTablePointer_, marlinMostCommonSymbol_, isSkip_), 277 | name(name_), 278 | K(K_), O(O_), shift(shift_), maxWordSize(maxWordSize_) {} 279 | }; 280 | 281 | 282 | 283 | } 284 | 285 | 286 | typedef marlin::TMarlin Marlin; 287 | 288 | #endif 289 | #endif 290 | 291 | -------------------------------------------------------------------------------- /src/configuration.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | 
Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | ***********************************************************************/ 28 | 29 | #include 30 | 31 | using namespace marlin; 32 | 33 | /* Completes a configuration with default values and returns it. The defaults are added with emplace(), which (for the std::map this function returns) leaves keys that are already present untouched, so caller-supplied settings take precedence. */ template 34 | std::map TMarlinDictionary::updateConf( 35 | const std::vector &sourceAlphabet, 36 | Configuration conf) { 37 | 38 | conf.emplace("K",8); 39 | conf.emplace("O",4); 40 | 41 | conf.emplace("debug",1); 42 | // conf.emplace("purgeProbabilityThreshold",1e-99); 43 | // conf.emplace("purgeProbabilityThreshold",1e-6); 44 | // conf.emplace("purgeProbabilityThreshold",0.5/4096/32); 45 | conf.emplace("purgeProbabilityThreshold",0.5/4096/32); 46 | conf.emplace("iterations",3); 47 | // conf.emplace("minMarlinSymbols", std::max(1U<::calcSourceEntropy(sourceAlphabet); 57 | 58 | /* No shift given: try shift = 0..5, building a dictionary for each, and keep a candidate only when its compressionRatio beats the best so far by more than a factor 1.0001. */ if (not conf.count("shift")) { 59 | 60 | double best = 0; 61 | size_t shift = 0; 62 | for (int i=0; i<6; i++) { 63 | 64 | auto testConf = conf; 65 | testConf["shift"] = shift; 66 | double test = TMarlinDictionary(sourceAlphabet, testConf).compressionRatio; 67 | if (test > 1.0001*best) { 68 | best = test; 69 | conf = testConf; 70 | }; 71 | shift ++; 72 | } 73 | } 74 | 75 | conf["maxWordSize"] = maxWordSize; 76 | 77 | /* maxWordSize == 0 selects automatic search: candidates sz-1 for sz = 4, 8, 16, ... up to autoMaxWordSize (default 64) are tried, stopping at the first one that does not improve the ratio. */ if (conf["maxWordSize"]==0) { 78 | 79 | conf.emplace("autoMaxWordSize",64); 80 | 81 | double best = 0.; 82 | size_t sz = 4; 83 | while (sz <= conf["autoMaxWordSize"]) { 84 | 85 | auto testConf = conf; 86 | testConf["maxWordSize"] = sz-1; 87 | double test = TMarlinDictionary(sourceAlphabet, testConf).compressionRatio; 88 | if (test > 1.0001*best) { 89 | best = test; 90 | conf = testConf; 91 | } else break; 92 | sz*=2; 93 | } 94 | } 95 | 96 | //printf("%lf %lf\n", conf["maxWordSize"], conf["shift"]); 97 | 98 | return conf; 99 | } 100 | 101 | 102 | //////////////////////////////////////////////////////////////////////// 103 | // 104 | // Explicit Instantiations 105 | #include "instantiations.h" 106 | INSTANTIATE(TMarlinDictionary) 107 | //typedef std::map phonyMap; // Commas do not fit well within macros 108 | 
//INSTANTIATE_MEMBER(TMarlinDictionary, updateConf(const std::vector &sourceAlphabet, Configuration conf) -> phonyMap) 109 | -------------------------------------------------------------------------------- /src/dictionary.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | ***********************************************************************/ 28 | 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | using namespace marlin; 40 | 41 | namespace { 42 | 43 | struct Node; 44 | typedef std::shared_ptr SNode; 45 | /* Tree node. The inherited vector holds the children; p is the probability still assigned to the node itself and sz its depth (children of the root get sz = 1). */ struct Node : std::vector { 46 | double p=0; 47 | size_t sz=0; 48 | }; 49 | 50 | /* Builds a word tree from the given state probabilities and returns its root. Pstates is taken by value because it is normalised in place below. */ template 51 | SNode buildTree(const TMarlinDictionary &dictionary, std::vector Pstates) { 52 | 53 | // Normalizing the state probabilities makes the algorithm more stable 54 | double factor = 1e-10; 55 | for (auto &&p : Pstates) factor += p; 56 | for (auto &&p : Pstates) p/=factor; 57 | for (auto &&p : Pstates) if (std::abs(p-1.)<0.0001) p=1.; 58 | for (auto &&p : Pstates) if (std::abs(p-0.)<0.0001) p=0.; 59 | 60 | 61 | /* PN[i] ends up as the sum of the alphabet probabilities from index i to the end (suffix sums). */ std::vector PN; 62 | for (auto &&a : dictionary.marlinAlphabet) PN.push_back(a.p); 63 | for (size_t i=PN.size()-1; i; i--) 64 | PN[i-1] += PN[i]; 65 | 66 | std::vector Pchild(PN.size()); 67 | for (size_t i=0; ip - rhs->p) > 1e-10) 72 | return lhs->pp; 73 | return false; 74 | }; 75 | std::priority_queue, decltype(cmp)> pq(cmp); 76 | 77 | // DICTIONARY INITIALIZATION 78 | SNode root = std::make_shared(); 79 | 80 | // Include empty word 81 | // pq.push(root); 82 | root->p = 1; 83 | int retiredNodes = 0; 84 | 85 | for (size_t c=0; cpush_back(std::make_shared()); 91 | root->back()->p = sum * dictionary.marlinAlphabet[c].p; 92 | root->p -= root->back()->p; 93 | root->back()->sz = 1; 94 | if (root->back()->p == 0) { 95 | root->back()->p = -1; 96 | retiredNodes--; // This node will eventually be eliminated, but acts now as a placeholder 97 | } 98 | pq.push(root->back()); 99 | } 100 | 101 | // DICTIONARY GROWING 102 | size_t numWordsPerChapter = 1U<sz >= dictionary.maxWordSize and not node->empty()) { 111 | retiredNodes++; 112 | continue; 113 | } 114 | 115 | /* A node is also retired when its depth reaches 255 or when it already has one child per alphabet symbol. */ if (node->sz == 255) { 116 | retiredNodes++; 117 | continue; 118 | } 119 | 120 | if (node->size() == 
dictionary.marlinAlphabet.size()) { 121 | retiredNodes++; 122 | continue; 123 | } 124 | 125 | /* Split off a new child: it receives the fraction Pchild[current child count] of the node's probability, and both the child and the reduced parent go back into the queue. */ double p = node->p * Pchild[node->size()]; 126 | node->push_back(std::make_shared()); 127 | node->back()->p = p; 128 | node->back()->sz = node->sz+1; 129 | pq.push(node->back()); 130 | node->p -= p; 131 | pq.push(node); 132 | } 133 | 134 | // Renormalize probabilities. 135 | /* Breadth-first walk over the whole tree that multiplies every p by factor (the value Pstates was divided by at the top). sum and num only feed the commented-out debug line. */ { 136 | std::queue q(std::deque{ root }); 137 | double sum=0, num=0; 138 | while (not q.empty()) { 139 | sum += q.front()->p; num++; 140 | q.front()->p *= factor; 141 | for (auto &&child : *q.front()) 142 | q.push(child); 143 | q.pop(); 144 | } 145 | //std::cerr << sum << " sum - num " << num << std::endl; 146 | } 147 | return root; 148 | } 149 | 150 | 151 | /* Flattens a tree into a list of words using an explicit stack. A word is the sequence of child indices on the path from the root; its state is the number of children of the node it ends on. Words that are empty (the root) or have p < 0 (placeholders) are not emitted. */ template::Word> 152 | std::vector buildChapterWords(const TMarlinDictionary &, const SNode root) { 153 | 154 | std::vector ret; 155 | 156 | std::stack> q; 157 | Word rootWord; 158 | rootWord.p = root->p; 159 | q.emplace(root, rootWord); 160 | 161 | while (not q.empty()) { 162 | SNode n = q.top().first; 163 | Word w = q.top().second; 164 | q.pop(); 165 | if (not w.empty() and w.p>=0) 166 | ret.push_back(w); 167 | for (size_t i = 0; isize(); i++) { 168 | 169 | Word w2 = w; 170 | // w2.push_back(marlinAlphabet[i].sourceSymbol); 171 | w2.push_back(i); 172 | w2.p = n->at(i)->p; 173 | w2.state = n->at(i)->size(); 174 | 175 | assert(n->at(i)->sz == w2.size()); 176 | q.emplace(n->at(i), w2); 177 | } 178 | } 179 | 180 | //std::cout << ret.size() << std::endl; 181 | return ret; 182 | } 183 | 184 | 185 | /* For every chapter tree: collects its words, orders them (presumably with cmp, by state and then by descending probability), places them into a block of numWordsPerChapter slots and appends that block to the result. */ template::Word> 186 | std::vector arrangeAndFuse(const TMarlinDictionary &dictionary, const std::vector chapters) { 187 | 188 | std::vector ret; 189 | for (auto &&chapter : chapters) { 190 | 191 | std::vector sortedDictionary = buildChapterWords(dictionary,chapter); 192 | 193 | auto cmp = [](const Word &lhs, const Word &rhs) { 194 | if (lhs.state != rhs.state) return lhs.state 1e-10) return lhs.p > rhs.p; 196 | return lhs w(numWordsPerChapter,Word()); 205 | 
for (size_t i=0,j=0,k=0; i=w.size()) 208 | j=++k; 209 | 210 | w[j] = sortedDictionary[i++]; 211 | } 212 | /* The filled block of this chapter is appended after the blocks of the previous chapters. */ ret.insert(ret.end(), w.begin(), w.end()); 213 | } 214 | return ret; 215 | } 216 | 217 | 218 | // Debug functions 219 | /* Prints the words; nothing is printed unless conf "debug" is at least 3. */ template::Word> 220 | void print(const TMarlinDictionary &dictionary, std::vector debugWords) { 221 | 222 | if (dictionary.conf.at("debug")<3) return; 223 | if (dictionary.conf.at("debug")<4 and debugWords.size()/(1U< 40) return; 224 | 225 | for (size_t i=0; i 259 | void print(const TMarlinDictionary &dictionary, std::vector> Pstates) { 260 | 261 | if (dictionary.conf.at("debug")<3) return; 262 | for (size_t i=0; i 275 | /* Returns a cleaned copy of the alphabet: an empty input gets a single entry, negative and NaN entries become 0, and if the resulting total is below 1e-10 entry 0 is set to 1. */ std::vector TMarlinDictionary::sanitizeAlphabet(const std::vector &sourceAlphabet) { 276 | 277 | std::vector res = sourceAlphabet; 278 | 279 | if (res.size()<1) res.push_back(0); 280 | double sum = 0; 281 | for (auto &v : res) { 282 | if (v<0) v=0; 283 | if (std::isnan(v)) v=0; 284 | sum += v; 285 | } 286 | if (sum<1e-10) res[0] = 1; 287 | 288 | return res; 289 | } 290 | 291 | 292 | template 293 | /* Builds the Marlin alphabet: probabilities of source symbols that share the same value of (symbol >> shift) are added together into one Marlin symbol. */ auto TMarlinDictionary::buildMarlinAlphabet() const -> MarlinAlphabet { 294 | 295 | // Group symbols by their high bits 296 | std::map symbolsShifted; 297 | for (size_t i=0; i>shift] += sourceAlphabet[i]; 299 | 300 | MarlinAlphabet ret; 301 | for (auto &&symbol : symbolsShifted) 302 | ret.push_back(MarlinSymbol{TSource(symbol.first<rhs.p; // Descending in probability 307 | return lhs.sourceSymbolconf.at("minMarlinSymbols") and 314 | (ret.size()>conf.at("maxMarlinSymbols") or 315 | (ret.probabilityOfUnrepresentedSymbol+ret.back().p) 328 | /* Entropy of the alphabet in bits: the sum of -p*log2(p) over its entries. */ double TMarlinDictionary::calcSourceEntropy(const std::vector &sourceAlphabet) { 329 | 330 | double sourceEntropy = 0; 331 | for (size_t i=0; i0.) 
333 | sourceEntropy += -sourceAlphabet[i]*std::log2(sourceAlphabet[i]); 334 | 335 | return sourceEntropy; 336 | } 337 | 338 | 339 | template 340 | /* Returns sourceEntropy divided by the mean number of bits spent per symbol. meanLength is the probability-weighted word length; badWords (words with p < 1e-10) is only reported in the debug print. */ double TMarlinDictionary::calcEfficiency() const { 341 | 342 | double meanLength = 0; 343 | for (auto &&w : words) 344 | meanLength += w.p * w.size(); 345 | 346 | int badWords = 0; 347 | for (auto &&w : words) 348 | if (w.p < 1e-10) badWords++; 349 | 350 | size_t numWordsPerChapter = 1U<1) printf("E:%3.8lf IE:%3.8lf K:%2ld S:%2ld m:%4.2lf bw:%d\n", 362 | sourceEntropy/meanBitsPerSymbol, 363 | sourceEntropy/idealMeanBitsPerSymbol, 364 | marlinAlphabet.size(),shift,meanLength,badWords); 365 | 366 | return sourceEntropy / meanBitsPerSymbol; 367 | } 368 | 369 | template 370 | /* Returns true when no word is longer than maxWordSize. */ bool TMarlinDictionary::calcSkip() const { 371 | 372 | bool valid = true; 373 | for (auto w : words) 374 | if (w.size()>maxWordSize) 375 | valid=false; 376 | return valid; 377 | } 378 | 379 | 380 | template 381 | /* Builds the word list from per-chapter trees via arrangeAndFuse(), then runs conf "iterations" refinement passes; each pass first resets all state probabilities to zero and recomputes them (the words are presumably rebuilt afterwards). */ auto TMarlinDictionary::buildDictionary() const -> std::vector { 382 | 383 | std::vector> Pstates; 384 | for (size_t k=0; k<(1U< PstatesSingle(marlinAlphabet.size()+1, 0.); 386 | PstatesSingle[0] = 1./(1U< dictionaries; 391 | for (size_t k=0; k<(1U< ret = arrangeAndFuse(*this,dictionaries); 395 | 396 | print(*this,ret); 397 | 398 | size_t iterations = conf.at("iterations"); 399 | 400 | while (iterations--) { 401 | 402 | // UPDATING STATE PROBABILITIES 403 | { 404 | for (auto &&pk : Pstates) 405 | for (auto &&p : pk) 406 | p = 0.; 407 | 408 | for (size_t i=0; i2) printf("Efficiency: %3.4lf\n", calcEfficiency(ret)); 423 | } 424 | if (conf.at("debug")>1) for (auto &&c : conf) std::cout << c.first << ": " << c.second << std::endl; 425 | //if (conf.at("debug")>0) printf("Efficiency: %3.4lf\n", calcEfficiency(ret)); 426 | 427 | return ret; 428 | } 429 | 430 | //////////////////////////////////////////////////////////////////////// 431 | // 432 | // Explicit Instantiations 433 | #include "instantiations.h" 434 | INSTANTIATE(TMarlinDictionary) 435 | 
-------------------------------------------------------------------------------- /src/distribution.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /* Collection of static helpers for discrete probability distributions over N symbols. */ class Distribution { 9 | 10 | 11 | /* Returns pdf scaled so that its entries add up to 1; the sum is accumulated from the back of the vector. */ static inline std::vector norm1(std::vector pdf) { 12 | 13 | double sum = std::accumulate(pdf.rbegin(), pdf.rend(), 0.); 14 | for (auto &v : pdf) v/=sum; 15 | return pdf; 16 | } 17 | 18 | static inline std::vector PDFGaussian(size_t N, double b) { 19 | 20 | std::vector pdf(N, 1e-100); 21 | for (int i=-10*int(N)+1; i PDFLaplace(size_t N, double b) { 27 | 28 | /* Two-sided exponential decay around symbol 0, wrapped modulo N. Every bin starts at 1e-100, so no probability is exactly zero. */ std::vector pdf(N, 1e-100); 29 | pdf[0] += 1.; 30 | for (size_t i=1; i<10*N; i++) { 31 | pdf[ i % N] += std::exp(-double( i)/b ); 32 | pdf[(10*N-i) % N] += std::exp(-double( i)/b ); 33 | } 34 | return norm1(pdf); 35 | } 36 | 37 | /* One-sided exponential decay starting at symbol 0, wrapped modulo N. */ static inline std::vector PDFExponential(size_t N, double b) { 38 | 39 | std::vector pdf(N, 1e-100); 40 | pdf[0] += 1.; 41 | for (size_t i=1; i<10*N; i++) 42 | pdf[ i % N] += std::exp(-double( i)/b ); 43 | 44 | return norm1(pdf); 45 | } 46 | 47 | public: 48 | 49 | enum Type { Gaussian, Laplace, Exponential }; 50 | 51 | 52 | /* Entropy of pdf: the sum of -p*log2(p) over its entries. */ static inline double entropy(const std::vector &pdf) { 53 | 54 | double distEntropy=0; 55 | for (size_t i=0;i0.) 57 | distEntropy += -pdf[i]*std::log2(pdf[i]); // log2 is used, so the entropy is expressed in bits 
58 | 59 | return distEntropy; 60 | } 61 | 62 | template 63 | /* std::array overload: copies the array into a vector and forwards. */ static inline double entropy(const std::array &pdf) { 64 | return entropy(std::vector(pdf.begin(), pdf.end())); 65 | } 66 | 67 | /* Returns an N-symbol distribution of the requested type whose entropy divided by log2(N) is close to h. The shape parameter b starts at 2^16 and is adjusted by bisection with steps 2^15, 2^14, ... until the step drops to 1e-12. */ static inline std::vector pdf(size_t N, Type type, double h) { 68 | 69 | auto *dist = &PDFGaussian; 70 | if (type == Gaussian ) dist = &PDFGaussian; 71 | else if (type == Laplace ) dist = &PDFLaplace; 72 | else if (type == Exponential) dist = &PDFExponential; 73 | else throw std::runtime_error("Unsupported distribution"); 74 | 75 | double b=1<<16; 76 | // Estimate parameter b from h using dichotomic (bisection) search 77 | double stepSize = 1<<15; 78 | while (stepSize>1E-12) { 79 | if (h > entropy(dist(N,b))/std::log2(N) ) b+=stepSize; 80 | else b-=stepSize; 81 | stepSize/=2.; 82 | } 83 | 84 | //std::cerr << "b: " << b << std::endl; 85 | 86 | return dist(N,b); 87 | } 88 | 89 | /* 256-symbol convenience overload that returns the result as a std::array. */ static inline std::array pdf(Type type, double h) { 90 | 91 | auto P = pdf(256, type, h); 92 | std::array A; 93 | for (size_t i=0; i<256; i++) A[i]=P[i]; 94 | return A; 95 | } 96 | 97 | /* Returns S pseudo-random symbols looked up through the 65536-entry table cdf. The generator state starts from the fixed constant 135154. NOTE(review): 'uint' is not a standard C++ type name; it is provided by some platform headers only. */ static inline std::vector getResiduals(const std::vector &pdf, size_t S) { 98 | 99 | int8_t cdf[0x10000]; 100 | uint j=0; 101 | double lim=0; 102 | for (uint i=0; i ret; 110 | uint32_t rnd = 135154; 111 | for (size_t i=0; i> 16); 113 | ret.push_back(cdf[rnd&0xFFFF]); 114 | } 115 | 116 | return ret; 117 | } 118 | 119 | /* std::array overload: copies the array into a vector and forwards. */ static inline std::vector getResiduals(const std::array &pdf, size_t S) { 120 | return getResiduals(std::vector(pdf.begin(), pdf.end()), S); 121 | } 122 | 123 | }; 124 | -------------------------------------------------------------------------------- /src/entropyCoder.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of 
this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | ***********************************************************************/ 28 | 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "profiler.hpp" 37 | 38 | #define LIKELY(condition) (__builtin_expect(static_cast(condition), 1)) 39 | #define UNLIKELY(condition) (__builtin_expect(static_cast(condition), 0)) 40 | 41 | using namespace marlin; 42 | 43 | namespace { 44 | 45 | 46 | /* Packs the masked bits of the source with the BMI2 pext instruction: each step consumes one 64-bit input word and advances the output by compressor.shift bytes. Returns the number of output bytes advanced. The loop ends on pointer equality, so the input must span a multiple of 8 bytes. */ template 47 | __attribute__ ((target ("bmi2"))) 48 | ssize_t shift8(const TMarlinCompress &compressor, View src, View dst) { 49 | 50 | uint64_t mask=0; 51 | for (size_t i=0; i<8; i++) 52 | mask |= ((1ULL<(src.start); 55 | const uint64_t *i64end = reinterpret_cast(src.end); 56 | 57 | uint8_t *o8 = dst.start; 58 | 59 | while (i64 != i64end) { 60 | *reinterpret_cast(o8) = _pext_u64(*i64++, mask); 61 | o8 += compressor.shift; 62 | } 63 | return o8 - dst.start; 64 | } 65 | 66 | 67 | 68 | /* Index helper for the flat compressor table, addressed by a word index and the next letter. */ class JumpTable { 69 | 70 | const size_t alphaStride; // Number of bits needed to index a letter: ceil(log2(nAlpha)) 71 | const size_t wordStride; // Number of bits of the word index: keySize+overlap 72 | public: 73 | 74 | JumpTable(size_t keySize, size_t overlap, size_t nAlpha) : 75 | alphaStride(std::ceil(std::log2(nAlpha))), 76 | wordStride(keySize+overlap) {} 77 | 78 | template 79 | void initTable(std::vector &table) { 80 | table = std::vector(((1< 84 | inline T &operator()(T *table, const T0 &word, const T1 &nextLetter) const { 85 | auto v = (word&((1< 93 | /* Encoder selected by compress() when K==8; it writes the low 8 bits of the current word index to the output. */ ssize_t compressMarlin8 ( 94 | const TMarlinCompress &compressor, 95 | View src, 96 | View dst, 97 | std::vector &unrepresentedSymbols) 98 | { 99 | JumpTable jump(compressor.K, compressor.O, compressor.unrepresentedSymbolToken+1); 100 | 101 | uint8_t *out = dst.start; 102 | const TSource *in = src.start; 103 | 104 | typename TMarlinCompress::CompressorTableIdx j = 0; 105 | 106 | 107 | //We look for the word that sets up the machine state. 
108 | { 109 | /* The first symbol selects the initial word through compressorTableInitPointer. */ TSource ss = *in++; 110 | 111 | MarlinIdx ms = compressor.source2marlin[ss>>compressor.shift]; 112 | /* A symbol without a Marlin code is encoded as symbol 0 and its position is recorded in unrepresentedSymbols for the caller. */ if (ms==compressor.unrepresentedSymbolToken) { 113 | unrepresentedSymbols.push_back(in-src.start-1); 114 | ms = 0; // 0 must be always the most probable symbol; 115 | //printf("%04x %02x\n", in-src.start-1, ss); 116 | } 117 | 118 | j = compressor.compressorTableInitPointer[ms]; 119 | } 120 | 121 | MarlinIdx ms; 122 | while (in>compressor.shift]; 125 | if (ms==compressor.unrepresentedSymbolToken) { 126 | unrepresentedSymbols.push_back(in-src.start-1); 127 | ms = 0; // 0 must be always the most probable symbol; 128 | //printf("%04x %02x\n", in-src.start-1, ss); 129 | } 130 | 131 | /* The current word is written on every step; the output pointer only advances when the jump table reports, through FLAG_NEXT_WORD, that a new word starts. */ *out = j & 0xFF; 132 | j = jump(compressor.compressorTablePointer, j, ms); 133 | 134 | if (j & compressor.FLAG_NEXT_WORD) { 135 | out++; 136 | } 137 | 138 | /* Gives up with -1 once fewer than 16 bytes of dst remain. */ if (dst.end-out<16) { 139 | return -1; // TODO: find the exact value 140 | } 141 | } 142 | //if (not (j & FLAG_NEXT_WORD)) 143 | *out++ = j & 0xFF; 144 | 145 | return out - dst.start; 146 | } 147 | 148 | template 149 | /* Encoder selected by compress() when K is not 8: word indices of K bits are packed into the byte stream. */ ssize_t compressMarlinFast( 150 | const TMarlinCompress &compressor, 151 | View src, 152 | View dst, 153 | std::vector &unrepresentedSymbols) 154 | { 155 | 156 | JumpTable jump(compressor.K, compressor.O, compressor.unrepresentedSymbolToken+1); 157 | 158 | uint8_t *out = dst.start; 159 | const TSource *in = src.start; 160 | 161 | typename TMarlinCompress::CompressorTableIdx j = 0; 162 | 163 | 164 | //We look for the word that sets up the machine state. 
165 | { 166 | /* The first symbol selects the initial word through compressorTableInitPointer. */ TSource ss = *in++; 167 | 168 | MarlinIdx ms = compressor.source2marlin[ss>>compressor.shift]; 169 | if (ms==compressor.unrepresentedSymbolToken) { 170 | unrepresentedSymbols.push_back(in-src.start-1); 171 | ms = 0; // 0 must be always the most probable symbol; 172 | //printf("%04x %02x\n", in-src.start-1, ss); 173 | } 174 | 175 | j = compressor.compressorTableInitPointer[ms]; 176 | } 177 | 178 | /* value is a 32-bit accumulator filled from the top: each finished word contributes K bits (with the FLAG_NEXT_WORD bit cleared) and whole bytes are flushed from the most significant end. */ uint32_t value = 0; 179 | int32_t valueBits = 0; 180 | while (in>compressor.shift]; 187 | if (ms==compressor.unrepresentedSymbolToken) { 188 | unrepresentedSymbols.push_back(in-src.start-1); 189 | ms = 0; // 0 must be always the most probable symbol; 190 | //printf("%04x %02x\n", in-src.start-1, ss); 191 | } 192 | 193 | auto jOld = j; 194 | j = jump(compressor.compressorTablePointer, j, ms); 195 | 196 | if (j & compressor.FLAG_NEXT_WORD) { 197 | 198 | value |= ((jOld | compressor.FLAG_NEXT_WORD) ^ compressor.FLAG_NEXT_WORD) << (32 - compressor.K - valueBits); 199 | valueBits += compressor.K; 200 | } 201 | 202 | while (valueBits>8) { 203 | *out++ = value >> 24; 204 | value = value << 8; 205 | valueBits -= 8; 206 | } 207 | } 208 | 209 | /* The last word is always emitted, then every remaining bit is flushed; the final byte is padded with zero bits. */ value |= ((j | compressor.FLAG_NEXT_WORD) ^ compressor.FLAG_NEXT_WORD) << (32 - compressor.K - valueBits); 210 | valueBits += compressor.K; 211 | 212 | while (valueBits>0) { 213 | *out++ = value >> 24; 214 | value = value << 8; 215 | valueBits -= 8; 216 | } 217 | 218 | return out - dst.start; 219 | } 220 | 221 | } 222 | 223 | template 224 | /* Builds the table read as compressorTableInitPointer[ms]: for each Marlin symbol one word index is appended, the search stopping at the first match. */ auto TMarlinCompress::buildCompressorTableInit(const TMarlinDictionary &dictionary) const -> std::unique_ptr> { 225 | 226 | auto ret = std::make_unique>(); 227 | 228 | for (size_t ms=0; mspush_back(i); 233 | break; 234 | } 235 | } 236 | } 237 | return ret; 238 | } 239 | 240 | template 241 | /* Builds the jump table that the encoders consult through JumpTable::operator(). */ auto TMarlinCompress::buildCompressorTable(const TMarlinDictionary &dictionary) const -> std::unique_ptr> { 242 | 243 | auto ret = std::make_unique>(); 244 | JumpTable jump(K, O, 
unrepresentedSymbolToken+1); 245 | jump.initTable(*ret); 246 | 247 | /* Below, every word is walked from its last symbol back towards its one-symbol prefix, writing parent -> child transitions into the table; a prefix that is not itself a word raises std::runtime_error. */ const size_t NumChapters = 1<> positions(NumChapters); 250 | 251 | // Init the mapping (to know where each word goes) 252 | for (size_t k=0; k 1) { 262 | TSource lastSymbol = word.back(); 263 | word.pop_back(); 264 | if (not positions[k].count(word)) throw(std::runtime_error("This word has no parent. SHOULD NEVER HAPPEN!!!")); 265 | size_t parentIdx = positions[k][word]; 266 | jump(&ret->front(), parentIdx, lastSymbol) = wordIdx; 267 | wordIdx = parentIdx; 268 | } 269 | } 270 | } 271 | 272 | //Link between inner dictionaries 273 | /* Slots still holding CompressorTableIdx(-1) receive the index of the single-symbol word Word(1,j) in chapter i%NumChapters, tagged with FLAG_NEXT_WORD. */ for (size_t k=0; kfront(),i,j) == CompressorTableIdx(-1)) // words that are not parent of anyone else. 277 | jump(&ret->front(),i,j) = positions[i%NumChapters][Word(1,j)] + FLAG_NEXT_WORD; 278 | 279 | return ret; 280 | } 281 | 282 | template 283 | /* Compresses src into dst. Returns -1 when dst is smaller than src, 0 for empty input, and otherwise a byte count built from the sizes of the parts written. */ ssize_t TMarlinCompress::compress(View src, View dst) const { 284 | // Assertions 285 | if (dst.nBytes() < src.nBytes()) return -1; //TODO: Real error codes 286 | 287 | // Special case: empty! Nothing to compress. 288 | if (src.nElements()==0) return 0; 289 | 290 | // Special case: the entire block is made of one symbol! 291 | { 292 | size_t count = 0; 293 | for (size_t i=0; i(dst.start)[0] = src.start[0]; 298 | return sizeof(TSource); 299 | } 300 | } 301 | 302 | // Special case: if srcSize is not multiple of 8, we force it to be. 303 | /* Leading elements are copied verbatim, one at a time, until the remaining byte count is a multiple of 8; padding counts the bytes copied this way. */ size_t padding = 0; 304 | while (src.nBytes() % 8 != 0) { 305 | *reinterpret_cast(dst.start)++ = *src.start++; 306 | padding += sizeof(TSource); 307 | } 308 | 309 | const size_t srcElementCount = src.nElements(); 310 | 311 | size_t residualSize = srcElementCount*shift/8; 312 | 313 | 314 | std::vector unrepresentedSymbols; 315 | // This part, we encode the number of unrepresented symbols in a byte. 316 | // We are optimistic and we hope that no unrepresented symbols are required. 317 | *dst.start = 0; 318 | 319 | // Valid portion available to encode the marlin message. 
320 | /* The Marlin stream goes right after the one-byte counter; the last residualSize bytes of dst are kept out of the encoder's reach. */ View marlinDst = marlin::make_view(dst.start+1,dst.end-residualSize); 321 | ssize_t marlinSize; 322 | if (false) { 323 | //marlinSize = compressMarlinReference(src, marlinDst, unrepresentedSymbols); 324 | } else if (K==8) { 325 | marlinSize = compressMarlin8(*this, src, marlinDst, unrepresentedSymbols); 326 | } else { 327 | marlinSize = compressMarlinFast(*this, src, marlinDst, unrepresentedSymbols); 328 | } 329 | 330 | /* Each unrepresented symbol costs its value plus an index whose width (1, 2, 4 or 8 bytes) depends on the element count. */ size_t unrepresentedSize = unrepresentedSymbols.size() * ( sizeof(TSource) + ( 331 | srcElementCount < 0x100 ? sizeof(uint8_t) : 332 | srcElementCount < 0x10000 ? sizeof(uint16_t) : 333 | srcElementCount < 0x100000000ULL ? sizeof(uint32_t) :sizeof(uint64_t) 334 | )); 335 | 336 | 337 | //if (unrepresentedSize) printf("%d \n", unrepresentedSize); 338 | // If not worth encoding, we store raw. 339 | /* Raw fallback: taken when the encoder failed, when more than 255 symbols are unrepresented (the counter is one byte), or when the encoded form would be larger than the input. */ if (marlinSize < 0 // If the encoded size is negative means that Marlin could not provide any meaningful compression, and the whole stream will be copied. 340 | or unrepresentedSymbols.size() > 255 341 | or 1 + marlinSize + unrepresentedSize + residualSize > src.nBytes()) { 342 | 343 | memcpy(dst.start,src.start,src.nBytes()); 344 | return padding + src.nBytes(); 345 | } 346 | 347 | 348 | *dst.start++ = unrepresentedSymbols.size(); 349 | dst.start += marlinSize; 350 | 351 | 352 | // Encode unrepresented symbols 353 | /* Each entry is written as the position s followed by the original symbol src.start[s]. */ for (auto &s : unrepresentedSymbols) { 354 | if (src.nElements() < 0x100) { 355 | *reinterpret_cast(dst.start)++ = s; 356 | } else if (src.nElements() < 0x10000) { 357 | *reinterpret_cast(dst.start)++ = s; 358 | } else if (src.nElements() < 0x100000000ULL) { 359 | *reinterpret_cast(dst.start)++ = s; 360 | } else { 361 | *reinterpret_cast(dst.start)++ = s; 362 | } 363 | *reinterpret_cast(dst.start)++ = src.start[s]; 364 | } 365 | 366 | // Encode residuals 367 | shift8(*this, src, dst); 368 | 369 | /* Output layout after any verbatim padding: counter byte, Marlin stream, (position, symbol) pairs, residual bits. */ return padding + 1 + marlinSize + unrepresentedSize + residualSize; 370 | } 371 | 372 | template 373 | std::array 
TMarlinCompress::buildSource2marlin( 374 | const TMarlinDictionary &dictionary) const { 375 | 376 | /* Lookup table indexed by (source symbol >> shift): every slot starts as unrepresentedSymbolToken, and the loop below overwrites the slots of the symbols it visits with their index i. */ std::array source2marlin_; 377 | source2marlin_.fill(unrepresentedSymbolToken); 378 | for (size_t i=0; i>shift] = i; 380 | return source2marlin_; 381 | } 382 | 383 | 384 | //////////////////////////////////////////////////////////////////////// 385 | // 386 | // Explicit Instantiations 387 | #include "instantiations.h" 388 | INSTANTIATE(TMarlinCompress) 389 | 390 | //INSTANTIATE_MEMBER(buildCompressorTableInit() const -> std::unique_ptr>) 391 | //INSTANTIATE_MEMBER(buildCompressorTable() const -> std::unique_ptr>) 392 | //INSTANTIATE_MEMBER(compress(View src, View dst) const -> ssize_t) 393 | 394 | -------------------------------------------------------------------------------- /src/entropyDecoder.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | ***********************************************************************/ 28 | 29 | #include 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | using namespace marlin; 38 | 39 | namespace { 40 | 41 | /* Restores the residual bits: the packed input is deposited with the BMI2 pdep instruction and OR-ed into dst, one 64-bit output word per step, advancing the input by decompressor.shift bytes per step. The loop ends on pointer equality, so dst must span a multiple of 8 bytes. */ template 42 | __attribute__ ((target ("bmi2"))) 43 | ssize_t shift8(const TMarlinDecompress &decompressor, View src, View dst) { 44 | 45 | // Decode residuals 46 | uint64_t mask=0; 47 | for (size_t i=0; i<8; i++) 48 | mask |= ((1ULL<(src.start); 51 | uint64_t *o64 = reinterpret_cast(dst.start); 52 | uint64_t *o64end = reinterpret_cast(dst.end); 53 | 54 | while (o64 != o64end) { 55 | *o64++ |= _pdep_u64(*reinterpret_cast(i8), mask); 56 | i8 += decompressor.shift; 57 | } 58 | 59 | return reinterpret_cast(o64) - dst.start; 60 | } 61 | 62 | /* Table-driven decoder for 8-bit words. Each table entry of type T holds the decoded data and, in its most significant byte, the amount by which the output pointer advances. The main loop decodes four words per iteration. */ template 63 | size_t decompress8_skip( 64 | const TMarlinDecompress &decompressor, 65 | View src, 66 | View dst) { 67 | 68 | const uint8_t *i8 = src.start; 69 | TSource *o8 = dst.start; 70 | 71 | const uint32_t overlappingMask = (1<<(8+decompressor.O))-1; 72 | uint64_t value = 0; 73 | 74 | auto D = decompressor.decompressorTablePointer; 75 | 76 | while (i8>24) & overlappingMask]; 83 | /* The whole entry is stored unconditionally; the next store begins where this word ends and overwrites the surplus bytes. */ *((T *)o8) = v; 84 | o8 += v >> ((sizeof(T)-1)*8); 85 | 86 | } 87 | 88 | { 89 | T v = ((const T *)D)[(value>>16) & overlappingMask]; 90 | *((T *)o8) = v; 91 | o8 += v >> ((sizeof(T)-1)*8); 92 | } 93 | 94 | { 95 | T v = ((const T *)D)[(value>>8) & overlappingMask]; 96 | *((T *)o8) = v; 97 | o8 += v >> ((sizeof(T)-1)*8); 98 | } 99 | 100 | { 101 | T v = ((const T *)D)[value & overlappingMask]; 102 | *((T *)o8) = v; 103 | o8 += v >> ((sizeof(T)-1)*8); 104 | } 105 | } 106 | 107 | while (i8> ((sizeof(T)-1)*8); 112 | memcpy(o8, &v, sz); 113 | // *((T *)o8) 
= v; 114 | 115 | o8 += sz; 116 | } 117 | } 118 | // if (endMarlin-i8 != 0) std::cerr << " {" << endMarlin-i8 << "} "; // SOLVED! PROBLEM IN THE CODE 119 | // if (o8end-o8 != 0) std::cerr << " [" << o8end-o8 << "] "; // SOLVED! PROBLEM IN THE CODE 120 | 121 | /* The value returned is the element count of dst, not a count derived from the bytes actually decoded. */ return dst.nElements(); 122 | } 123 | 124 | /* Decoder for words of K bits, with KK as the compile-time word size. Table entries keep the output advance in their top byte; before an entry is stored, that byte is replaced by marlinMostCommonSymbol using clearSizeMask and clearSizeOverlay. */ template 125 | size_t decompressKK( 126 | const TMarlinDecompress &decompressor, 127 | View src, 128 | View dst) { 129 | 130 | const uint8_t *i8 = src.start; 131 | TSource *o8 = dst.start; 132 | 133 | const uint64_t overlappingMask = (1<<(decompressor.K+decompressor.O))-1; 134 | const T clearSizeMask = T(-1)>>8; 135 | const T clearSizeOverlay = T(decompressor.marlinMostCommonSymbol) << ((sizeof(T)-1)*8); 136 | uint64_t value = 0; 137 | 138 | auto D = decompressor.decompressorTablePointer; 139 | 140 | constexpr size_t INCREMENT = KK<8?KK:KK/2; 141 | constexpr size_t INCREMENTSHIFT = INCREMENT*8; 142 | 143 | while (i8>((INCREMENT<=4?32:64)-INCREMENTSHIFT)); 151 | 152 | /* For KK<8 eight words are decoded per iteration (the four below plus the four that follow); otherwise only the four that follow. */ if (KK<8) { 153 | { 154 | T v = ((const T *)D)[(value>>(7*(KK%8))) & overlappingMask]; 155 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 156 | o8 += v >> ((sizeof(T)-1)*8); 157 | 158 | } 159 | 160 | { 161 | T v = ((const T *)D)[(value>>(6*(KK%8))) & overlappingMask]; 162 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 163 | o8 += v >> ((sizeof(T)-1)*8); 164 | } 165 | 166 | { 167 | T v = ((const T *)D)[(value>>(5*(KK%8))) & overlappingMask]; 168 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 169 | o8 += v >> ((sizeof(T)-1)*8); 170 | } 171 | 172 | { 173 | T v = ((const T *)D)[(value>>(4*(KK%8))) & overlappingMask]; 174 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 175 | o8 += v >> ((sizeof(T)-1)*8); 176 | } 177 | } 178 | 179 | { 180 | { 181 | T v = ((const T *)D)[(value>>(3*KK)) & overlappingMask]; 182 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 183 | o8 += v >> ((sizeof(T)-1)*8); 184 | 185 | } 186 | 187 | { 188 | T v = ((const T *)D)[(value>>(2*KK)) & 
overlappingMask]; 189 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 190 | o8 += v >> ((sizeof(T)-1)*8); 191 | } 192 | 193 | { 194 | T v = ((const T *)D)[(value>>(1*KK)) & overlappingMask]; 195 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 196 | o8 += v >> ((sizeof(T)-1)*8); 197 | } 198 | 199 | { 200 | T v = ((const T *)D)[(value>>(0*KK)) & overlappingMask]; 201 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 202 | o8 += v >> ((sizeof(T)-1)*8); 203 | } 204 | } 205 | } 206 | 207 | uint64_t valueBits = decompressor.O; 208 | while (i8 < src.end or valueBits>=decompressor.K+decompressor.O) { 209 | 210 | while (valueBits < decompressor.K+decompressor.O) { 211 | value = (value<<8) + uint64_t(*i8++); 212 | valueBits += 8; 213 | } 214 | 215 | size_t wordIdx = (value >> (valueBits-(decompressor.K+decompressor.O))) & overlappingMask; 216 | 217 | valueBits -= decompressor.K; 218 | 219 | { 220 | T v = ((const T *)D)[wordIdx]; 221 | // *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 222 | // o8 += v >> ((sizeof(T)-1)*8); 223 | 224 | 225 | size_t sz = v >> ((sizeof(T)-1)*8); 226 | 227 | T vv = (v & clearSizeMask) + clearSizeOverlay; 228 | memcpy(o8, &vv, std::min(sz,sizeof(T)-1)); 229 | // *((T *)o8) = v; 230 | 231 | o8 += sz; 232 | 233 | 234 | } 235 | } 236 | // if (endMarlin-i8 != 0) std::cerr << " {" << endMarlin-i8 << "} "; // SOLVED! PROBLEM IN THE CODE 237 | // if (o8end-o8 != 0) std::cerr << " [" << o8end-o8 << "] "; // SOLVED! 
PROBLEM IN THE CODE 238 | 239 | return dst.nElements(); 240 | } 241 | 242 | template 243 | __attribute__((optimize("unroll-all-loops"))) 244 | size_t decompressTTKK( 245 | const TMarlinDecompress &decompressor, 246 | View src, 247 | View dst) { 248 | 249 | register const uint8_t *i8 = src.start; 250 | register TSource *o8 = dst.start; 251 | 252 | register const uint64_t overlappingMask = (1<<(decompressor.K+decompressor.O))-1; 253 | register const T clearSizeMask = T(-1)>>8; 254 | register const T clearSizeOverlay = T(decompressor.marlinMostCommonSymbol) << ((sizeof(T)-1)*8); 255 | register uint64_t value = 0; 256 | 257 | register auto D = decompressor.decompressorTablePointer; 258 | 259 | constexpr size_t INCREMENT = KK<8?KK:KK/2; 260 | constexpr size_t INCREMENTSHIFT = INCREMENT*8; 261 | 262 | while (i8>((INCREMENT<=4?32:64)-INCREMENTSHIFT)); 270 | 271 | for (uint64_t j=0; j<4; j++) { 272 | const T *vp = &(((const T *)D)[((value>>uint64_t((3-j)*KK)) & overlappingMask)*TT]); 273 | //for (size_t i=0; i> ((sizeof(T)-1)*8))-8*(TT-1); 276 | *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 277 | o8 += sz; 278 | } 279 | } 280 | 281 | uint64_t valueBits = decompressor.O; 282 | while (i8 < src.end or valueBits>=decompressor.K+decompressor.O) { 283 | 284 | while (valueBits < decompressor.K+decompressor.O) { 285 | value = (value<<8) + uint64_t(*i8++); 286 | valueBits += 8; 287 | } 288 | 289 | size_t wordIdx = (value >> (valueBits-(decompressor.K+decompressor.O))) & overlappingMask; 290 | 291 | valueBits -= decompressor.K; 292 | 293 | { 294 | T v = ((const T *)D)[wordIdx]; 295 | // *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 296 | // o8 += v >> ((sizeof(T)-1)*8); 297 | 298 | 299 | size_t sz = v >> ((sizeof(T)-1)*8); 300 | 301 | T vv = (v & clearSizeMask) + clearSizeOverlay; 302 | memcpy(o8, &vv, std::min(sz,sizeof(T)-1)); 303 | // *((T *)o8) = v; 304 | 305 | o8 += sz; 306 | 307 | 308 | } 309 | } 310 | // if (endMarlin-i8 != 0) std::cerr << " {" << endMarlin-i8 
<< "} "; // SOLVED! PROBLEM IN THE CODE 311 | // if (o8end-o8 != 0) std::cerr << " [" << o8end-o8 << "] "; // SOLVED! PROBLEM IN THE CODE 312 | 313 | 314 | return dst.nElements(); 315 | } 316 | 317 | template 318 | size_t decompressFast( 319 | const TMarlinDecompress &decompressor, 320 | View src, View dst) { 321 | 322 | auto K = decompressor.K; 323 | auto O = decompressor.O; 324 | 325 | if (K==8 and decompressor.isSkip) return decompress8_skip(decompressor,src,dst); 326 | if (K==8) return decompressKK(decompressor,src,dst); 327 | 328 | if (K==7) return decompressKK(decompressor,src,dst); 329 | if (K==6) return decompressKK(decompressor,src,dst); 330 | if (K==5) return decompressKK(decompressor,src,dst); 331 | if (K==4) return decompressKK(decompressor,src,dst); 332 | 333 | if (K==10) return decompressKK(decompressor,src,dst); 334 | if (K==12) return decompressKK(decompressor,src,dst); 335 | if (K==14) return decompressKK(decompressor,src,dst); 336 | 337 | const uint8_t *i8 = src.start; 338 | TSource *o8 = dst.start; 339 | 340 | const T clearSizeMask = T(-1)>>8; 341 | const T clearSizeOverlay = T(decompressor.marlinMostCommonSymbol) << ((sizeof(T)-1)*8); 342 | 343 | auto D = decompressor.decompressorTablePointer; 344 | 345 | uint64_t value = 0; 346 | uint64_t valueBits = O; 347 | while (i8 < src.end or valueBits>=K+O) { 348 | while (valueBits < K+O) { 349 | value += uint64_t(*i8++) << (64-8-valueBits); 350 | valueBits += 8; 351 | } 352 | 353 | size_t wordIdx = value >> (64-(K+O)); 354 | 355 | value = value << K; 356 | valueBits -= K; 357 | 358 | { 359 | T v = ((const T *)D)[wordIdx]; 360 | // *((T *)o8) = (v & clearSizeMask) + clearSizeOverlay; 361 | // o8 += v >> ((sizeof(T)-1)*8); 362 | 363 | size_t sz = v >> ((sizeof(T)-1)*8); 364 | 365 | T vv = (v & clearSizeMask) + clearSizeOverlay; 366 | memcpy(o8, &vv, std::min(sz,sizeof(T)-1)); 367 | // *((T *)o8) = v; 368 | 369 | o8 += sz; 370 | } 371 | } 372 | 373 | return dst.nElements(); 374 | } 375 | 376 | template 377 
| size_t decompressSlow( 378 | const TMarlinDecompress &decompressor, 379 | View src, View dst) { 380 | 381 | auto K = decompressor.K; 382 | auto O = decompressor.O; 383 | auto maxWordSize = decompressor.maxWordSize; 384 | 385 | const uint8_t *i8 = src.start; 386 | TSource *o8 = dst.start; 387 | 388 | auto D = decompressor.decompressorTablePointer; 389 | 390 | size_t f = 1; 391 | while (size_t(1<=K+O) { 396 | while (valueBits < K+O) { 397 | value += uint64_t(*i8++) << (64-8-valueBits); 398 | valueBits += 8; 399 | } 400 | 401 | size_t wordIdx = value >> (64-(K+O)); 402 | 403 | value = value << K; 404 | valueBits -= K; 405 | 406 | { 407 | memcpy(o8,&D[wordIdx< 416 | size_t decompressSlow(const TMarlinCompress &decompressor, View src, View dst) { 417 | 418 | const uint8_t *i8 = src.start; 419 | TSource *o8 = dst.start; 420 | 421 | uint64_t value = 0; 422 | uint64_t valueBits = O; 423 | while (i8 < src.end or valueBits>=K+O) { 424 | while (valueBits < K+O) { 425 | value += uint64_t(*i8++) << (64-8-valueBits); 426 | valueBits += 8; 427 | } 428 | 429 | size_t wordIdx = value >> (64-(K+O)); 430 | 431 | value = value << K; 432 | valueBits -= K; 433 | 434 | for (auto &&c : words[wordIdx]) 435 | *o8++ = marlinAlphabet[c].sourceSymbol; 436 | } 437 | 438 | return dst.nElements(); 439 | }*/ 440 | 441 | 442 | } 443 | 444 | template 445 | std::unique_ptr> TMarlinDecompress::buildDecompressorTable( 446 | const TMarlinDictionary &dictionary) const { 447 | 448 | auto &&marlinAlphabet = dictionary.marlinAlphabet; 449 | auto &&words = dictionary.words; 450 | 451 | auto ret = std::make_unique>(words.size()*(maxWordSize+1)); 452 | 453 | TSource *d = &ret->front(); 454 | for (size_t i=0; ij ? marlinAlphabet[words[i][j]].sourceSymbol : TSource(0)); 457 | *d++ = words[i].size(); 458 | } 459 | return ret; 460 | } 461 | 462 | template 463 | ssize_t TMarlinDecompress::decompress(View src, View dst) const { 464 | 465 | // Special case: empty block! 
466 | if (dst.nBytes() == 0 or src.nBytes() == 0) { 467 | if (src.nBytes() or dst.nBytes()) return -1; // TODO: Error code 468 | return 0; 469 | } 470 | 471 | // Special case: the entire block is smaller than the size of a single symbol! 472 | if (src.nBytes() < sizeof(TSource)) return -1; 473 | 474 | // Special case: the entire block is made of one symbol! 475 | if (src.nBytes() == sizeof(TSource)) { 476 | TSource s = *reinterpret_cast(src.start); 477 | for (size_t i=0; i(src.start)++; 493 | padding += sizeof(TSource); 494 | } 495 | 496 | ssize_t nUnrepresentedSymbols = *src.start++; 497 | 498 | ssize_t unrepresentedSize = nUnrepresentedSymbols * ( sizeof(TSource) + ( 499 | dst.nElements() < 0x100 ? sizeof(uint8_t) : 500 | dst.nElements() < 0x10000 ? sizeof(uint16_t) : 501 | dst.nElements() < 0x100000000ULL ? sizeof(uint32_t) :sizeof(uint64_t) 502 | )); 503 | 504 | ssize_t residualSize = dst.nElements()*shift/8; 505 | 506 | ssize_t marlinSize = src.end-src.start-unrepresentedSize-residualSize; 507 | 508 | 509 | // Initialization, which might be optional 510 | if (not isSkip) { 511 | TSource s = marlinMostCommonSymbol; 512 | for (size_t i=0; i marlinSrc = 517 | marlin::make_view(src.start,src.start+marlinSize); 518 | View unrepresentedSrc = 519 | marlin::make_view(marlinSrc.end,marlinSrc.end+unrepresentedSize); 520 | View shiftSrc = 521 | marlin::make_view(unrepresentedSrc.end,unrepresentedSrc.end+residualSize); 522 | 523 | if (false) { 524 | //decompressSlow(marlinSrc, dst); 525 | } else if (maxWordSize==3) { 526 | decompressFast(*this,marlinSrc, dst); 527 | } else if (maxWordSize==7) { 528 | decompressFast(*this,marlinSrc, dst); 529 | } else { 530 | //printf("Slow because: %lu %lu\n",K, maxWordSize); 531 | decompressSlow(*this, marlinSrc, dst); 532 | } 533 | 534 | //if (nUnrepresentedSymbols) printf("%u %u %u\n", nUnrepresentedSymbols, unrepresentedSize, dst.nElements()); 535 | // Place unrepresented symbols 536 | while (unrepresentedSrc.start < 
unrepresentedSrc.end) { 537 | 538 | size_t idx; 539 | if (dst.nElements() < 0x100) { 540 | idx = *reinterpret_cast(unrepresentedSrc.start)++; 541 | } else if (dst.nElements() < 0x10000) { 542 | idx = *reinterpret_cast(unrepresentedSrc.start)++; 543 | } else if (dst.nElements() < 0x100000000ULL) { 544 | idx = *reinterpret_cast(unrepresentedSrc.start)++; 545 | } else { 546 | idx = *reinterpret_cast(unrepresentedSrc.start)++; 547 | } 548 | // printf("%d \n", idx, *reinterpret_cast(unrepresentedSrc.start)); 549 | dst.start[idx] = *reinterpret_cast(unrepresentedSrc.start)++; 550 | // printf("%llu %llu\n", unrepresentedSrc.start, unrepresentedSrc.end); 551 | } 552 | 553 | return padding + shift8(*this, shiftSrc, dst); 554 | } 555 | 556 | //////////////////////////////////////////////////////////////////////// 557 | // 558 | // Explicit Instantiations 559 | #include "instantiations.h" 560 | INSTANTIATE(TMarlinDecompress) 561 | 562 | //INSTANTIATE_MEMBER(TMarlinDecompress,buildDecompressorTable(const TMarlinDictionary &dictionary) const -> std::unique_ptr>) 563 | //INSTANTIATE_MEMBER(TMarlinDecompress,decompress(View src, View dst) const -> ssize_t) 564 | 565 | -------------------------------------------------------------------------------- /src/imageBlockEC.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageBlockEC: Implementation of the transformed image <-> bitstream functionality 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #include 36 | 37 | #include "imageBlockEC.hpp" 38 | #include "profiler.hpp" 39 | #include "distribution.hpp" 40 | 41 | namespace marlin { 42 | 43 | // Common block-based entropy decoding 44 | 45 | size_t ImageMarlinBlockEC::decodeBlocks( 46 | marlin::View uncompressed, 47 | marlin::View &compressed, 48 | size_t blockSize) { 49 | { 50 | 51 | const size_t nBlocks = (uncompressed.nBytes() + blockSize - 1) / blockSize; 52 | 53 | std::vector> blocksDictionary; 54 | std::vector blocksSize; 55 | std::vector blocksPosition; 56 | 57 | { 58 | size_t position = nBlocks * 3; // this is the header's size 59 | for (size_t i = 0; i < nBlocks; i++) { 60 | 61 | blocksDictionary.emplace_back(compressed[3 * i + 0], i); 62 | blocksSize.emplace_back((compressed[3 * i + 2] << 8) + compressed[3 * i + 1]); 63 | blocksPosition.emplace_back(position); 64 | position += blocksSize.back(); 65 | } 66 | } 67 | // To minimize cache mess, we uncompress together the blocks that use the same dictionary. 
68 | std::sort(blocksDictionary.begin(), blocksDictionary.end()); 69 | 70 | for (size_t sd = 0; sd < nBlocks; sd++) { 71 | 72 | auto dict_index = blocksDictionary[sd].first; 73 | auto i = blocksDictionary[sd].second; 74 | 75 | auto in = marlin::make_view( 76 | &compressed[blocksPosition[i]], 77 | &compressed[blocksPosition[i] + blocksSize[i]]); 78 | 79 | size_t usz = std::min(blockSize, uncompressed.nBytes() - i * blockSize); 80 | auto out = marlin::make_view( 81 | &uncompressed[i * blockSize], 82 | &uncompressed[i * blockSize + usz]); 83 | 84 | Marlin_get_prebuilt_dictionaries()[dict_index]->decompress(in, out); 85 | } 86 | return uncompressed.nBytes(); 87 | } 88 | } 89 | 90 | 91 | 92 | // Laplacian Block EC (original marlinUtility) 93 | 94 | std::vector LaplacianBlockEC::encodeBlocks( 95 | const std::vector &uncompressed, 96 | size_t blockSize) { 97 | const size_t nBlocks = (uncompressed.size()+blockSize-1)/blockSize; 98 | 99 | Profiler::start("ec_block_entropy"); 100 | std::vector> blocksEntropy; 101 | // Calculate entropy only for 1 out of entropy_frequency block 102 | double calculated_entropy = 0; 103 | for (size_t i=0; i hist; 114 | hist.fill(0.); 115 | for (size_t j = 1; j < sz; j++) hist[uncompressed[i * blockSize + j]]++; 116 | for (auto &h : hist) h /= (sz - 1); 117 | 118 | calculated_entropy = Distribution::entropy(hist) / 8.; 119 | } 120 | // else: entropy is that of % entropy_frequency == 0 121 | blocksEntropy.emplace_back(std::max(0, std::min(255, int(calculated_entropy * 256))), i); 122 | } 123 | // Sort packets depending on increasing entropy 124 | std::sort(blocksEntropy.begin(), blocksEntropy.end()); 125 | Profiler::end("ec_block_entropy"); 126 | 127 | // Collect prebuilt dictionaries 128 | const Marlin **prebuilt_dictionaries = Marlin_get_prebuilt_dictionaries(); 129 | prebuilt_dictionaries+=32; // Harcoded, selects Laplacian Distribution 130 | 131 | // Compress 132 | Profiler::start("ec_dictionary_coding"); 133 | std::vector 
ec_header(nBlocks*3); 134 | std::vector scratchPad(nBlocks * blockSize); 135 | for (size_t b=0; bcompress(in, out); 145 | 146 | ec_header[3*i+0]=&prebuilt_dictionaries[(entropy*16)/256] - Marlin_get_prebuilt_dictionaries(); 147 | ec_header[3*i+1]=compressedSize & 0xFF; 148 | ec_header[3*i+2]=compressedSize >> 8; 149 | } 150 | Profiler::end("ec_dictionary_coding"); 151 | 152 | 153 | size_t fullCompressedSize = ec_header.size(); 154 | for (size_t i=0; i out(fullCompressedSize); 160 | 161 | memcpy(&out[0], ec_header.data(), ec_header.size()); 162 | { 163 | size_t p = ec_header.size(); 164 | for (size_t i=0; i ImageMarlinBestDictBlockEC::encodeBlocks( 177 | const std::vector &uncompressed, size_t blockSize) { 178 | 179 | const size_t nBlocks = (uncompressed.size()+blockSize-1)/blockSize; 180 | 181 | std::vector> blocksEntropy; 182 | 183 | std::vector ec_header(nBlocks*3); 184 | std::vector bestSizes(nBlocks, blockSize*2); 185 | std::vector> bestBlocks(nBlocks, std::vector(blockSize)); 186 | std::vector scratchPad(blockSize); 187 | 188 | for (auto **dict = Marlin_get_prebuilt_dictionaries(); *dict; dict++) { 189 | 190 | for (size_t i=0; icompress(in, out); 198 | 199 | if (compressedSize> 8; 205 | bestBlocks[i] = scratchPad; 206 | } 207 | } 208 | } 209 | 210 | std::vector compressedData = ec_header; 211 | for (size_t i=0; i bitstream functionality 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | 
furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #ifndef IMAGEBLOCKEC_HPP 36 | #define IMAGEBLOCKEC_HPP 37 | 38 | #include 39 | 40 | namespace marlin { 41 | 42 | /** 43 | * Fast version of LaplacianBlockEC: entropy is calculated for a few pixels only, 44 | * instead of for every block. 45 | */ 46 | class LaplacianBlockEC : public ImageMarlinBlockEC { 47 | 48 | public: 49 | /** 50 | * @param block_entropy_frequency_ block entropy is calculated for 1 out of block_entropy_frequency_ 51 | * blocks 52 | */ 53 | LaplacianBlockEC(ImageMarlinHeader& header_) : header(header_) {} 54 | 55 | std::vector encodeBlocks( 56 | const std::vector &uncompressed, 57 | size_t blockSize); 58 | 59 | protected: 60 | ImageMarlinHeader header; 61 | }; 62 | 63 | /** 64 | * Image block entropy coder that choses the best dictionary for 65 | * compression. Slow. 
66 | */ 67 | class ImageMarlinBestDictBlockEC : public ImageMarlinBlockEC { 68 | public: 69 | std::vector encodeBlocks( 70 | const std::vector &uncompressed, 71 | size_t blockSize); 72 | }; 73 | 74 | } 75 | #endif /* IMAGEBLOCKEC_HPP */ 76 | -------------------------------------------------------------------------------- /src/imageCoder.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageCompressor: compressor part of the ImageMarlin codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 
32 | 33 | ***********************************************************************/ 34 | 35 | #include 36 | 37 | #include "profiler.hpp" 38 | #include "distribution.hpp" 39 | 40 | using namespace marlin; 41 | 42 | std::string ImageMarlinCoder::compress(const cv::Mat& orig_img) { 43 | const size_t bs = header.blockWidth; 44 | const size_t brows = (orig_img.rows+bs-1)/bs; 45 | const size_t bcols = (orig_img.cols+bs-1)/bs; 46 | cv::Mat img; 47 | { 48 | if (brows * bs - orig_img.rows != 0 || bcols * bs - orig_img.cols != 0) { 49 | cv::copyMakeBorder(orig_img, img, 0, brows * bs - orig_img.rows, 0, bcols * bs - orig_img.cols, 50 | cv::BORDER_REPLICATE); 51 | } else { 52 | img = orig_img; 53 | } 54 | } 55 | 56 | std::vector side_information(bcols*brows*img.channels()); 57 | std::vector preprocessed(bcols*brows*bs*bs*img.channels()); 58 | 59 | if (header.channels != 1) { 60 | throw std::runtime_error("Images with more than one component are not yet supported"); 61 | } 62 | // TODO: add support for >1 components 63 | cv::Mat1b img1b = img; 64 | 65 | if (! 
img.isContinuous()) { 66 | throw std::runtime_error("This implementation supports only continuous matrix data"); 67 | } 68 | 69 | Profiler::start("transformation"); 70 | transformer->transform_direct(img1b.data, side_information, preprocessed); 71 | Profiler::end("transformation"); 72 | 73 | // Write configuration header 74 | std::ostringstream oss; 75 | header.dump_to(oss); 76 | 77 | // Write side information (block-representative pixels by default) 78 | oss.write((const char *) side_information.data(), side_information.size()); 79 | 80 | // Entropy code and write result 81 | Profiler::start("entropy_coding"); 82 | auto compressed = blockEC->encodeBlocks(preprocessed, bs* bs); 83 | Profiler::end("entropy_coding"); 84 | oss.write((const char *)compressed.data(), compressed.size()); 85 | 86 | return oss.str(); 87 | } 88 | 89 | void ImageMarlinCoder::compress(const cv::Mat& img, std::ostream& out) { 90 | const std::string compressed = compress(img); 91 | out.write(compressed.data(), compressed.size()); 92 | } 93 | 94 | ImageMarlinCoder::~ImageMarlinCoder() { 95 | delete transformer; 96 | delete blockEC; 97 | } -------------------------------------------------------------------------------- /src/imageDecoder.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageDecompressor: decompressor part of the ImageMarlin codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #include 36 | 37 | #include "profiler.hpp" 38 | 39 | using namespace marlin; 40 | 41 | ImageMarlinDecoder::~ImageMarlinDecoder() { 42 | delete transformer; 43 | delete blockEC; 44 | } 45 | 46 | void ImageMarlinDecoder::decompress( 47 | const std::string &compressedString, 48 | std::vector& reconstructedData, 49 | ImageMarlinHeader& decompressedHeader) { 50 | decompressedHeader = ImageMarlinHeader(compressedString); 51 | 52 | const size_t bs = decompressedHeader.blockWidth; 53 | const size_t brows = (decompressedHeader.rows + bs - 1) / bs; 54 | const size_t bcols = (decompressedHeader.cols + bs - 1) / bs; 55 | const size_t channels = decompressedHeader.channels; 56 | 57 | auto side_information = marlin::make_view( 58 | (const uint8_t *) &compressedString[decompressedHeader.size()], 59 | (const uint8_t *) &compressedString[decompressedHeader.size() + channels * bcols * brows]); 60 | 61 | auto compressed = marlin::make_view( 62 | (const uint8_t *) &compressedString[decompressedHeader.size() + channels * bcols * brows], 63 | (const uint8_t *) &compressedString[compressedString.size()]); 64 | 65 | 
std::vector entropy_decoded_data(channels * bcols * brows * bs * bs); 66 | Profiler::start("entropy_decode"); 67 | blockEC->decodeBlocks(marlin::make_view(entropy_decoded_data), compressed, bs * bs); 68 | Profiler::end("entropy_decode"); 69 | 70 | Profiler::start("inverse_transform"); 71 | transformer->transform_inverse( 72 | entropy_decoded_data, 73 | side_information, 74 | reconstructedData); 75 | Profiler::end("inverse_transform"); 76 | } -------------------------------------------------------------------------------- /src/imageHeader.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageHeader: header with image information and codec configuration 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #include 36 | 37 | #include "imageBlockEC.hpp" 38 | #include "imageTransformer.hpp" 39 | 40 | using namespace marlin; 41 | 42 | 43 | ImageMarlinCoder* ImageMarlinHeader::newCoder() { 44 | // Get the right subclass depending on the parameters 45 | ImageMarlinTransformer* transformer = nullptr; 46 | ImageMarlinBlockEC* blockEC = nullptr; 47 | if (transtype == TransformType::North) { 48 | if (qtype == QuantizerType::Uniform) { 49 | transformer = new NorthPredictionUniformQuantizer(*this); 50 | } else if (qtype == QuantizerType::Deadzone) { 51 | transformer = new NorthPredictionDeadzoneQuantizer(*this); 52 | } 53 | blockEC = new LaplacianBlockEC(*this); 54 | } else if (transtype == TransformType::FastLeft) { 55 | if (qtype == QuantizerType::Uniform) { 56 | transformer = new FastLeftUniformQuantizer(*this); 57 | } 58 | blockEC = new LaplacianBlockEC(*this); 59 | } 60 | if (transformer == nullptr || blockEC == nullptr) { 61 | throw std::runtime_error("Invalid transform / quantizer combination"); 62 | } 63 | 64 | return new ImageMarlinCoder(*this, transformer, blockEC); 65 | } 66 | 67 | ImageMarlinDecoder* ImageMarlinHeader::newDecoder() { 68 | // Get the right subclass depending on the parameters 69 | ImageMarlinTransformer* transformer = nullptr; 70 | ImageMarlinBlockEC* blockEC = nullptr; 71 | if (transtype == TransformType::North) { 72 | if (qtype == QuantizerType::Uniform) { 73 | transformer = new NorthPredictionUniformQuantizer(*this); 74 | } else if (qtype == QuantizerType::Deadzone) { 75 | transformer = new NorthPredictionDeadzoneQuantizer(*this); 76 | } 77 | blockEC = new LaplacianBlockEC(*this); 
78 | } else if (transtype == TransformType::FastLeft) { 79 | if (qtype == QuantizerType::Uniform) { 80 | transformer = new FastLeftUniformQuantizer(*this); 81 | } 82 | blockEC = new LaplacianBlockEC(*this); 83 | } 84 | if (transformer == nullptr || blockEC == nullptr) { 85 | throw std::runtime_error("Invalid transform / quantizer combination"); 86 | } 87 | 88 | return new ImageMarlinDecoder(*this, transformer, blockEC); 89 | } 90 | 91 | void ImageMarlinHeader::dump_to(std::ostream &out) const { 92 | auto pos_before = out.tellp(); 93 | 94 | write_field<2>(out, rows); 95 | write_field<2>(out, cols); 96 | write_field<2>(out, channels); 97 | write_field<2>(out, blockWidth); 98 | write_field<1>(out, (uint8_t) transtype); 99 | write_field<1>(out, qstep); 100 | if (qstep > 1) { 101 | write_field<1>(out, (uint8_t) qtype); 102 | write_field<1>(out, (uint8_t) rectype); 103 | } 104 | 105 | if ((size_t) (out.tellp() - pos_before) != size()) { 106 | throw std::runtime_error("Invalid size or number of bytes written"); 107 | } 108 | } 109 | 110 | void ImageMarlinHeader::load_from(std::istream &in) { 111 | auto pos_before = in.tellg(); 112 | 113 | rows = read_field<2>(in); 114 | cols = read_field<2>(in); 115 | channels = read_field<2>(in); 116 | blockWidth = read_field<2>(in); 117 | uint32_t read_transtype = read_field<1>(in); 118 | if (read_transtype == (uint32_t) ImageMarlinHeader::TransformType::North) { 119 | transtype = ImageMarlinHeader::TransformType::North; 120 | } else if (read_transtype == (uint32_t) ImageMarlinHeader::TransformType::FastLeft) { 121 | transtype = ImageMarlinHeader::TransformType::FastLeft; 122 | } else { 123 | throw std::runtime_error("Invalid stored transtype"); 124 | } 125 | qstep = read_field<1>(in); 126 | qtype = ImageMarlinHeader::DEFAULT_QTYPE; 127 | rectype = ImageMarlinHeader::DEFAULT_RECONSTRUCTION_TYPE; 128 | if (qstep > 1) { 129 | uint32_t read_qtype = read_field<1>(in); 130 | if (read_qtype == (uint32_t) 
ImageMarlinHeader::QuantizerType::Uniform) { 131 | qtype = ImageMarlinHeader::QuantizerType::Uniform; 132 | } else if (read_qtype == (uint32_t) ImageMarlinHeader::QuantizerType::Deadzone) { 133 | qtype = ImageMarlinHeader::QuantizerType::Deadzone; 134 | } else { 135 | throw std::runtime_error("Invalid stored qtype"); 136 | } 137 | 138 | uint32_t read_rectype = read_field<1>(in); 139 | if (read_rectype == (uint32_t) ImageMarlinHeader::ReconstructionType::Midpoint) { 140 | rectype = ImageMarlinHeader::ReconstructionType::Midpoint; 141 | } else if (read_rectype == (uint32_t) ImageMarlinHeader::ReconstructionType::Lowpoint) { 142 | rectype = ImageMarlinHeader::ReconstructionType::Lowpoint; 143 | } else { 144 | throw std::runtime_error("Invalid stored rectype"); 145 | } 146 | } 147 | 148 | 149 | if ((size_t) (in.tellg() - pos_before) != size()) { 150 | throw std::runtime_error("Invalid size or number of bytes read"); 151 | } 152 | } 153 | 154 | size_t ImageMarlinHeader::size() const { 155 | size_t size = 2+2+2+2+1+1; 156 | if (qstep > 1) { 157 | size += 1+1; 158 | } 159 | return size; 160 | } 161 | 162 | void ImageMarlinHeader::validate() { 163 | if (rows == 0 || cols == 0 || channels == 0) { 164 | throw std::domain_error("All image dimensions must be positive"); 165 | } 166 | if (blockWidth == 0) { 167 | throw std::domain_error("Block size must be positive"); 168 | } 169 | if (qstep == 0) { 170 | throw std::domain_error("Only positive quantization steps can be used"); 171 | } 172 | if (qstep > 255) { 173 | throw std::domain_error("Quantization steps only up to 255 can be used"); 174 | } 175 | } 176 | 177 | template 178 | void ImageMarlinHeader::write_field(std::ostream& out, uint32_t field) const { 179 | if (num_bytes <= 0) { 180 | throw std::domain_error("num_bytes must be strictly positive"); 181 | } 182 | if (field < 0) { 183 | throw std::domain_error("field must be positive"); 184 | } 185 | if ((field >> 8*num_bytes) > 0) { 186 | std::stringstream msg; 187 | msg << 
"field value " << field << " cannot be written in " << num_bytes << "bytes"; 188 | throw std::domain_error(msg.str()); 189 | } 190 | 191 | for (size_t i=0; i>= 8; 195 | } 196 | } 197 | 198 | template 199 | uint32_t ImageMarlinHeader::read_field(std::istream& in) { 200 | if (num_bytes <= 0) { 201 | throw std::domain_error("num_bytes must be strictly positive"); 202 | } 203 | uint64_t field_value = 0; 204 | for (size_t i=0; i UINT32_MAX) { 209 | throw std::domain_error("the field value is too large"); 210 | } 211 | } 212 | 213 | return (uint32_t) field_value; 214 | } 215 | 216 | 217 | void ImageMarlinHeader::show(std::ostream& out) { 218 | out << "ImageMarlinHeader { " << std::endl; 219 | out << " rows = " << rows << std::endl; 220 | out << " cols = " << cols << std::endl; 221 | out << " channels = " << channels << std::endl; 222 | out << " blocksize = " << blockWidth << std::endl; 223 | out << " transtype = " << (uint32_t) transtype << std::endl; 224 | out << " qstep = " << qstep << std::endl; 225 | out << " qtype = " << (uint32_t) qtype << std::endl; 226 | out << " rectype = " << (uint32_t) rectype << std::endl; 227 | out << " blockEntropyFrequency = " << (uint32_t) blockEntropyFrequency << std::endl; 228 | out << "}" << std::endl; 229 | } -------------------------------------------------------------------------------- /src/imageTransformer.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageTarnsformer: Implementation of direct and inverse image transformations 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to 
deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #include "imageTransformer.hpp" 36 | #include "profiler.hpp" 37 | 38 | namespace { 39 | /** 40 | * @return -1, 0, 1 if val is <0, 0 or >0, respectively. 
41 | */ 42 | template int sgn(T val) { 43 | return (T(0) < val) - (val < T(0)); 44 | } 45 | } 46 | 47 | namespace marlin { 48 | 49 | void NorthPredictionUniformQuantizer::transform_direct( 50 | uint8_t *original_data, std::vector &side_information, std::vector &preprocessed) { 51 | 52 | if (header.channels != 1) { 53 | throw std::runtime_error("only one channel supported at the time"); 54 | } 55 | 56 | if (header.transtype != ImageMarlinHeader::TransformType::North) { 57 | throw std::runtime_error("This class supports only North transform type"); 58 | } 59 | if (header.qtype != ImageMarlinHeader::QuantizerType::Uniform) { 60 | throw std::runtime_error("This class supports only Uniform quantization"); 61 | } 62 | 63 | switch (header.qstep) { 64 | case 0: 65 | throw std::runtime_error("Invalid qstep=0"); 66 | case 1: 67 | predict_and_quantize_direct<1>( 68 | original_data, side_information, preprocessed); 69 | break; 70 | case 2: 71 | predict_and_quantize_direct<2>( 72 | original_data, side_information, preprocessed); 73 | break; 74 | case 3: 75 | predict_and_quantize_direct<3>( 76 | original_data, side_information, preprocessed); 77 | break; 78 | case 4: 79 | predict_and_quantize_direct<4>( 80 | original_data, side_information, preprocessed); 81 | break; 82 | case 5: 83 | predict_and_quantize_direct<5>( 84 | original_data, side_information, preprocessed); 85 | break; 86 | case 6: 87 | predict_and_quantize_direct<6>( 88 | original_data, side_information, preprocessed); 89 | break; 90 | case 7: 91 | predict_and_quantize_direct<7>( 92 | original_data, side_information, preprocessed); 93 | break; 94 | case 8: 95 | predict_and_quantize_direct<8>( 96 | original_data, side_information, preprocessed); 97 | break; 98 | default: 99 | throw std::runtime_error("This implementation does not support this qstep value"); 100 | } 101 | } 102 | 103 | template 104 | void NorthPredictionUniformQuantizer::predict_and_quantize_direct( 105 | uint8_t *original_data, 106 | std::vector 
&side_information, 107 | std::vector &preprocessed) { 108 | 109 | // const size_t brows = (img.rows+blocksize-1)/blocksize; 110 | const size_t bcols = (header.cols+header.blockWidth-1)/header.blockWidth; 111 | const size_t imgRows = header.rows; 112 | const size_t imgCols = header.rows; 113 | const size_t blocksize = header.blockWidth; 114 | 115 | Profiler::start("quantization"); 116 | if (qs > 1) { 117 | const size_t pixelCount = header.rows * header.cols * header.channels; 118 | for (size_t i = 0; i < pixelCount; i++) { 119 | if (qs == 2) { 120 | original_data[i] >>= 1; 121 | } else if (qs == 4) { 122 | original_data[i] >>= 2; 123 | } else if (qs == 8) { 124 | original_data[i] >>= 3; 125 | } else if (qs == 16) { 126 | original_data[i] >>= 4; 127 | } else if (qs == 32) { 128 | original_data[i] >>= 5; 129 | } else { 130 | original_data[i] /= qs; 131 | } 132 | } 133 | } 134 | Profiler::end("quantization"); 135 | 136 | // PREPROCESS IMAGE INTO BLOCKS 137 | uint8_t *t = &preprocessed[0]; 138 | 139 | // Pointers to the original data 140 | Profiler::start("prediction"); 141 | const uint8_t* or0; 142 | const uint8_t* or1; 143 | for (size_t i=0; i &entropy_decoded_data, 179 | View &side_information, 180 | std::vector &reconstructedData) { 181 | reconstructedData.resize(header.rows * header.cols * header.channels); 182 | 183 | if (header.channels != 1) { 184 | throw std::runtime_error("only one channel supported at the time"); 185 | } 186 | 187 | const size_t imgRows = header.rows; 188 | const size_t imgCols = header.cols; 189 | const size_t bs = header.blockWidth; 190 | const size_t bcols = (header.cols + bs - 1) / bs; 191 | 192 | Profiler::start("prediction"); 193 | const uint8_t *t = &entropy_decoded_data[0]; 194 | uint8_t *r0; 195 | uint8_t *r1; 196 | for (size_t i = 0; i < imgRows - bs + 1; i += bs) { 197 | for (size_t j = 0; j < imgCols - bs + 1; j += bs) { 198 | r0 = &(reconstructedData[i * imgCols + j]); 199 | r1 = &(reconstructedData[i * imgCols + j]); 200 | 201 | 
*r0++ = side_information[(i / bs) * bcols + j / bs]; 202 | 203 | // Reconstruct first row 204 | t++; 205 | for (size_t jj = 1; jj < bs; jj++) { 206 | *r0++ = *t++ + *r1++; 207 | } 208 | 209 | // Reconstruct remaining rows 210 | for (size_t ii = 1; ii < bs; ii++) { 211 | r0 = &(reconstructedData[(i + ii) * imgCols + j]); 212 | r1 = &(reconstructedData[(i + ii - 1) * imgCols + j]); 213 | 214 | for (size_t jj = 0; jj < bs; jj++) { 215 | *r0++ = *r1++ + *t++; 216 | } 217 | } 218 | } 219 | } 220 | Profiler::end("prediction"); 221 | 222 | Profiler::start("quantization"); 223 | const size_t pixelCount = header.rows * header.cols * header.channels; 224 | const uint32_t interval_count = (256 + header.qstep - 1) / header.qstep; 225 | auto size_last_qinterval = (const uint8_t) 256 - header.qstep * (interval_count - 1); 226 | auto first_element_last_interval = (const uint8_t) (header.qstep * (interval_count - 1)); 227 | // offset for all but the last interval 228 | uint8_t offset; 229 | // offset for the last interval (might be a smaller interval) 230 | uint8_t offset_last_interval; 231 | if (header.rectype == ImageMarlinHeader::ReconstructionType::Midpoint) { 232 | offset = (uint8_t) header.qstep / 2; 233 | offset_last_interval = (uint8_t) size_last_qinterval / 2; 234 | } else { 235 | offset = 0; 236 | offset_last_interval = 0; 237 | } 238 | 239 | uint8_t* data = reconstructedData.data(); 240 | for (size_t i=0; i= first_element_last_interval) { 244 | data[i] = data[i] + offset_last_interval; 245 | } else { 246 | data[i] = data[i] + offset; 247 | } 248 | } 249 | Profiler::end("quantization"); 250 | } 251 | 252 | ///////// Deadzone quantizer 253 | 254 | void NorthPredictionDeadzoneQuantizer::transform_direct( 255 | uint8_t *original_data, std::vector &side_information, std::vector &preprocessed) { 256 | 257 | if (header.channels != 1) { 258 | throw std::runtime_error("only one channel supported at the time"); 259 | } 260 | 261 | if (header.transtype != 
ImageMarlinHeader::TransformType::North) { 262 | throw std::runtime_error("This class supports only North transform type"); 263 | } 264 | if (header.qtype != ImageMarlinHeader::QuantizerType::Deadzone) { 265 | throw std::runtime_error("This class supports only Deadzone quantization"); 266 | } 267 | 268 | switch (header.qstep) { 269 | case 0: 270 | throw std::runtime_error("Invalid qstep=0"); 271 | case 1: 272 | predict_and_quantize_direct<1>( 273 | original_data, side_information, preprocessed); 274 | break; 275 | case 2: 276 | predict_and_quantize_direct<2>( 277 | original_data, side_information, preprocessed); 278 | break; 279 | case 3: 280 | predict_and_quantize_direct<3>( 281 | original_data, side_information, preprocessed); 282 | break; 283 | case 4: 284 | predict_and_quantize_direct<4>( 285 | original_data, side_information, preprocessed); 286 | break; 287 | case 5: 288 | predict_and_quantize_direct<5>( 289 | original_data, side_information, preprocessed); 290 | break; 291 | case 6: 292 | predict_and_quantize_direct<6>( 293 | original_data, side_information, preprocessed); 294 | break; 295 | case 7: 296 | predict_and_quantize_direct<7>( 297 | original_data, side_information, preprocessed); 298 | break; 299 | case 8: 300 | predict_and_quantize_direct<8>( 301 | original_data, side_information, preprocessed); 302 | break; 303 | case 16: 304 | predict_and_quantize_direct<16>( 305 | original_data, side_information, preprocessed); 306 | break; 307 | case 32: 308 | predict_and_quantize_direct<32>( 309 | original_data, side_information, preprocessed); 310 | break; 311 | case 33: 312 | predict_and_quantize_direct<33>( 313 | original_data, side_information, preprocessed); 314 | break; 315 | case 67: 316 | predict_and_quantize_direct<67>( 317 | original_data, side_information, preprocessed); 318 | break; 319 | default: 320 | throw std::runtime_error("This implementation does not support this qstep value"); 321 | } 322 | } 323 | 324 | template 325 | void 
NorthPredictionDeadzoneQuantizer::predict_and_quantize_direct( 326 | uint8_t *original_data, 327 | std::vector &side_information, 328 | std::vector &preprocessed) { 329 | 330 | // const size_t brows = (img.rows+blocksize-1)/blocksize; 331 | const size_t bcols = (header.cols+header.blockWidth-1)/header.blockWidth; 332 | const size_t imgRows = header.rows; 333 | const size_t imgCols = header.rows; 334 | const size_t blocksize = header.blockWidth; 335 | 336 | uint8_t *t = &preprocessed[0]; 337 | 338 | // Pointers to the original data 339 | Profiler::start("prediction+quantization"); 340 | uint8_t* or0; 341 | uint8_t* or1; 342 | uint8_t prediction; 343 | uint8_t original_value; 344 | uint8_t coded_qi; 345 | int16_t prediction_error; 346 | int16_t reconstructed_pred_error; 347 | uint16_t reconstructed_value; 348 | 349 | uint8_t effective_offset; 350 | if (header.rectype == ImageMarlinHeader::ReconstructionType::Lowpoint) { 351 | effective_offset = 0; 352 | } else if (header.rectype == ImageMarlinHeader::ReconstructionType::Midpoint) { 353 | effective_offset = qs >> 1; 354 | } else { 355 | throw std::runtime_error("Unsupported reconstruction type"); 356 | } 357 | const uint8_t offset = effective_offset; 358 | 359 | 360 | for (size_t i=0; i 1) { 381 | if (qs == 2) { 382 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) >> 1)); 383 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 2; 384 | reconstructed_value = prediction + reconstructed_pred_error; 385 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 386 | } else if (qs == 4) { 387 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) >> 2)); 388 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 4; 389 | reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 390 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 391 | } else if (qs == 8) { 392 | coded_qi = (uint8_t) (sgn(prediction_error) * 
(abs(prediction_error) >> 3)); 393 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 8; 394 | reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 395 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 396 | } else { 397 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) / qs)); 398 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * qs; 399 | reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 400 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 401 | } 402 | if (reconstructed_value < 0) { 403 | reconstructed_value = 0; 404 | } else if (reconstructed_value > 255) { 405 | reconstructed_value = 255; 406 | } 407 | } else { 408 | coded_qi = (uint8_t) prediction_error; 409 | reconstructed_value = original_value; 410 | } 411 | 412 | *t++ = coded_qi; 413 | *or0 = (uint8_t) reconstructed_value; 414 | or1++; 415 | or0++; 416 | } 417 | 418 | // Remaining columns are predicted with the top element 419 | // (ii starts at 1 because ii=0 is the first row, already processeD) 420 | for (size_t ii=1; ii 1) { 430 | if (qs == 2) { 431 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) >> 1)); 432 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 2; 433 | reconstructed_value = prediction + reconstructed_pred_error; 434 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 435 | } else if (qs == 4) { 436 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) >> 2)); 437 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 4; 438 | reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 439 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 440 | } else if (qs == 8) { 441 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) >> 3)); 442 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * 8; 443 | 
reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 444 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 445 | } else { 446 | coded_qi = (uint8_t) (sgn(prediction_error) * (abs(prediction_error) / qs)); 447 | reconstructed_pred_error = ((int16_t)((int8_t) coded_qi)) * qs; 448 | reconstructed_value = prediction + reconstructed_pred_error; // Does not include offset 449 | reconstructed_value += sgn(reconstructed_pred_error) * offset; 450 | } 451 | if (reconstructed_value < 0) { 452 | reconstructed_value = 0; 453 | } else if (reconstructed_value > 255) { 454 | reconstructed_value = 255; 455 | } 456 | } else { 457 | coded_qi = (uint8_t) prediction_error; 458 | reconstructed_value = original_value; 459 | } 460 | 461 | *or0 = (uint8_t) reconstructed_value; 462 | *t++ = coded_qi; 463 | or1++; 464 | or0++; 465 | } 466 | } 467 | } 468 | } 469 | Profiler::end("prediction+quantization"); 470 | } 471 | 472 | void NorthPredictionDeadzoneQuantizer::transform_inverse( 473 | std::vector &entropy_decoded_data, 474 | View &side_information, 475 | std::vector &reconstructedData) { 476 | reconstructedData.resize(header.rows * header.cols * header.channels); 477 | 478 | if (header.channels != 1) { 479 | throw std::runtime_error("only one channel supported at the time"); 480 | } 481 | 482 | const size_t imgRows = header.rows; 483 | const size_t imgCols = header.cols; 484 | const size_t bs = header.blockWidth; 485 | const size_t bcols = (header.cols + bs - 1) / bs; 486 | 487 | uint8_t offset; 488 | if (header.rectype == ImageMarlinHeader::ReconstructionType::Lowpoint) { 489 | offset = 0; 490 | } else if (header.rectype == ImageMarlinHeader::ReconstructionType::Midpoint) { 491 | offset = (uint8_t) header.qstep/2; 492 | } else { 493 | throw std::runtime_error("Unsupported reconstruction type"); 494 | } 495 | 496 | Profiler::start("prediction+quantization"); 497 | const uint8_t *t = &entropy_decoded_data[0]; 498 | uint8_t *r0; 499 | 
uint8_t *r1; 500 | int16_t prediction_error; 501 | int16_t prediction; 502 | int16_t reconstructed_value; 503 | for (size_t i = 0; i < imgRows - bs + 1; i += bs) { 504 | for (size_t j = 0; j < imgCols - bs + 1; j += bs) { 505 | r0 = &(reconstructedData[i * imgCols + j]); 506 | r1 = &(reconstructedData[i * imgCols + j]); 507 | 508 | *r0++ = side_information[(i / bs) * bcols + j / bs]; 509 | 510 | // Reconstruct first row 511 | t++; 512 | for (size_t jj = 1; jj < bs; jj++) { 513 | prediction = *r1++; 514 | prediction_error = (int8_t) *t++; 515 | prediction_error *= header.qstep; 516 | 517 | reconstructed_value = prediction + prediction_error; 518 | reconstructed_value += sgn(prediction_error) * offset; 519 | if (reconstructed_value < 0) { 520 | reconstructed_value = 0; 521 | } else if (reconstructed_value > 255) { 522 | reconstructed_value = 255; 523 | } 524 | 525 | *r0++ = (uint8_t) reconstructed_value; 526 | } 527 | 528 | // Reconstruct remaining rows 529 | for (size_t ii = 1; ii < bs; ii++) { 530 | r0 = &(reconstructedData[(i + ii) * imgCols + j]); 531 | r1 = &(reconstructedData[(i + ii - 1) * imgCols + j]); 532 | 533 | for (size_t jj = 0; jj < bs; jj++) { 534 | prediction = *r1++; 535 | prediction_error = (int8_t) *t++; 536 | prediction_error *= header.qstep; 537 | 538 | reconstructed_value = prediction + prediction_error; 539 | reconstructed_value += sgn(prediction_error) * offset; 540 | if (reconstructed_value < 0) { 541 | reconstructed_value = 0; 542 | } else if (reconstructed_value > 255) { 543 | reconstructed_value = 255; 544 | } 545 | 546 | *r0++ = (uint8_t) reconstructed_value; 547 | } 548 | } 549 | } 550 | } 551 | Profiler::end("prediction+quantization"); 552 | } 553 | 554 | 555 | /// Fast left DPCM, uniform quantizer 556 | 557 | void FastLeftUniformQuantizer::transform_direct( 558 | uint8_t *original_data, std::vector &side_information, std::vector &preprocessed) { 559 | 560 | if (header.channels != 1) { 561 | throw std::runtime_error("only one channel 
supported at the time"); 562 | } 563 | 564 | if (header.transtype != ImageMarlinHeader::TransformType::FastLeft) { 565 | throw std::runtime_error("This class supports only FastLeft transformation"); 566 | } 567 | if (header.qtype != ImageMarlinHeader::QuantizerType::Uniform) { 568 | throw std::runtime_error("This class supports only Uniform quantization"); 569 | } 570 | 571 | switch (header.qstep) { 572 | case 0: 573 | throw std::runtime_error("Invalid qstep=0"); 574 | case 1: 575 | predict_and_quantize_direct<1>( 576 | original_data, side_information, preprocessed); 577 | break; 578 | case 2: 579 | predict_and_quantize_direct<2>( 580 | original_data, side_information, preprocessed); 581 | break; 582 | case 3: 583 | predict_and_quantize_direct<3>( 584 | original_data, side_information, preprocessed); 585 | break; 586 | case 4: 587 | predict_and_quantize_direct<4>( 588 | original_data, side_information, preprocessed); 589 | break; 590 | case 5: 591 | predict_and_quantize_direct<5>( 592 | original_data, side_information, preprocessed); 593 | break; 594 | case 6: 595 | predict_and_quantize_direct<6>( 596 | original_data, side_information, preprocessed); 597 | break; 598 | case 7: 599 | predict_and_quantize_direct<7>( 600 | original_data, side_information, preprocessed); 601 | break; 602 | case 8: 603 | predict_and_quantize_direct<8>( 604 | original_data, side_information, preprocessed); 605 | break; 606 | default: 607 | throw std::runtime_error("This implementation does not support this qstep value"); 608 | } 609 | } 610 | 611 | template 612 | void FastLeftUniformQuantizer::predict_and_quantize_direct( 613 | uint8_t *original_data, 614 | std::vector &side_information, 615 | std::vector &preprocessed) { 616 | 617 | // const size_t brows = (img.rows+blocksize-1)/blocksize; 618 | const size_t pixelCount = header.rows * header.cols * header.channels; 619 | 620 | Profiler::start("quantization"); 621 | if (qs > 1) { 622 | uint8_t* original = original_data; 623 | for (size_t 
i = 0; i < pixelCount; i++) { 624 | if (qs == 2) { 625 | *original >>= 1; 626 | } else if (qs == 4) { 627 | *original >>= 2; 628 | } else if (qs == 8) { 629 | *original >>= 3; 630 | } else if (qs == 16) { 631 | *original >>= 4; 632 | } else if (qs == 32) { 633 | *original >>= 5; 634 | } else { 635 | *original /= qs; 636 | } 637 | original++; 638 | } 639 | } 640 | Profiler::end("quantization"); 641 | 642 | uint8_t previous_value = original_data[0]; 643 | side_information[0] = original_data[0]; // Only this value is used. TODO: code only the needed SI 644 | uint8_t *transformed = &preprocessed[0]; 645 | uint8_t *original = original_data; 646 | Profiler::start("prediction"); 647 | for (size_t i=0; i &entropy_decoded_data, 658 | View &side_information, 659 | std::vector &reconstructedData) { 660 | reconstructedData.resize(header.rows * header.cols * header.channels); 661 | 662 | if (header.channels != 1) { 663 | throw std::runtime_error("only one channel supported at the time"); 664 | } 665 | const uint8_t *predicted = &entropy_decoded_data[0]; 666 | 667 | uint8_t *reconstructed = &(reconstructedData[0]); 668 | uint8_t last_value = side_information[0]; 669 | 670 | const size_t pixel_count = header.rows * header.cols * header.channels; 671 | Profiler::start("prediction"); 672 | for (size_t i=0; i= first_element_last_interval) { 702 | data[i] = data[i] + offset_last_interval; 703 | } else { 704 | data[i] = data[i] + offset; 705 | } 706 | } 707 | Profiler::end("quantization"); 708 | } 709 | 710 | 711 | } -------------------------------------------------------------------------------- /src/imageTransformer.hpp: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageTarnsformer: Implementation of direct and inverse image transformations 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, portions by Miguel Hernández-Cabronero 8 | 9 | Marlin: 
A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 
32 | 33 | ***********************************************************************/ 34 | 35 | #ifndef IMAGETRANSFORMER_HPP 36 | #define IMAGETRANSFORMER_HPP 37 | 38 | #include 39 | 40 | namespace marlin { 41 | 42 | /** 43 | * Transformer that predicts each pixel with the north neighbor (left neighbor for the first row) 44 | */ 45 | class NorthPredictionUniformQuantizer : public ImageMarlinTransformer { 46 | public: 47 | NorthPredictionUniformQuantizer(const ImageMarlinHeader& header_) : header(header_) {} 48 | 49 | void transform_direct( 50 | uint8_t *original_data, 51 | std::vector &side_information, 52 | std::vector &preprocessed); 53 | 54 | void transform_inverse( 55 | std::vector &entropy_decoded_data, 56 | View &side_information, 57 | std::vector &reconstructedData); 58 | 59 | protected: 60 | const ImageMarlinHeader header; 61 | 62 | /** 63 | * Apply the direct prediction and quantization transform. 64 | * 65 | * @tparam qs quantization step to be used 66 | */ 67 | template 68 | void predict_and_quantize_direct( 69 | uint8_t *original_data, 70 | std::vector &side_information, 71 | std::vector &preprocessed); 72 | }; 73 | 74 | /** 75 | * Transformer that predicts each pixel with the north neighbor (left neighbor for the first row) 76 | */ 77 | class NorthPredictionDeadzoneQuantizer : public ImageMarlinTransformer { 78 | public: 79 | NorthPredictionDeadzoneQuantizer(const ImageMarlinHeader& header_) : header(header_) {} 80 | 81 | void transform_direct( 82 | uint8_t *original_data, 83 | std::vector &side_information, 84 | std::vector &preprocessed); 85 | 86 | void transform_inverse( 87 | std::vector &entropy_decoded_data, 88 | View &side_information, 89 | std::vector &reconstructedData); 90 | 91 | protected: 92 | const ImageMarlinHeader header; 93 | 94 | /** 95 | * Apply the direct prediction and quantization transform. 
96 | * 97 | * @tparam qs quantization step to be used 98 | */ 99 | template 100 | void predict_and_quantize_direct( 101 | uint8_t *original_data, 102 | std::vector &side_information, 103 | std::vector &preprocessed); 104 | }; 105 | 106 | /** 107 | * Transformer that predicts each pixel with the left neighbor and applies uniform quantization 108 | */ 109 | class FastLeftUniformQuantizer : public ImageMarlinTransformer { 110 | public: 111 | FastLeftUniformQuantizer(const ImageMarlinHeader& header_) : header(header_) {} 112 | 113 | void transform_direct( 114 | uint8_t *original_data, 115 | std::vector &side_information, 116 | std::vector &preprocessed); 117 | 118 | void transform_inverse( 119 | std::vector &entropy_decoded_data, 120 | View &side_information, 121 | std::vector &reconstructedData); 122 | 123 | protected: 124 | const ImageMarlinHeader header; 125 | 126 | /** 127 | * Apply the direct prediction and quantization transform. 128 | * 129 | * @tparam qs quantization step to be used 130 | */ 131 | template 132 | void predict_and_quantize_direct( 133 | uint8_t *original_data, 134 | std::vector &side_information, 135 | std::vector &preprocessed); 136 | }; 137 | 138 | } 139 | 140 | 141 | #endif /* IMAGETRANSFORMER_HPP */ 142 | -------------------------------------------------------------------------------- /src/instantiations.h: -------------------------------------------------------------------------------- 1 | #define INSTANTIATE(A) \ 2 | template class marlin::A; 3 | // template class marlin::A; 4 | 5 | //#define INSTANTIATE_MEMBER(A,B) 6 | // template auto marlin::A::B; 7 | // template auto marlin::A::B; 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/marlin.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | Marlin: A Fast Entropy Codec 4 | 5 | MIT License 6 | 7 | Copyright (c) 2017 Manuel 
Martinez Torres 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | ***********************************************************************/ 28 | 29 | 30 | #include 31 | 32 | //////////////////////////////////////////////////////////////////////// 33 | // 34 | // Public Methods 35 | 36 | ssize_t Marlin_compress(const Marlin *dict, uint8_t* dst, size_t dstCapacity, const uint8_t* src, size_t srcSize) { 37 | 38 | return dict->compress(marlin::make_view(src,src+srcSize), marlin::make_view(dst,dst+dstCapacity)); 39 | } 40 | 41 | ssize_t Marlin_decompress(const Marlin *dict, uint8_t* dst, size_t dstSize, const uint8_t* src, size_t srcSize) { 42 | 43 | return dict->decompress(marlin::make_view(src,src+srcSize), marlin::make_view(dst,dst+dstSize)); 44 | } 45 | 46 | Marlin *Marlin_build_dictionary(const char *name, const double hist[256]) { 47 | return new Marlin(name,std::vector(&hist[0], &hist[256])); 48 | } 49 | 50 | void Marlin_free_dictionary(Marlin *dict) { 51 | 52 | if (dict != nullptr) 53 | delete dict; 54 | } 55 | 56 | /*const MarlinDictionary **Marlin_get_prebuilt_dictionaries() { 57 | 58 | return nullptr; 59 | } 60 | 61 | const MarlinDictionary * Marlin_estimate_best_dictionary(const MarlinDictionary **dict, const uint8_t* src, size_t srcSize) { 62 | 63 | return nullptr; 64 | }*/ 65 | 66 | -------------------------------------------------------------------------------- /src/profiler.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | profiler: a simple profiler for the Marlin codec (not thread safe) 4 | 5 | MIT License 6 | 7 | Copyright (c) 2018 Manuel Martinez Torres, Miguel Hernández-Cabronero 8 | 9 | Marlin: A Fast Entropy Codec 10 | 11 | MIT License 12 | 13 | Copyright (c) 2018 Manuel Martinez Torres 14 | 15 | Permission is hereby granted, free of charge, to any person obtaining a copy 16 | of this software and associated documentation files (the "Software"), to deal 17 | in the Software without 
restriction, including without limitation the rights 18 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 | copies of the Software, and to permit persons to whom the Software is 20 | furnished to do so, subject to the following conditions: 21 | 22 | The above copyright notice and this permission notice shall be included in all 23 | copies or substantial portions of the Software. 24 | 25 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | SOFTWARE. 32 | 33 | ***********************************************************************/ 34 | 35 | #include "profiler.hpp" 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #ifdef NO_PROFILER 43 | 44 | namespace marlin { 45 | 46 | // Empty implementations that can be easily factored out by the compiler. 
47 | 48 | void Profiler::start(std::string event_name) {} 49 | 50 | void Profiler::end(std::string event_name) {} 51 | 52 | void Profiler::report(std::ostream& out, bool csv_format) {} 53 | 54 | void Profiler::report(std::string output_path, bool csv_format) {} 55 | 56 | Profiler::Profiler() {} 57 | 58 | } 59 | 60 | #else 61 | 62 | namespace { 63 | 64 | /// Clock types to be used for all events 65 | const std::vector< std::pair > clock_types_names = { 66 | {CLOCK_PROCESS_CPUTIME_ID, "cpu"}, 67 | {CLOCK_REALTIME, "wall"} 68 | }; 69 | 70 | 71 | /// Represent a named event to be tracked by the profiler 72 | class Event { 73 | 74 | public: 75 | const std::string name; 76 | /// Number of previous instances of this event 77 | uint32_t times; 78 | /// Pointer to the most recently opened event when this is created, 79 | /// or nullptr if this event is not nested in any other. 80 | Event *const parent; 81 | 82 | static Event* getRoot(); 83 | 84 | /** 85 | * Start a child event with the given name and 86 | * return its reference. 87 | */ 88 | Event* start_child(std::string name_); 89 | 90 | /** 91 | * Start the event. 92 | */ 93 | void setStart(); 94 | 95 | /** 96 | * End this run of the event. 97 | * 98 | * Increase accumulated duration and time counter. 99 | */ 100 | void setEnd(); 101 | 102 | /** 103 | * Is this a finished event? 104 | */ 105 | bool finished(); 106 | 107 | /** 108 | * Recursively report the time measurements of this event and all descendents 109 | * in CSV format. 110 | */ 111 | void report_csv(std::ostream& out); 112 | 113 | /** 114 | * Recursively report the time measurements of this event and all descendents 115 | * in plain-text format. 116 | * 117 | * @param indentation_level depth of the event in the event tree 118 | */ 119 | void report_plain(std::ostream& out, uint32_t indentation_level=0); 120 | 121 | /** 122 | * Get a map of durations indexed by clock id. 
123 | * This can be called even for an "open" event, that is, 124 | * one for which the end time has not been set. In that case, 125 | * time is calculated up to now. 126 | */ 127 | std::map getDurations(); 128 | 129 | /** 130 | * Recursively release any used memory 131 | */ 132 | ~Event(); 133 | 134 | protected: 135 | /// Clocks with start times 136 | std::map start_clocks; 137 | /// Clocks with end times 138 | std::map end_clocks; 139 | /// Accumulated durations in seconds per clock, not including the current duration. 140 | std::map durations; 141 | /// Map of children indexed by name 142 | std::map children_by_name; 143 | /// List of children names sorted by addition order 144 | std::vector sorted_child_names; 145 | 146 | /** 147 | * Set to now all the timespecs in clock_map 148 | */ 149 | void setClocksNow(std::map &clock_map); 150 | 151 | private: 152 | /** 153 | * Create an empty Event with durations set to 0, 154 | * starting times set to now and no end times. 155 | */ 156 | Event(std::string name_, Event* parent_=nullptr); 157 | }; 158 | 159 | 160 | Event* Event::start_child(std::string name_) { 161 | auto it_found_child = children_by_name.find(name_); 162 | Event * child; 163 | if (it_found_child != children_by_name.end()) { 164 | child = it_found_child->second; 165 | if (! child->finished()) { 166 | std::stringstream ss; 167 | ss << "Cannot re-start running event '" << name_ << "'" << std::endl; 168 | throw std::runtime_error(ss.str()); 169 | } 170 | child->end_clocks.clear(); 171 | child->setStart(); 172 | } else{ 173 | child = new Event(name_, this); 174 | children_by_name.emplace(name_, child); 175 | sorted_child_names.push_back(name_); 176 | } 177 | return child; 178 | } 179 | 180 | /** 181 | * Start the event. 182 | */ 183 | void Event::setStart() { 184 | setClocksNow(start_clocks); 185 | } 186 | 187 | /** 188 | * End this run of the event. 189 | * 190 | * Increase accumulated duration and time counter.
191 | */ 192 | void Event::setEnd() { 193 | times++; 194 | 195 | // Stop timer and calculate new durations 196 | setClocksNow(end_clocks); 197 | durations = getDurations(); 198 | 199 | // Set start time as end time so that further calls 200 | // to getDurations do not add false time 201 | for (auto clockid_name : clock_types_names) { 202 | start_clocks.at(clockid_name.first) = end_clocks.at(clockid_name.first); 203 | } 204 | } 205 | 206 | /** 207 | * Is this a finished event? 208 | */ 209 | bool Event::finished() { 210 | return ! end_clocks.empty(); 211 | } 212 | 213 | /** 214 | * Get a map of durations indexed by clock id. 215 | * This can be called even for an "open" event, that is, 216 | * one for which the end time has not been set. In that case, 217 | * time is calculated up to now. 218 | */ 219 | std::map Event::getDurations() { 220 | std::map duration_map; 221 | 222 | for (auto it : clock_types_names) { 223 | clockid_t clock_id = it.first; 224 | 225 | // Previous duration 226 | double duration = 1.0 * durations.at(clock_id); // Avoid modifying reference 227 | 228 | // Current start time 229 | const auto it_start_time = start_clocks.find(clock_id); 230 | if (it_start_time == start_clocks.end()) { 231 | throw std::runtime_error("Error: event without start time"); 232 | } 233 | const timespec start_time = it_start_time->second; 234 | 235 | // End times or now 236 | const auto it_end_time = end_clocks.find(clock_id); 237 | if (it_end_time == end_clocks.end()) { 238 | // Still running 239 | timespec now; 240 | clock_gettime(clock_id, &now); 241 | duration += (now.tv_sec - start_time.tv_sec) + 1E-9 * (now.tv_nsec - start_time.tv_nsec); 242 | } else { 243 | duration += (it_end_time->second.tv_sec - start_time.tv_sec) \ 244 | + 1e-9 * (it_end_time->second.tv_nsec - start_time.tv_nsec); 245 | } 246 | 247 | duration_map.emplace(clock_id, duration); 248 | } 249 | 250 | return duration_map; 251 | } 252 | 253 | Event::~Event() { 254 | for (auto it : children_by_name) {
255 | delete it.second; 256 | } 257 | } 258 | 259 | void Event::setClocksNow(std::map &clock_map) { 260 | for (auto it : clock_types_names) { 261 | const clockid_t& clock_id = it.first; 262 | timespec now; 263 | clock_gettime(clock_id, &now); 264 | auto emplace_it = clock_map.emplace(clock_id, now); 265 | if (! emplace_it.second) { 266 | // the entry already existed: replace it 267 | emplace_it.first->second = now; 268 | } 269 | } 270 | } 271 | 272 | Event::Event(std::string name_, Event* parent_) : name(name_), times(0), parent(parent_) { 273 | for (auto it : clock_types_names) { 274 | durations.emplace(it.first, 0.0); 275 | } 276 | setStart(); 277 | } 278 | 279 | Event* Event::getRoot() { 280 | static Event root("total"); 281 | return &root; 282 | } 283 | 284 | void Event::report_csv(std::ostream& out) { 285 | static const std::string separator(","); 286 | 287 | if (this == Event::getRoot()) { 288 | // Write the CSV header only for the root event 289 | out << "event_name" << separator << "times"; 290 | for (auto clockid_name : clock_types_names) { 291 | out << separator << clockid_name.second; 292 | } 293 | out << std::endl; 294 | } 295 | 296 | // Report this event 297 | out << name << separator << times; 298 | auto durations_ = getDurations(); 299 | for (auto clockid_name : clock_types_names) { 300 | out << separator << durations_.at(clockid_name.first); 301 | } 302 | out << std::endl; 303 | 304 | // Report all children 305 | for (auto child_name : sorted_child_names) { 306 | children_by_name.at(child_name)->report_csv(out); 307 | } 308 | } 309 | 310 | void Event::report_plain(std::ostream& out, uint32_t indentation_level) { 311 | static const std::string indentation(" "); 312 | 313 | for (uint32_t i=0; i durations_this = getDurations(); 318 | std::map durations_children = getDurations(); 319 | 320 | // Print this node 321 | out << "[" << times << "] " << name << ":"; 322 | for (auto clockid_name : clock_types_names) { 323 | out << " " << clockid_name.second << 
"=" << durations_this.at(clockid_name.first); 324 | durations_children.at(clockid_name.first) = 0.0; 325 | } 326 | out << std::endl; 327 | 328 | // Print all children and accumulate their total times 329 | for (auto child_name : sorted_child_names) { 330 | Event* child = children_by_name.at(child_name); 331 | child->report_plain(out, indentation_level+1); 332 | for (auto clockid_name : clock_types_names) { 333 | durations_children.at(clockid_name.first) += child->getDurations().at(clockid_name.first); 334 | } 335 | } 336 | 337 | // Print information of time unaccounted for by the children 338 | if (! sorted_child_names.empty()) { 339 | for (uint32_t i = 0; i < indentation_level + 1; i++) { 340 | out << indentation; 341 | } 342 | out << "(remaining@" << name << ") :"; 343 | for (auto clockid_name : clock_types_names) { 344 | out << " " << clockid_name.second << "=" 345 | << durations_this.at(clockid_name.first) - durations_children.at(clockid_name.first); 346 | } 347 | out << std::endl; 348 | } 349 | } 350 | 351 | 352 | 353 | Event* current_event = Event::getRoot(); 354 | } 355 | 356 | namespace marlin { 357 | 358 | void Profiler::start(std::string event_name) { 359 | current_event = current_event->start_child(event_name); 360 | } 361 | 362 | void Profiler::end(std::string event_name) { 363 | if (current_event == Event::getRoot()) { 364 | throw std::runtime_error("Cannot end root event"); 365 | } 366 | if (! 
event_name.empty()) { 367 | if (current_event->name != event_name) { 368 | std::stringstream ss; 369 | ss << "End name '" << event_name << "' does not match currently open event '" 370 | << current_event->name << "'" << std::endl; 371 | throw std::runtime_error(ss.str()); 372 | } 373 | } 374 | 375 | current_event->setEnd(); 376 | current_event = current_event->parent; 377 | } 378 | 379 | void Profiler::report(std::ostream& out, bool csv_format) { 380 | if (csv_format) { 381 | current_event->report_csv(out); 382 | } else { 383 | current_event->report_plain(out); 384 | } 385 | 386 | } 387 | 388 | void Profiler::report(std::string output_path, bool csv_format) { 389 | if (! output_path.empty()) { 390 | std::ofstream out(output_path); 391 | Profiler::report(out, csv_format); 392 | } 393 | } 394 | 395 | } 396 | 397 | #endif -------------------------------------------------------------------------------- /src/profiler.hpp: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | profiler: a simple profiler for the Marlin codec (not thread safe) 4 | 5 | Usage: 6 | 7 | // For each event to be profiled 8 | Profiler::start("event_name"); 9 | ... 10 | Profiler::end("event_name"); 11 | 12 | Profiler::report(std::cout); 13 | 14 | Notes: 15 | 16 | * Events can be nested, but the start and end calls must be consistent. 17 | 18 | * If the same event name is started and ended several times, total duration is accumulated. 19 | 20 | * To completely disable, define the NO_PROFILER macro. 
21 | 22 | MIT License 23 | 24 | Copyright (c) 2018 Manuel Martinez Torres, Miguel Hernández-Cabronero 25 | 26 | Marlin: A Fast Entropy Codec 27 | 28 | MIT License 29 | 30 | Copyright (c) 2018 Manuel Martinez Torres 31 | 32 | Permission is hereby granted, free of charge, to any person obtaining a copy 33 | of this software and associated documentation files (the "Software"), to deal 34 | in the Software without restriction, including without limitation the rights 35 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 36 | copies of the Software, and to permit persons to whom the Software is 37 | furnished to do so, subject to the following conditions: 38 | 39 | The above copyright notice and this permission notice shall be included in all 40 | copies or substantial portions of the Software. 41 | 42 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 43 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 44 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 45 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 46 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 47 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 48 | SOFTWARE. 49 | 50 | ***********************************************************************/ 51 | 52 | #ifndef PROFILER_HPP 53 | #define PROFILER_HPP 54 | 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | 61 | namespace marlin { 62 | 63 | /** 64 | * Class to allow seamless profiling. 65 | * 66 | * This is not a thread-safe utility. 67 | */ 68 | class Profiler { 69 | 70 | public: 71 | // Only the provided static methods should be used 72 | Profiler(const Profiler& other) = delete; 73 | void operator=(const Profiler& other) = delete; 74 | 75 | /** 76 | * Start a new event now. 
77 | */ 78 | static void start(std::string event_name); 79 | 80 | /** 81 | * End the last started event. If a string is provided, 82 | * it is verified that the finishing event has a matching name. 83 | */ 84 | static void end(std::string event_name=""); 85 | 86 | /** 87 | * Report the Profiler results to out. 88 | * 89 | * If csv_format is false, a hierarchical print is shown 90 | * If csv_format is true, flattened results are printed in CSV format. 91 | */ 92 | static void report(std::ostream& out, bool csv_format=false); 93 | 94 | /** 95 | * Create a text file at output_path and print the profiler report there. 96 | * 97 | * If csv_format is false, a hierarchical print is shown 98 | * If csv_format is true, flattened results are printed in CSV format. 99 | */ 100 | static void report(std::string output_path, bool csv_format=false); 101 | 102 | 103 | protected: 104 | // Only the provided static methods should be used 105 | Profiler() {} 106 | }; 107 | 108 | } 109 | 110 | #endif /* PROFILER_HPP */ 111 | -------------------------------------------------------------------------------- /test/correctness.cc: -------------------------------------------------------------------------------- 1 | #include "marlin.h" 2 | #include "../src/distribution.hpp" 3 | #include 4 | 5 | static void printAlpha(std::vector msg) { 6 | for (size_t i=0; i msg) { 13 | for (size_t i=0; i original(Distribution::getResiduals(distribution,sz)); 30 | std::vector compressed(sz); 31 | std::vector uncompressed(sz); 32 | 33 | marlin::Configuration conf; 34 | conf["K"] = 8; 35 | conf["O"] = 1; 36 | // conf["debug"] = 99; 37 | conf["purgeProbabilityThreshold"] = 1e-99; 38 | 39 | Marlin dict("",distribution, conf); 40 | 41 | dict.compress(original, compressed); 42 | dict.decompress(compressed, uncompressed); 43 | 44 | std::cout << "Compressed Size: " << compressed.size() << std::endl; 45 | 46 | printAlpha(original); 47 | printHex(compressed); 48 | printAlpha(uncompressed); 49 | 50 | if (original != 
uncompressed) { 51 | 52 | std::cout << "P: " << 0.5 << " " << "FAIL! sizes(" << original.size() << "," << uncompressed.size() << ")" << std::endl; 53 | for (size_t i=0; i<10; i++) 54 | printf("%02X:%02X ", original[i], uncompressed[i]); 55 | std::cout << std::endl; 56 | 57 | { 58 | int c = 0; 59 | for (size_t i=0; i original(Distribution::getResiduals(Distribution::pdf(Distribution::Laplace, p),sz)); 83 | std::vector compressed(sz); 84 | std::vector uncompressed(sz); 85 | 86 | std::cout << "Get Dictionary!" << std::endl; 87 | Marlin dict("",Distribution::pdf(256, Distribution::Laplace, p)); 88 | std::cout << "Compress:" << std::endl; 89 | dict.compress(original, compressed); 90 | std::cout << "Compressed to: " << compressed.size() << " (" << double(compressed.size()) / original.size() << "%) " << dict.efficiency << std::endl; 91 | std::cout << "Theoretical efficiency: (" <<100*dict.efficiency << "%)" << std::endl; 92 | std::cout << "Real efficiency: (" << 100*p/(double(compressed.size()) / original.size()) << "%)" << std::endl; 93 | std::cout << "Decompress:" << std::endl; 94 | dict.decompress(compressed, uncompressed); 95 | std::cout << "Done!" << std::endl; 96 | 97 | 98 | 99 | if (original != uncompressed) { 100 | 101 | std::cout << "P: " << p << " " << "FAIL! 
sizes(" << original.size() << "," << uncompressed.size() << ")" << std::endl; 102 | for (size_t i=0; i<10; i++) 103 | printf("%02X:%02X ", original[i], uncompressed[i]); 104 | std::cout << std::endl; 105 | 106 | { 107 | int c = 0; 108 | for (size_t i=0; i 2 | #include 3 | #include 4 | 5 | static void buildDictionaries( 6 | std::map> &dictionaries, 7 | size_t numDict, 8 | Distribution::Type type, 9 | std::map conf = std::map() 10 | ) { 11 | 12 | 13 | for (size_t p=0; p pdf(256,0.); 25 | 26 | size_t nSamples = 10; 27 | for (double i=0.5/nSamples; i<0.9999999; i+=1./nSamples) { 28 | auto pdf0 = Distribution::pdf(type, (p+i)/double(numDict)); 29 | for (size_t j=0; j(oss.str(),pdf,conf); 34 | } 35 | } 36 | 37 | 38 | int main() { 39 | 40 | auto &&out = std::cout; 41 | 42 | std::map> builtDictionaries; 43 | 44 | buildDictionaries(builtDictionaries,16,Distribution::Laplace); 45 | buildDictionaries(builtDictionaries,16,Distribution::Gaussian); 46 | buildDictionaries(builtDictionaries,16,Distribution::Exponential); 47 | 48 | out << "#include " << std::endl; 49 | for (auto &&dict : builtDictionaries) { 50 | 51 | 52 | out << "static const std::array prebuilt_dictionary_" << dict.first << "_source2marlin = {"; 53 | for (auto &&p: dict.second->source2marlin) out << uint64_t(p) << ","; out << "};" << std::endl; 54 | 55 | out << "static const uint32_t prebuilt_dictionary_" << dict.first << "_compressorTableVector[] = {"; 56 | for (auto &&p: *dict.second->compressorTableVector) out << uint64_t(p) << ","; out << "};" << std::endl; 57 | 58 | out << "static const uint32_t prebuilt_dictionary_" << dict.first << "_compressorTableInitVector[] = {"; 59 | for (auto &&p: *dict.second->compressorTableInitVector) out << uint64_t(p) << ","; out << "};" << std::endl; 60 | 61 | out << "static const uint8_t prebuilt_dictionary_" << dict.first << "_decompressorTableVector[] = {"; 62 | for (auto &&p: *dict.second->decompressorTableVector) out << uint64_t(p) << ","; out << "};" << std::endl; 63 | 64 
| out << "static const Marlin prebuilt_dictionary_" << dict.first << "(" << std::endl; 65 | out << " \"" << dict.second->name << "\", // name" << std::endl; 66 | out << " " << dict.second->K << ", // K" << std::endl; 67 | out << " " << dict.second->O << ", // O" << std::endl; 68 | out << " " << dict.second->shift << ", // shift" << std::endl; 69 | out << " " << dict.second->maxWordSize << ", // maxWordSize" << std::endl; 70 | out << " " << dict.second->efficiency << ", // Efficiency" << std::endl; 71 | out << " " << uint64_t(dict.second->unrepresentedSymbolToken) << ", // unrepresentedSymbolToken" << std::endl; 72 | 73 | out << " prebuilt_dictionary_" << dict.first << "_source2marlin, " << std::endl; 74 | out << " &prebuilt_dictionary_" << dict.first << "_compressorTableVector[0], " << std::endl; 75 | out << " &prebuilt_dictionary_" << dict.first << "_compressorTableInitVector[0], " << std::endl; 76 | out << " &prebuilt_dictionary_" << dict.first << "_decompressorTableVector[0], " << std::endl; 77 | 78 | out << " " << uint32_t(dict.second->marlinMostCommonSymbol) << ", // marlinMostCommonSymbol" << std::endl; 79 | out << " " << dict.second->isSkip << " // isSkip" << std::endl; 80 | out << ");" << std::endl; 81 | }; 82 | 83 | out << "static const Marlin *Marlin_all_prebuilt_dictionaries[] = {" << std::endl; 84 | for (auto &&dict : builtDictionaries) 85 | out << " &prebuilt_dictionary_" << dict.first << ", " << std::endl; 86 | out << "nullptr};" << std::endl; 87 | 88 | out << "const Marlin **Marlin_get_prebuilt_dictionaries() {" << std::endl; 89 | out << " return Marlin_all_prebuilt_dictionaries;" << std::endl; 90 | out << "}" << std::endl; 91 | 92 | } 93 | -------------------------------------------------------------------------------- /utils/imageMarlin.cc: -------------------------------------------------------------------------------- 1 | /*********************************************************************** 2 | 3 | imageMarlin: an image codec based on the 
Marlin entropy coder 4 | 5 | Marlin: A Fast Entropy Codec 6 | 7 | MIT License 8 | 9 | Copyright (c) 2018 Manuel Martinez Torres 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 
28 | 29 | ***********************************************************************/ 30 | 31 | #include 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include "../src/profiler.hpp" 39 | 40 | using namespace marlin; 41 | 42 | void usage() { 43 | std::string executable_name("imageMarlin"); 44 | 45 | std::cout << std::endl; 46 | std::cout << "======================================================================" << std::endl; 47 | std::cout << "Marlin utility to compress/decompress images" << std::endl; 48 | std::cout << "======================================================================" << std::endl; 49 | std::cout << "COMPRESSION Syntax: " << executable_name << " c \\" << std::endl 50 | << "\t[-qstep=<" << ImageMarlinHeader::DEFAULT_QSTEP << ">] " 51 | << "[-qtype=<" << (int) ImageMarlinHeader::DEFAULT_QTYPE << ">] " 52 | << "[-rectype=<" << (int) ImageMarlinHeader::DEFAULT_RECONSTRUCTION_TYPE << ">] " 53 | << "[-profile=] [-ttype=] [-entfreq=] [-v|-verbose]" 54 | << std::endl; 55 | std::cout << "DECOMPRESSION Syntax: " << executable_name << "d " 56 | << std::endl; 57 | std::cout << std::endl; 58 | std::cout << "Parameter meaning:" << std::endl; 59 | std::cout << " * c|d: compress (c) / decompress (d)" << std::endl; 60 | std::cout << " * input_path: path to the image (c) / compressed (d) file" << std::endl; 61 | std::cout << " * output_path: path to the compressed (c) / reconstructed (d) file" << std::endl; 62 | std::cout << " * profile: path to the file where profiling information is to be stored" << std::endl; 63 | 64 | std::cout << " * ttype: type of transform (0: north prediction, 1: fast left DPCM), default=" 65 | << (int) ImageMarlinHeader::DEFAULT_TRANSFORM_TYPE << std::endl; 66 | 67 | std::cout << " * qstep: (optional) quantization step, 1 for lossless, default=1" << std::endl; 68 | std::cout << " * qtype: (optional) quantization type (" 69 | << (int) ImageMarlinHeader::QuantizerType::Uniform << ": uniform, " 70 | << (int) 
ImageMarlinHeader::QuantizerType::Deadzone << ": deadzone) " 71 | << " default=" << (int) ImageMarlinHeader::DEFAULT_QTYPE << std::endl; 72 | std::cout << " * rectype: (optional) quantization reconstruction type" << std::endl 73 | << " (" << (int) ImageMarlinHeader::ReconstructionType::Midpoint << ": interval midpoint, " 74 | << (int) ImageMarlinHeader::ReconstructionType::Lowpoint << ": interval low) " 75 | << " default=" << (int) ImageMarlinHeader::DEFAULT_RECONSTRUCTION_TYPE << std::endl; 76 | std::cout << " * entfreq: entropy is calculated for 1 out of every entfreq blocks. " 77 | << "Default=" << ImageMarlinHeader::DEFAULT_ENTROPY_FREQUENCY << std::endl; 78 | std::cout << " * verbose|v: show extra info" << std::endl; 79 | std::cout << std::endl; 80 | std::cout << "Compression examples:" << std::endl; 81 | std::cout << std::endl; 82 | std::cout << "(c)ompress file.png or file.pgm into file.mar (lossless)" << std::endl; 83 | std::cout << "\t" << executable_name << " c file.png file.mar" << std::endl; 84 | std::cout << "(c)ompress file.png or file.pgm into file.mar (quantization step 7)" << std::endl; 85 | std::cout << "\t" << executable_name << " c file.pgm file.mar -qstep=7" << std::endl; 86 | std::cout << std::endl; 87 | 88 | std::cout << "Decompression examples:" << std::endl; 89 | std::cout << "(d)decompresses file.mar into file.png or file.mar" << std::endl; 90 | std::cout << "\t" << executable_name << " d file.mar file.png" << std::endl; 91 | std::cout << "\t" << executable_name << " d file.mar file.pgm" << std::endl; 92 | std::cout << std::endl; 93 | std::cout << "NOTE: Any input/output format supported by OpenCV can be used " << std::endl 94 | << "for compression/decompression (e.g., .pgm, .png, .bmp)" << std::endl; 95 | std::cout << "======================================================================" << std::endl; 96 | } 97 | 98 | /** 99 | * Parse command line arguments. 
100 | */ 101 | void parse_arguments(int argc, char **argv, 102 | bool& mode_compress, 103 | std::string& input_path, 104 | std::string& output_path, 105 | uint32_t& qstep, 106 | uint32_t& blockSize, 107 | std::string& path_profile, 108 | bool& verbose, 109 | ImageMarlinHeader::QuantizerType& qtype, 110 | ImageMarlinHeader::ReconstructionType& rectype, 111 | ImageMarlinHeader::TransformType& transtype, 112 | uint32_t& blockEntropyFrequency 113 | ) { 114 | if (argc < 4) { 115 | throw std::runtime_error("Invalid argument count"); 116 | } 117 | 118 | // Compression/decompression mode 119 | std::string mode_string(argv[1]); 120 | if (mode_string == "c" || mode_string == "C") { 121 | mode_compress = true; 122 | } else if (mode_string == "d" || mode_string == "D") { 123 | mode_compress = false; 124 | } else { 125 | throw std::runtime_error("Invalid c|d flag"); 126 | } 127 | 128 | // Input/output paths 129 | input_path = argv[2]; 130 | std::ifstream ifs(input_path); 131 | if (! ifs.good()) { 132 | throw std::runtime_error("Cannot open input_path for reading"); 133 | } 134 | 135 | output_path = argv[3]; 136 | std::ofstream ofs(output_path); 137 | if (! 
ofs.good()) { 138 | throw std::runtime_error("Cannot open output_path for writing"); 139 | } 140 | 141 | // Optional parameters 142 | if (argc > 4 && !mode_compress) { 143 | throw std::runtime_error("Optional arguments can only appear for compression."); 144 | } 145 | std::regex re; 146 | for (int i=4; i= 1."); 200 | } 201 | continue; 202 | } 203 | 204 | // path to the profiling file 205 | re = "-profile=(.+)"; 206 | if (std::regex_search(argument, match, re)) { 207 | path_profile = match.str(1); 208 | continue; 209 | } 210 | 211 | re = "-(v|verbose)"; 212 | if (std::regex_search(argument, match, re)) { 213 | verbose = true; 214 | continue; 215 | } 216 | 217 | std::stringstream ss; 218 | ss << "Unrecognized argument " << argument; 219 | throw std::runtime_error(ss.str()); 220 | } 221 | } 222 | 223 | int main(int argc, char **argv) { 224 | // Parse mode, and input/output paths 225 | bool mode_compress; 226 | std::string input_path; 227 | std::string output_path; 228 | uint32_t qstep = ImageMarlinHeader::DEFAULT_QSTEP; 229 | ImageMarlinHeader::QuantizerType qtype = ImageMarlinHeader::DEFAULT_QTYPE; 230 | ImageMarlinHeader::ReconstructionType rectype = ImageMarlinHeader::DEFAULT_RECONSTRUCTION_TYPE; 231 | ImageMarlinHeader::TransformType transtype = ImageMarlinHeader::DEFAULT_TRANSFORM_TYPE; 232 | uint32_t blockSize = ImageMarlinHeader::DEFAULT_BLOCK_WIDTH; 233 | uint32_t entropyFrequency = ImageMarlinHeader::DEFAULT_ENTROPY_FREQUENCY; 234 | std::string path_profile; 235 | bool verbose = false; 236 | 237 | try { 238 | parse_arguments(argc, argv, mode_compress, input_path, output_path, 239 | qstep, blockSize, path_profile, verbose, 240 | qtype, rectype, transtype, entropyFrequency); 241 | } catch (std::runtime_error ex) { 242 | usage(); 243 | std::cerr << std::endl << "ERROR: " << ex.what() << std::endl; 244 | return -1; 245 | } 246 | 247 | if (mode_compress) { 248 | cv::Mat img; 249 | { 250 | img = cv::imread(input_path, cv::IMREAD_UNCHANGED); 251 | if (img.empty()) { 
252 | usage(); 253 | std::cerr << "ERROR: Cannot read " << input_path << ". Is it in a supported format?" << std::endl; 254 | return -1; 255 | } 256 | } 257 | 258 | ImageMarlinHeader header( 259 | (uint32_t) img.rows, (uint32_t) img.cols, (uint32_t) img.channels(), 260 | blockSize, qstep, qtype, rectype, transtype, entropyFrequency); 261 | if (verbose) { 262 | header.show(std::cout); 263 | } 264 | std::ofstream off(output_path); 265 | ImageMarlinCoder* compressor = header.newCoder(); 266 | Profiler::start("compression"); 267 | compressor->compress(img, off); 268 | Profiler::end("compression"); 269 | delete compressor; 270 | } else { 271 | std::string compressedData; 272 | { 273 | std::ifstream iss(input_path); 274 | iss.seekg(0, std::ios::end); 275 | size_t sz = iss.tellg(); 276 | compressedData.resize(sz); 277 | iss.seekg(0, std::ios::beg); 278 | iss.read(&compressedData[0], sz); 279 | } 280 | 281 | ImageMarlinHeader decompressedHeader(compressedData); 282 | ImageMarlinDecoder* decompressor = decompressedHeader.newDecoder(); 283 | std::vector decompressedData(decompressedHeader.rows * decompressedHeader.cols); 284 | 285 | Profiler::start("decompression"); 286 | decompressor->decompress(compressedData, decompressedData, decompressedHeader); 287 | Profiler::end("decompression"); 288 | 289 | cv::Mat1b img(decompressedHeader.rows, decompressedHeader.cols, &decompressedData[0]); 290 | cv::imwrite(output_path, img); 291 | if (verbose) { 292 | decompressedHeader.show(std::cout); 293 | } 294 | delete decompressor; 295 | } 296 | 297 | Profiler::report(path_profile, true); 298 | 299 | if (verbose) { 300 | Profiler::report(std::cout, false); 301 | } 302 | 303 | return 0; 304 | } 305 | -------------------------------------------------------------------------------- /utils/testImage.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | struct TestTimer { 10 | 
timespec c_start, c_end; 11 | void start() { clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &c_start); }; 12 | void stop () { clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &c_end); }; 13 | double operator()() { return (c_end.tv_sec-c_start.tv_sec) + 1.E-9*(c_end.tv_nsec-c_start.tv_nsec); } 14 | }; 15 | 16 | TestTimer tt; 17 | #define TESTTIME(timer, a) \ 18 | timer.start(); a; timer.stop(); \ 19 | std::cerr << "Tested \"" << #a << "\": " << int(timer()*1e6) << "us" << std::endl; 20 | 21 | //////////////////////////////////////////////////////////////////////// 22 | // Pure Marlin Compression Functions 23 | 24 | std::vector compressLaplacianFixedBlockFast(const std::vector &uncompressed, size_t blockSize) { 25 | 26 | const size_t nBlocks = (uncompressed.size()+blockSize-1)/blockSize; 27 | 28 | std::vector> blocksEntropy; 29 | 30 | for (size_t i=0; i hist; hist.fill(0.); 41 | for (size_t j=1; j header(nBlocks*3); 58 | std::vector scratchPad(nBlocks * blockSize); 59 | for (size_t b=0; bcompress(in, out); 69 | 70 | header[3*i+0]=&prebuilt_dictionaries[(entropy*16)/256] - Marlin_get_prebuilt_dictionaries(); 71 | header[3*i+1]=compressedSize & 0xFF; 72 | header[3*i+2]=compressedSize >> 8; 73 | } 74 | 75 | 76 | size_t fullCompressedSize = header.size(); 77 | for (size_t i=0; i out(fullCompressedSize); 83 | 84 | memcpy(&out[0], header.data(), header.size()); 85 | 86 | { 87 | size_t p = header.size(); 88 | for (size_t i=0; i compressFixedBlockSlow(const std::vector &uncompressed, size_t blockSize) { 100 | 101 | const size_t nBlocks = (uncompressed.size()+blockSize-1)/blockSize; 102 | 103 | std::vector> blocksEntropy; 104 | 105 | std::vector header(nBlocks*3); 106 | std::vector bestSizes(nBlocks, blockSize*2); 107 | std::vector> bestBlocks(nBlocks, std::vector(blockSize)); 108 | std::vector scratchPad(blockSize); 109 | 110 | for (auto **dict = Marlin_get_prebuilt_dictionaries(); *dict; dict++) { 111 | 112 | for (size_t i=0; icompress(in, out); 120 | 121 | if (compressedSize> 8; 127 | 
bestBlocks[i] = scratchPad; 128 | } 129 | } 130 | //std::cout << "kk " << Marlin_get_prebuilt_dictionaries()[int(header[3*i+0])]->name << " " << double(100*bestsz)/blockSize<< std::endl; 131 | } 132 | 133 | 134 | 135 | std::vector compressedData = header; 136 | for (size_t i=0; i uncompressed, marlin::View &compressed, size_t blockSize) { 145 | 146 | const size_t nBlocks = (uncompressed.nBytes()+blockSize-1)/blockSize; 147 | 148 | std::vector> blocksDictionary; 149 | std::vector blocksSize; 150 | std::vector blocksPosition; 151 | 152 | { 153 | size_t position = nBlocks*3; // this is the header's size 154 | for (size_t i=0; idecompress(in, out); 180 | } 181 | return uncompressed.nBytes(); 182 | } 183 | 184 | 185 | //////////////////////////////////////////////////////////////////////// 186 | // Marlin Image Compression Functions 187 | 188 | struct MarlinImageHeader { 189 | uint16_t rows, cols, channels; 190 | uint16_t imageBlockWidth; 191 | }; 192 | 193 | 194 | static std::string compressImage(cv::Mat orig_img, size_t imageBlockWidth = 64, bool fast = true) { 195 | 196 | const size_t bs = imageBlockWidth; 197 | 198 | size_t brows = (orig_img.rows+bs-1)/bs; 199 | size_t bcols = (orig_img.cols+bs-1)/bs; 200 | cv::Mat img; 201 | cv::copyMakeBorder(orig_img, img, 0, brows*bs-orig_img.rows, 0, bcols*bs-orig_img.cols, cv::BORDER_REPLICATE); 202 | 203 | 204 | std::vector dc(bcols*brows*img.channels()); 205 | std::vector preprocessed(bcols*brows*bs*bs*img.channels()); 206 | if (img.channels()==3) { 207 | 208 | cv::Mat3b img3b = img; 209 | // PREPROCESS IMAGE INTO BLOCKS 210 | { 211 | uint8_t *tb = &preprocessed[0*bcols*brows*bs*bs]; 212 | uint8_t *tg = &preprocessed[1*bcols*brows*bs*bs]; 213 | uint8_t *tr = &preprocessed[2*bcols*brows*bs*bs]; 214 | 215 | for (size_t i=0; i uncompressed; 316 | }; 317 | 318 | static cv::Mat uncompressImage( 319 | const std::string &compressedString, 320 | UncompressImage_Context &context) { 321 | 322 | MarlinImageHeader header; 323 | 
memcpy(&header, compressedString.data(), sizeof(MarlinImageHeader)); 324 | 325 | const size_t bs = header.imageBlockWidth; 326 | 327 | size_t brows = (header.rows+bs-1)/bs; 328 | size_t bcols = (header.cols+bs-1)/bs; 329 | 330 | size_t channels = header.channels; 331 | 332 | auto dc = marlin::make_view( 333 | (const uint8_t *)&compressedString[sizeof(MarlinImageHeader)], 334 | (const uint8_t *)&compressedString[sizeof(MarlinImageHeader) + channels*bcols*brows]); 335 | 336 | auto compressed = marlin::make_view( 337 | (const uint8_t *)&compressedString[sizeof(MarlinImageHeader) + channels*bcols*brows], 338 | (const uint8_t *)&compressedString[compressedString.size()]); 339 | 340 | auto &uncompressed = context.uncompressed; 341 | uncompressed.resize(channels*bcols*brows*bs*bs); 342 | 343 | uncompress(marlin::make_view(uncompressed), compressed, bs*bs); 344 | 345 | 346 | if (channels==1) { 347 | 348 | cv::Mat1b img1b(brows*bs, bcols*bs); 349 | 350 | // PREPROCESS IMAGE INTO BLOCKS 351 | { 352 | const uint8_t *t = &uncompressed[0]; 353 | 354 | for (size_t i=0; i " << std::endl; 458 | std::cout << std::endl; 459 | std::cout << "Usage examples:" << std::endl; 460 | std::cout << std::endl; 461 | std::cout << "(c)ompress file.png or file.pgm into file.mar" << std::endl; 462 | std::cout << "\t" << argv[0] << " c file.png file.mar" << std::endl; 463 | std::cout << "\t" << argv[0] << " c file.pgm file.mar" << std::endl; 464 | std::cout << std::endl; 465 | std::cout << "(d)decompresses file.mar into file.png or file.mar" << std::endl; 466 | std::cout << "\t" << argv[0] << " d file.mar file.png" << std::endl; 467 | std::cout << "\t" << argv[0] << " d file.mar file.pgm" << std::endl; 468 | std::cout << std::endl; 469 | std::cout << "Any input/output format supported by OpenCV can be used for compression/decompression." 
<< std::endl; 470 | exit(-1); 471 | } 472 | 473 | int main(int argc, char **argv) { 474 | 475 | if (argc != 4) { 476 | usage(argv); 477 | } 478 | 479 | std::string mode_string(argv[1]); 480 | std::string input_path(argv[2]); 481 | std::string output_path(argv[3]); 482 | 483 | std::ifstream ifs = std::ifstream(input_path); 484 | if (! ifs.good()) { 485 | std::cerr << "ERROR: Cannot access '" << input_path << "'" << std::endl; 486 | usage(argv); 487 | } 488 | 489 | bool mode_compress = true; 490 | if (mode_string == "c") { 491 | mode_compress = true; 492 | } else if (mode_string == "d") { 493 | mode_compress = false; 494 | } else { 495 | std::cerr << "ERROR: Invalid syntax" << std::endl; 496 | usage(argv); 497 | } 498 | 499 | TestTimer ttmain; 500 | if (mode_compress) { 501 | cv::Mat img = cv::imread(input_path, cv::IMREAD_UNCHANGED); 502 | if (img.empty()) { 503 | std::cerr << "ERROR: Cannot read " << input_path << ". Is it in a supported format?" << std::endl; 504 | usage(argv); 505 | } 506 | 507 | std::cerr << "Read image: " << input_path << " (" << img.rows << "x" << img.cols << ") " \ 508 | << " nChannels: " << img.channels() << std::endl; 509 | 510 | TESTTIME(ttmain, auto compressed = compressImage(img)); 511 | 512 | std::cerr << "Compressed " << compressed.size() << " bytes at " \ 513 | << int(((img.rows*img.cols*img.channels())/ttmain())/(1<<20)) << "MB/s" << std::endl; 514 | 515 | std::ofstream off(output_path); 516 | off.write(compressed.data(), compressed.size()); 517 | } else { 518 | std::string compressed; 519 | { 520 | std::ifstream iss(input_path); 521 | iss.seekg(0, std::ios::end); 522 | size_t sz = iss.tellg(); 523 | compressed.resize(sz); 524 | iss.seekg(0, std::ios::beg); 525 | iss.read(&compressed[0], sz); 526 | } 527 | 528 | UncompressImage_Context context; 529 | uncompressImage(compressed, context); 530 | 531 | std::cerr << "Read marlin compressed image: " << input_path << " of size: " << compressed.size() << std::endl; 532 | TESTTIME(ttmain, 
auto img = uncompressImage(compressed, context)); 533 | 534 | std::cerr << "Uncompressed to: " 535 | << " (" << img.rows << "x" << img.cols << ") nChannels: " << img.channels() 536 | << " at " << int(((img.rows*img.cols*img.channels())/ttmain())/(1<<20)) << "MB/s" << std::endl; 537 | 538 | cv::imwrite(output_path, img); 539 | } 540 | 541 | 542 | return 0; 543 | } 544 | --------------------------------------------------------------------------------